Commit 39fa0d25 by linye

1. merge with liyinqiao branch 2. support read with float16 datatype

parent 86adc288
...@@ -43,18 +43,18 @@ void XFuncGrad::MakeGrad(XTensor * node, bool isEfficient) ...@@ -43,18 +43,18 @@ void XFuncGrad::MakeGrad(XTensor * node, bool isEfficient)
XNoder::MakeGrad(input); XNoder::MakeGrad(input);
if(operID == FUNC_HARDTANH) if(operID == FUNC_HARDTANH)
_HardTanHBackward(NULL, output, input, output->grad, input->grad, NOLOSS); _HardTanHBackward(output, input, output->grad, input->grad);
else if(operID == FUNC_IDENTITY) else if(operID == FUNC_IDENTITY)
_IdentityBackward(NULL, output, input, output->grad, input->grad, NOLOSS); _IdentityBackward(output, input, output->grad, input->grad);
else if(operID == FUNC_LOGSOFTMAX){ else if(operID == FUNC_LOGSOFTMAX){
int leadDim = income.GetParamInt(0); int leadDim = income.GetParamInt(0);
CheckNTErrors(leadDim >= 0 && leadDim < input->order, "wrong leading dimension in logsoftmax!"); CheckNTErrors(leadDim >= 0 && leadDim < input->order, "wrong leading dimension in logsoftmax!");
_LogSoftmaxBackward(NULL, output, input, output->grad, input->grad, NULL, leadDim, NOLOSS); _LogSoftmaxBackward(NULL, output, input, output->grad, input->grad, NULL, leadDim, NOLOSS);
} }
else if(operID == FUNC_RECTIFY) else if(operID == FUNC_RECTIFY)
_RectifyBackward(NULL, output, input, output->grad, input->grad, NOLOSS); _RectifyBackward(output, input, output->grad, input->grad);
else if(operID == FUNC_SIGMOID) else if(operID == FUNC_SIGMOID)
_SigmoidBackward(NULL, output, input, output->grad, input->grad, NOLOSS); _SigmoidBackward(output, input, output->grad, input->grad);
else if(operID == FUNC_SOFTMAX){ else if(operID == FUNC_SOFTMAX){
int leadDim = income.GetParamInt(0); int leadDim = income.GetParamInt(0);
CheckNTErrors(leadDim >= 0 && leadDim < input->order, "wrong leading dimension in softmax!"); CheckNTErrors(leadDim >= 0 && leadDim < input->order, "wrong leading dimension in softmax!");
......
...@@ -61,7 +61,7 @@ void XLossGrad::MakeGrad(XTensor * node, bool isEfficient) ...@@ -61,7 +61,7 @@ void XLossGrad::MakeGrad(XTensor * node, bool isEfficient)
if(operID == LOSS_CROSSENTROPY) { if(operID == LOSS_CROSSENTROPY) {
if (income.tailNum == 3) if (income.tailNum == 3)
padding = income.tails[2]; padding = income.tails[2];
leadingDim = income.GetParamInt(0); leadingDim = income.GetParamInt(0);
CheckNTErrors(leadingDim >= 0 && leadingDim < output->order, "wrong leading dimension in logsoftmax!"); CheckNTErrors(leadingDim >= 0 && leadingDim < output->order, "wrong leading dimension in logsoftmax!");
_CrossEntropyBackward(dedy, output, gold, weight, padding, leadingDim); _CrossEntropyBackward(dedy, output, gold, weight, padding, leadingDim);
} }
...@@ -90,39 +90,39 @@ compute dE/dx for a given function y = f(x) ...@@ -90,39 +90,39 @@ compute dE/dx for a given function y = f(x)
>> params - parameters of the function >> params - parameters of the function
>> lossName - name of the loss, e.g., cross entropy >> lossName - name of the loss, e.g., cross entropy
*/ */
void XLossGrad::Compute(XTensor * gold, XTensor * y, XTensor * x, //void XLossGrad::Compute(XTensor * gold, XTensor * y, XTensor * x,
XTensor * dedy, XTensor * dedx, XTensor * padding, // XTensor * dedy, XTensor * dedx, XTensor * padding,
int funcID, void * params, // int funcID, void * params,
LOSS_FUNCTION_NAME lossName) // LOSS_FUNCTION_NAME lossName)
{ //{
CheckNTErrors(gold && y && x, "Empty input tensors!"); // CheckNTErrors(gold && y && x, "Empty input tensors!");
CheckNTErrors(dedx, "Empty gradient tensors!"); // CheckNTErrors(dedx, "Empty gradient tensors!");
CheckNTErrors((funcID & FUNCTION_BASE) != 0, "Illegal function id"); // CheckNTErrors((funcID & FUNCTION_BASE) != 0, "Illegal function id");
//
if(funcID == FUNC_HARDTANH){ // if(funcID == FUNC_HARDTANH){
_HardTanHBackward(gold, y, x, dedy, dedx, lossName); // _HardTanHBackward(gold, y, x, dedy, dedx, lossName);
} // }
else if(funcID == FUNC_IDENTITY){ // else if(funcID == FUNC_IDENTITY){
_IdentityBackward(gold, y, x, dedy, dedx, lossName); // _IdentityBackward(gold, y, x, dedy, dedx, lossName);
} // }
else if(funcID == FUNC_LOGSOFTMAX){ // else if(funcID == FUNC_LOGSOFTMAX){
int leadDim = *(int*)params; // int leadDim = *(int*)params;
_LogSoftmaxBackward(gold, y, x, dedy, dedx, padding, leadDim, lossName); // _LogSoftmaxBackward(gold, y, x, dedy, dedx, padding, leadDim, lossName);
} // }
else if(funcID == FUNC_RECTIFY){ // else if(funcID == FUNC_RECTIFY){
_RectifyBackward(gold, y, x, dedy, dedx, lossName); // _RectifyBackward(gold, y, x, dedy, dedx, lossName);
} // }
else if(funcID == FUNC_SIGMOID){ // else if(funcID == FUNC_SIGMOID){
_SigmoidBackward(gold, y, x, dedy, dedx, lossName); // _SigmoidBackward(gold, y, x, dedy, dedx, lossName);
}else if(funcID == FUNC_SOFTMAX){ // }else if(funcID == FUNC_SOFTMAX){
int leadDim = *(int*)params; // int leadDim = *(int*)params;
_SoftmaxBackward(gold, y, x, dedy, dedx, padding, leadDim, lossName); // _SoftmaxBackward(gold, y, x, dedy, dedx, padding, leadDim, lossName);
} // }
else{ // else{
ShowNTErrors("wrong function found when call the backward process!"); // ShowNTErrors("wrong function found when call the backward process!");
} // }
//
} //}
/* /*
compute dE/dy for variable y and error(loss) function E compute dE/dy for variable y and error(loss) function E
...@@ -131,19 +131,27 @@ compute dE/dy for variable y and error(loss) function E ...@@ -131,19 +131,27 @@ compute dE/dy for variable y and error(loss) function E
>> dedy - dE/dy >> dedy - dE/dy
>> lossName - name of the loss, e.g., cross entropy >> lossName - name of the loss, e.g., cross entropy
*/ */
void XLossGrad::Compute(XTensor * gold, XTensor * y, //void XLossGrad::Compute(XTensor * gold, XTensor * y,
XTensor * dedy, XTensor * padding, // XTensor * dedy, XTensor * padding,
LOSS_FUNCTION_NAME lossName) // LOSS_FUNCTION_NAME lossName)
{ //{
if(gold == NULL){ // if(gold == NULL){
_SetDataFixed(dedy, 1.0F); // if(dedy->dataType == X_FLOAT)
return; // _SetDataFixedFloat(dedy, 1.0F);
} // else if(dedy->dataType == X_DOUBLE)
// _SetDataFixedDouble(dedy, 1.0);
//_LossBackward(dedy, gold, y, lossName); // else if(dedy->dataType == X_INT)
if(lossName == CROSSENTROPY) // _SetDataFixedInt(dedy, 1);
_CrossEntropyBackward(dedy, y, gold, NULL, padding); // else{
// ShowNTErrors("TODO");
} // }
// return;
// }
//
// //_LossBackward(dedy, gold, y, lossName);
// if(lossName == CROSSENTROPY)
// _CrossEntropyBackward(dedy, y, gold, NULL, padding);
//
//}
} }
\ No newline at end of file
...@@ -43,11 +43,11 @@ public: ...@@ -43,11 +43,11 @@ public:
static static
bool IsLossOP(XTensor * node); bool IsLossOP(XTensor * node);
/* compute dE/dx for a given function y = f(x) */ ///* compute dE/dx for a given function y = f(x) */
void Compute(XTensor * gold, XTensor * y, XTensor * x, //void Compute(XTensor * gold, XTensor * y, XTensor * x,
XTensor * dedy, XTensor * dedx, XTensor * padding, // XTensor * dedy, XTensor * dedx, XTensor * padding,
int funcID, void * params, // int funcID, void * params,
LOSS_FUNCTION_NAME lossName); // LOSS_FUNCTION_NAME lossName);
/* compute dE/dy for variable y and error(loss) function E */ /* compute dE/dy for variable y and error(loss) function E */
void Compute(XTensor * gold, XTensor * y, void Compute(XTensor * gold, XTensor * y,
......
...@@ -530,7 +530,7 @@ void XMathGrad::GradMatrixMul(XTensor * node, bool isEfficient) ...@@ -530,7 +530,7 @@ void XMathGrad::GradMatrixMul(XTensor * node, bool isEfficient)
XTensor * dedc = node->grad; XTensor * dedc = node->grad;
XTensor * deda = a->grad; XTensor * deda = a->grad;
XTensor * dedb = b->grad; XTensor * dedb = b->grad;
if(a->order == 2 && b->order == 2) if(a->order == 2 && b->order == 2)
GradMatrixMul(a, deda, transA, b, dedb, transB, dedc, alpha, isEfficient); GradMatrixMul(a, deda, transA, b, dedb, transB, dedc, alpha, isEfficient);
else if(transA == X_NOTRANS && a->order > 2 && b->order == 2){ else if(transA == X_NOTRANS && a->order > 2 && b->order == 2){
...@@ -735,7 +735,7 @@ void XMathGrad::GradMultiply(XTensor * node, bool isEfficient) ...@@ -735,7 +735,7 @@ void XMathGrad::GradMultiply(XTensor * node, bool isEfficient)
if (!isEfficient || b->isGrad) { if (!isEfficient || b->isGrad) {
XNoder::MakeGrad(b); XNoder::MakeGrad(b);
_Multiply(node->grad, a, b->grad, 1.0F);; _Multiply(node->grad, a, b->grad, 1.0F);
} }
node->visitMark = NODE_FINISHED; node->visitMark = NODE_FINISHED;
...@@ -765,15 +765,15 @@ void XMathGrad::GradMultiplyDim(XTensor * node, bool isEfficient) ...@@ -765,15 +765,15 @@ void XMathGrad::GradMultiplyDim(XTensor * node, bool isEfficient)
/* dE/da */ /* dE/da */
_MultiplyDim(node->grad, b, a->grad, n, 1.0F); _MultiplyDim(node->grad, b, a->grad, n, 1.0F);
/* dE/db */ /* dE/db */
int order = a->order; int order = a->order;
int dimSize[MAX_TENSOR_DIM_NUM]; int dimSize[MAX_TENSOR_DIM_NUM];
memcpy(dimSize, a->dimSize, sizeof(int) * a->order); memcpy(dimSize, a->dimSize, sizeof(int) * a->order);
XTensor * bGradTMP = NewTensorBuf(node->grad, node->devID, node->mem); XTensor * bGradTMP = NewTensorBuf(node->grad, node->devID, node->mem);
_Multiply(node->grad, a, bGradTMP); _Multiply(node->grad, a, bGradTMP);
if(n == order - 1){ if(n == order - 1){
int reshapedSize[MAX_TENSOR_DIM_NUM]; int reshapedSize[MAX_TENSOR_DIM_NUM];
reshapedSize[0] = a->unitNum/dimSize[order - 1]; reshapedSize[0] = a->unitNum/dimSize[order - 1];
...@@ -855,7 +855,6 @@ void XMathGrad::GradMultiplyBroadcast(XTensor * node, bool isEfficient) ...@@ -855,7 +855,6 @@ void XMathGrad::GradMultiplyBroadcast(XTensor * node, bool isEfficient)
XTensor * a = income.tails[0]; XTensor * a = income.tails[0];
XTensor * b = income.tails[1]; XTensor * b = income.tails[1];
DTYPE beta = income.GetParam(0);
XNoder::MakeGrad(a); XNoder::MakeGrad(a);
_MultiplyBroadcast(node->grad, b, a->grad, 1.0F); _MultiplyBroadcast(node->grad, b, a->grad, 1.0F);
...@@ -1079,91 +1078,91 @@ dE/db = - dE/dc * b.reduce(0,...,n-1,n+1,...) * \beta ...@@ -1079,91 +1078,91 @@ dE/db = - dE/dc * b.reduce(0,...,n-1,n+1,...) * \beta
*/ */
void XMathGrad::GradSubDim(XTensor * node, bool isEfficient) void XMathGrad::GradSubDim(XTensor * node, bool isEfficient)
{ {
XLink &income = node->income; XLink &income = node->income;
CheckNTErrors(income.tailNum == 2, "Wrong input tensor number for SUBDIM!"); CheckNTErrors(income.tailNum == 2, "Wrong input tensor number for SUBDIM!");
XTensor * a = income.tails[0]; XTensor * a = income.tails[0];
XTensor * b = income.tails[1]; XTensor * b = income.tails[1];
int n = income.GetParamInt(0); int n = income.GetParamInt(0);
DTYPE beta = income.GetParam(1); DTYPE beta = income.GetParam(1);
XNoder::MakeGrad(a); XNoder::MakeGrad(a);
XNoder::MakeGrad(b); XNoder::MakeGrad(b);
_Sum(a->grad, node->grad, a->grad); _Sum(a->grad, node->grad, a->grad);
int order = a->order; int order = a->order;
int dimSize[MAX_TENSOR_DIM_NUM]; int dimSize[MAX_TENSOR_DIM_NUM];
memcpy(dimSize, a->dimSize, sizeof(int) * a->order); memcpy(dimSize, a->dimSize, sizeof(int) * a->order);
if(n == order - 1){ if(n == order - 1){
int reshapedSize[MAX_TENSOR_DIM_NUM]; int reshapedSize[MAX_TENSOR_DIM_NUM];
reshapedSize[0] = a->unitNum / dimSize[order - 1]; reshapedSize[0] = a->unitNum / dimSize[order - 1];
reshapedSize[1] = dimSize[order - 1]; reshapedSize[1] = dimSize[order - 1];
/* we reshape dE/dc to a matrix whose column number is equal to the /* we reshape dE/dc to a matrix whose column number is equal to the
size of b. Then we can reduce the matrix into a row vector. */ size of b. Then we can reduce the matrix into a row vector. */
node->grad->Reshape(2, reshapedSize); node->grad->Reshape(2, reshapedSize);
//if(b->outgo.tailNum > 1){ //if(b->outgo.tailNum > 1){
XTensor * bGradTMP = NewTensorBuf(b->grad, b->devID, b->mem); XTensor * bGradTMP = NewTensorBuf(b->grad, b->devID, b->mem);
_ReduceSum(node->grad, bGradTMP, 0); _ReduceSum(node->grad, bGradTMP, 0);
if(beta != 1.0F) if(beta != 1.0F)
_ScaleAndShiftMe(bGradTMP, beta); _ScaleAndShiftMe(bGradTMP, beta);
_Sub(b->grad, bGradTMP, b->grad); _Sub(b->grad, bGradTMP, b->grad);
DelTensorBuf(bGradTMP); DelTensorBuf(bGradTMP);
/*} /*}
else{ else{
_ReduceSum(node->grad, b->grad, 0); _ReduceSum(node->grad, b->grad, 0);
if(beta != 1.0F) if(beta != 1.0F)
_ScaleAndShiftMe(b->grad, beta); _ScaleAndShiftMe(b->grad, beta);
_ScaleAndShiftMe(b->grad, -1.0F); _ScaleAndShiftMe(b->grad, -1.0F);
}*/ }*/
node->grad->Reshape(order, dimSize); node->grad->Reshape(order, dimSize);
} }
else{ else{
int reshapedSize[MAX_TENSOR_DIM_NUM]; int reshapedSize[MAX_TENSOR_DIM_NUM];
reshapedSize[0] = 1; reshapedSize[0] = 1;
reshapedSize[1] = dimSize[n]; reshapedSize[1] = dimSize[n];
reshapedSize[2] = 1; reshapedSize[2] = 1;
for(int i = 0; i < order; i++){ for(int i = 0; i < order; i++){
if(i < n) if(i < n)
reshapedSize[0] *= dimSize[i]; reshapedSize[0] *= dimSize[i];
} }
reshapedSize[2] = a->unitNum / (reshapedSize[0] * reshapedSize[1]); reshapedSize[2] = a->unitNum / (reshapedSize[0] * reshapedSize[1]);
/* we reshape dE/dc to a 3D tensor of size (x, y, z) where y = |b|. /* we reshape dE/dc to a 3D tensor of size (x, y, z) where y = |b|.
Then reduce along with z and x to obtain dE/db. */ Then reduce along with z and x to obtain dE/db. */
node->grad->Reshape(3, reshapedSize); node->grad->Reshape(3, reshapedSize);
XTensor * interGrad = NewTensorBuf(2, reshapedSize, b->dataType, b->denseRatio, b->devID, b->mem); XTensor * interGrad = NewTensorBuf(2, reshapedSize, b->dataType, b->denseRatio, b->devID, b->mem);
_ReduceSum(node->grad, interGrad, 2); _ReduceSum(node->grad, interGrad, 2);
//if(b->outgo.tailNum > 1){ //if(b->outgo.tailNum > 1){
XTensor * bGradTMP = NewTensorBuf(b->grad, b->devID, b->mem); XTensor * bGradTMP = NewTensorBuf(b->grad, b->devID, b->mem);
_ReduceSum(interGrad, bGradTMP, 0); _ReduceSum(interGrad, bGradTMP, 0);
if(beta != 1.0F) if(beta != 1.0F)
_ScaleAndShiftMe(bGradTMP, beta); _ScaleAndShiftMe(bGradTMP, beta);
_Sub(b->grad, bGradTMP, b->grad); _Sub(b->grad, bGradTMP, b->grad);
DelTensorBuf(bGradTMP); DelTensorBuf(bGradTMP);
/*} /*}
else{ else{
_ReduceSum(interGrad, b->grad, 0); _ReduceSum(interGrad, b->grad, 0);
if(beta != 1.0F) if(beta != 1.0F)
_ScaleAndShiftMe(b->grad, beta); _ScaleAndShiftMe(b->grad, beta);
_ScaleAndShiftMe(b->grad, -1.0F); _ScaleAndShiftMe(b->grad, -1.0F);
}*/ }*/
node->grad->Reshape(order, dimSize); node->grad->Reshape(order, dimSize);
DelTensorBuf(interGrad); DelTensorBuf(interGrad);
} }
node->visitMark = NODE_FINISHED; node->visitMark = NODE_FINISHED;
} }
/* /*
...@@ -1319,7 +1318,7 @@ void XMathGrad::GradSumBroadcast(XTensor * node, bool isEfficient) ...@@ -1319,7 +1318,7 @@ void XMathGrad::GradSumBroadcast(XTensor * node, bool isEfficient)
XTensor * a = income.tails[0]; XTensor * a = income.tails[0];
XTensor * b = income.tails[1]; XTensor * b = income.tails[1];
DTYPE beta = income.GetParam(0); //DTYPE beta = income.GetParam(0);
XNoder::MakeGrad(a); XNoder::MakeGrad(a);
_Sum(a->grad, node->grad, a->grad); _Sum(a->grad, node->grad, a->grad);
......
...@@ -146,10 +146,10 @@ private: ...@@ -146,10 +146,10 @@ private:
static static
void GradSub(XTensor * node, bool isEfficient); void GradSub(XTensor * node, bool isEfficient);
/* gradient for sub with one dimension: c = a - b * \beta /* gradient for sub with one dimension: c = a - b * \beta
where the size of b is equal to that of one dimension of a */ where the size of b is equal to that of one dimension of a */
static static
void GradSubDim(XTensor * node, bool isEfficient); void GradSubDim(XTensor * node, bool isEfficient);
/* gradient for sum: c = a + b * \beta */ /* gradient for sum: c = a + b * \beta */
static static
......
...@@ -271,8 +271,8 @@ void XShapeGrad::GradMergeList(XTensor * node, bool isEfficient) ...@@ -271,8 +271,8 @@ void XShapeGrad::GradMergeList(XTensor * node, bool isEfficient)
CheckNTErrors(income.tailNum > 0, "Wrong input tensor number for MERGE!"); CheckNTErrors(income.tailNum > 0, "Wrong input tensor number for MERGE!");
XTensor * last = NULL; XTensor * last = NULL;
XList smalls(income.tailNum); TensorList smalls(income.tailNum);
XList smallsGrad(income.tailNum); TensorList smallsGrad(income.tailNum);
bool mergeOnly = true; bool mergeOnly = true;
for(int i = 0; i < income.tailNum; i++){ for(int i = 0; i < income.tailNum; i++){
XTensor * tail = income.tails[i]; XTensor * tail = income.tails[i];
...@@ -440,7 +440,7 @@ void XShapeGrad::GradSplitListPost(XTensor * node, bool isEfficient) ...@@ -440,7 +440,7 @@ void XShapeGrad::GradSplitListPost(XTensor * node, bool isEfficient)
/* we compute the gradient for current node, rather than for /* we compute the gradient for current node, rather than for
child node, i.e., we use the outgoing edge here */ child node, i.e., we use the outgoing edge here */
XLink &outgo = node->outgo; XLink &outgo = node->outgo;
XList splits(outgo.tailNum); TensorList splits(outgo.tailNum);
int whereToSplit = -1; int whereToSplit = -1;
int splitNum = 0; int splitNum = 0;
...@@ -450,7 +450,7 @@ void XShapeGrad::GradSplitListPost(XTensor * node, bool isEfficient) ...@@ -450,7 +450,7 @@ void XShapeGrad::GradSplitListPost(XTensor * node, bool isEfficient)
if(income.typeID == SHAPE_SPLIT_LIST){ if(income.typeID == SHAPE_SPLIT_LIST){
int w = income.GetParamInt(0); int w = income.GetParamInt(0);
int splitID = income.GetParamInt(1); int splitID = income.GetParamInt(1);
if(whereToSplit < 0) if(whereToSplit < 0)
whereToSplit = w; whereToSplit = w;
splitNum++; splitNum++;
......
...@@ -54,7 +54,7 @@ private: ...@@ -54,7 +54,7 @@ private:
static static
void GradGather(XTensor * node, bool isEfficent); void GradGather(XTensor * node, bool isEfficent);
/* gradient computation for dropout with indexs */ /* gradient computation for dropout with index: b = dropoutwithindex(a, index) */
static static
void GradDropoutWithIndex(XTensor * node, bool isEfficent); void GradDropoutWithIndex(XTensor * node, bool isEfficent);
......
...@@ -37,16 +37,16 @@ struct XNet ...@@ -37,16 +37,16 @@ struct XNet
unsigned int id; unsigned int id;
/* tensor nodes of the network (in order) */ /* tensor nodes of the network (in order) */
XList nodes; TensorList nodes;
/* tensor nodes to keep gradient for output (e.g., SGD)*/ /* tensor nodes to keep gradient for output (e.g., SGD)*/
XList gradNodes; TensorList gradNodes;
/* output nodes of the network */ /* output nodes of the network */
XList outputs; TensorList outputs;
/* input nodes of the network */ /* input nodes of the network */
XList inputs; TensorList inputs;
/* indicates whether the network just keeps the gradient for parameter tensors */ /* indicates whether the network just keeps the gradient for parameter tensors */
bool isGradEfficient; bool isGradEfficient;
...@@ -71,15 +71,15 @@ struct XNet ...@@ -71,15 +71,15 @@ struct XNet
/* backward propagation to obtain gradient /* backward propagation to obtain gradient
with a number of root nodes */ with a number of root nodes */
void Backward(XList &roots, LOSS_FUNCTION_NAME loss = NOLOSS); void Backward(TensorList &roots, LOSS_FUNCTION_NAME loss = NOLOSS);
/* backward propagation to obtain gradient /* backward propagation to obtain gradient
with a number of root nodes */ with a number of root nodes */
void Backward(XList &roots, XList &golds, LOSS_FUNCTION_NAME loss = NOLOSS); void Backward(TensorList &roots, TensorList &golds, LOSS_FUNCTION_NAME loss = NOLOSS);
/* backward propagation to obtain gradient wrt. the loss/error function /* backward propagation to obtain gradient wrt. the loss/error function
with a number of root nodes */ with a number of root nodes */
void Backward(XList &roots, XList &golds, XList &paddings, LOSS_FUNCTION_NAME loss = NOLOSS); void Backward(TensorList &roots, TensorList &golds, TensorList &paddings, LOSS_FUNCTION_NAME loss = NOLOSS);
/* backward computation for a given node */ /* backward computation for a given node */
void BackwardNode(XTensor * node, bool isEfficent = false); void BackwardNode(XTensor * node, bool isEfficent = false);
...@@ -93,10 +93,10 @@ struct XNet ...@@ -93,10 +93,10 @@ struct XNet
/* traverse the net and find the topological order by /* traverse the net and find the topological order by
depth-first search (Tarjan's algorithm) */ depth-first search (Tarjan's algorithm) */
void Traverse(XList &roots); void Traverse(TensorList &roots);
/* depth-first search given a node (Tarjan's algorithm for topological ordering) */ /* depth-first search given a node (Tarjan's algorithm for topological ordering) */
void TarjanVisit(XTensor * node, XList &orders, const unsigned int code); void TarjanVisit(XTensor * node, TensorList &orders, const unsigned int code);
/* dump network information */ /* dump network information */
void Dump(FILE * file); void Dump(FILE * file);
......
...@@ -51,14 +51,12 @@ initialize the model ...@@ -51,14 +51,12 @@ initialize the model
>> myIgnored - number of position ignored in attention (from the begining) >> myIgnored - number of position ignored in attention (from the begining)
>> myIsMasked - indicates whether the attention is with a mask >> myIsMasked - indicates whether the attention is with a mask
>> myDevID - device id >> myDevID - device id
>> myMem - the memory pool
*/ */
void T2TAttention::InitModel(int argc, char ** argv, void T2TAttention::InitModel(int argc, char ** argv,
bool myIsMasked, int myIgnored, bool myIsMasked, int myIgnored,
int myDevID, XMem * myMem) int myDevID)
{ {
devID = myDevID; devID = myDevID;
mem = myMem;
isMasked = myIsMasked; isMasked = myIsMasked;
ignored = myIgnored; ignored = myIgnored;
...@@ -71,11 +69,11 @@ void T2TAttention::InitModel(int argc, char ** argv, ...@@ -71,11 +69,11 @@ void T2TAttention::InitModel(int argc, char ** argv,
LoadParamFloat(argc, argv, "attminmax", &minmax, 0.1F); LoadParamFloat(argc, argv, "attminmax", &minmax, 0.1F);
LoadParamFloat(argc, argv, "dropoutatt", &dropoutP, 0); LoadParamFloat(argc, argv, "dropoutatt", &dropoutP, 0);
InitTensor2D(&wk, d, dk, X_FLOAT, devID, mem); InitTensor2DV2(&wk, d, dk, X_FLOAT, devID);
InitTensor2D(&wq, d, dk, X_FLOAT, devID, mem); InitTensor2DV2(&wq, d, dk, X_FLOAT, devID);
InitTensor2D(&wv, d, dv, X_FLOAT, devID, mem); InitTensor2DV2(&wv, d, dv, X_FLOAT, devID);
InitTensor2D(&wa, d, d, X_FLOAT, devID, mem); InitTensor2DV2(&wa, d, d, X_FLOAT, devID);
InitTensor2D(&wbig, d, 3 * d, X_FLOAT, devID, mem); InitTensor2DV2(&wbig, d, 3 * d, X_FLOAT, devID);
float scale = 1.0F; float scale = 1.0F;
float finfoutk = (float)sqrt(6.0F * scale/(d + dk)); float finfoutk = (float)sqrt(6.0F * scale/(d + dk));
...@@ -127,7 +125,7 @@ XTensor T2TAttention::MakeBig(XTensor &kqv, XTensor &mask, bool isTraining) ...@@ -127,7 +125,7 @@ XTensor T2TAttention::MakeBig(XTensor &kqv, XTensor &mask, bool isTraining)
XTensor q2; XTensor q2;
XTensor v2; XTensor v2;
XTensor kqv2; XTensor kqv2;
XList split; TensorList split;
kqv2 = MMul(kqv, wbig); kqv2 = MMul(kqv, wbig);
...@@ -135,9 +133,9 @@ XTensor T2TAttention::MakeBig(XTensor &kqv, XTensor &mask, bool isTraining) ...@@ -135,9 +133,9 @@ XTensor T2TAttention::MakeBig(XTensor &kqv, XTensor &mask, bool isTraining)
int d2 = kqv2.GetDim(1); int d2 = kqv2.GetDim(1);
int d3 = kqv2.GetDim(2) / 3; int d3 = kqv2.GetDim(2) / 3;
InitTensor3D(&k2, d1, d2, d3, X_FLOAT, devID, mem); InitTensor3DV2(&k2, d1, d2, d3, X_FLOAT, devID);
InitTensor3D(&q2, d1, d2, d3, X_FLOAT, devID, mem); InitTensor3DV2(&q2, d1, d2, d3, X_FLOAT, devID);
InitTensor3D(&v2, d1, d2, d3, X_FLOAT, devID, mem); InitTensor3DV2(&v2, d1, d2, d3, X_FLOAT, devID);
split.Add(&q2); split.Add(&q2);
split.Add(&k2); split.Add(&k2);
......
...@@ -42,9 +42,6 @@ public: ...@@ -42,9 +42,6 @@ public:
/* device id */ /* device id */
int devID; int devID;
/* memory pool */
XMem * mem;
/* head number */ /* head number */
int nhead; int nhead;
...@@ -61,7 +58,7 @@ public: ...@@ -61,7 +58,7 @@ public:
XTensor wa; XTensor wa;
XTensor wbig; XTensor wbig;
/* size of transformed Q and K */ /* size of transformed Q and K */
int dk; int dk;
...@@ -94,7 +91,7 @@ public: ...@@ -94,7 +91,7 @@ public:
/* initialize the model */ /* initialize the model */
void InitModel(int argc, char ** argv, void InitModel(int argc, char ** argv,
bool myIsMasked, int myIgnored, bool myIsMasked, int myIgnored,
int myDevID = -1, XMem * myMem = NULL); int myDevID = -1);
/* make the network */ /* make the network */
XTensor Make(XTensor &k, XTensor &q, XTensor &v, XTensor &mask, bool isTraining); XTensor Make(XTensor &k, XTensor &q, XTensor &v, XTensor &mask, bool isTraining);
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2019-04-25
* it is cold today but i'll move to a warm place tomorrow :)
*/
#ifndef __T2TBATCHLOADER_H__
#define __T2TBATCHLOADER_H__
#include "../../network/XNet.h"
using namespace nts;
namespace transformer
{
/* maximum sequence length (1024 * 4 = 4096).
   The replacement list is parenthesized so that the macro behaves as a single
   value inside larger expressions, e.g., x / MAX_SEQUENCE_LENGTH or
   x % MAX_SEQUENCE_LENGTH; without the parentheses those expand to
   x / 1024 * 4 and x % 1024 * 4. */
#define MAX_SEQUENCE_LENGTH (1024 * 4)
/* a record that keeps the information of one batch */
struct BatchNode
{
    /* beginning position of the batch */
    int beg;

    /* end position of the batch */
    int end;

    /* maximum number of words on the encoder side */
    int maxEnc;

    /* maximum number of words on the decoder side */
    int maxDec;

    /* the key used when batches are sorted */
    int key;
};
/* batch loader: keeps the buffers and flags used for reading sequences
   from a data file and for packing them into batches */
class T2TBatchLoader
{
public:
    /* word buffer used when loading data */
    int * buf;

    /* a second word buffer */
    int * buf2;

    /* buffer of batches */
    BatchNode * bufBatch;

    /* size of the word buffer */
    int bufSize;

    /* size of the batch buffer */
    int bufBatchSize;

    /* length of every sequence */
    int * seqLen;

    /* a second array of sequence lengths */
    int * seqLen2;

    /* for every sequence, the offset of its first word */
    int * seqOffset;

    /* how many sequences are kept in the buffer */
    int nseqBuf;

    /* offset of the next sequence in the buffer */
    int nextSeq;

    /* offset of the next batch */
    int nextBatch;

    /* true if the </s> symbol is doubled for the output of lms */
    bool isDoubledEnd;

    /* true if the batch size is counted as max * sc instead of
       the number of words, where max is the maximum length and
       sc is the number of sentences */
    bool isSmallBatch;

    /* the counterpart of "isSmallBatch" */
    bool isBigBatch;

    /* true if batches are randomized */
    bool isRandomBatch;

    /* size of a bucket */
    int bucketSize;

    /* constructor */
    T2TBatchLoader();

    /* de-constructor */
    ~T2TBatchLoader();

    /* initialize the loader with the command-line arguments */
    void Init(int argc, char ** argv);

    /* read data from a file into the buffer */
    int LoadBuf(FILE * file, bool isSorted, int step);

    /* clear the data buffer */
    void ClearBuf();

    /* set the flag of random batches */
    void SetRandomBatch(bool flag = true);

    /* load a batch of sequences */
    int LoadBatch(FILE * file, bool isLM,
                  XTensor * batchEnc, XTensor * paddingEnc,
                  XTensor * batchDec, XTensor * paddingDec,
                  XTensor * gold, XTensor * label,
                  int * seqs,
                  int vsEnc, int vsDec, int sBatch, int wBatch,
                  bool isSorted, int &ws, int &wCount,
                  int devID, bool isTraining);

    /* load a batch of sequences (the language modeling case) */
    int LoadBatchLM(FILE * file,
                    XTensor * batchEnc, XTensor * paddingEnc,
                    XTensor * batchDec, XTensor * paddingDec,
                    XTensor * gold, XTensor * label,
                    int * seqs, int vs, int sBatch, int wBatch,
                    bool isSorted, int &wCount,
                    int devID, bool isTraining);

    /* load a batch of sequences (the machine translation case) */
    int LoadBatchMT(FILE * file,
                    XTensor * batchEnc, XTensor * paddingEnc,
                    XTensor * batchDec, XTensor * paddingDec,
                    XTensor * gold, XTensor * label,
                    int * seqs, int vsEnc, int vsDec, int sBatch, int wBatch,
                    bool isSorted, int &ws, int &wCount,
                    int devID, bool isTraining);

    /* shuffle the data file */
    void Shuffle(const char * srcFile, const char * tgtFile);
};
}
#endif
\ No newline at end of file
...@@ -31,6 +31,10 @@ namespace transformer ...@@ -31,6 +31,10 @@ namespace transformer
/* constructor */ /* constructor */
AttDecoder::AttDecoder() AttDecoder::AttDecoder()
{ {
attentions = NULL;
fnns = NULL;
attLayerNorms = NULL;
fnnLayerNorms = NULL;
attentionsEnde = NULL; attentionsEnde = NULL;
attEndeLayerNorms = NULL; attEndeLayerNorms = NULL;
} }
...@@ -38,6 +42,10 @@ AttDecoder::AttDecoder() ...@@ -38,6 +42,10 @@ AttDecoder::AttDecoder()
/* de-constructor */ /* de-constructor */
AttDecoder::~AttDecoder() AttDecoder::~AttDecoder()
{ {
delete[] attentions;
delete[] fnns;
delete[] attLayerNorms;
delete[] fnnLayerNorms;
delete[] attentionsEnde; delete[] attentionsEnde;
delete[] attEndeLayerNorms; delete[] attEndeLayerNorms;
} }
...@@ -49,16 +57,14 @@ initialize the model ...@@ -49,16 +57,14 @@ initialize the model
>> myIsMasked - indicates whether the masked attention is employed >> myIsMasked - indicates whether the masked attention is employed
>> myIgnored - number of positions ignored in attention (from the start) >> myIgnored - number of positions ignored in attention (from the start)
>> myDevID - device id >> myDevID - device id
>> myMem - the memory pool
*/ */
void AttDecoder::InitModel(int argc, char ** argv, void AttDecoder::InitModel(int argc, char ** argv,
bool myIsMasked, int myIgnored, bool myIsMasked, int myIgnored,
int myDevID, XMem * myMem) int myDevID)
{ {
//AttEncoder::InitModel(argc, argv, myIsMasked, myIgnored, myDevID, myMem); //AttEncoder::InitModel(argc, argv, myIsMasked, myIgnored, myDevID);
devID = myDevID; devID = myDevID;
mem = myMem;
ignored = myIgnored; ignored = myIgnored;
LoadParamInt(argc, argv, "nlayer", &nlayer, 6); LoadParamInt(argc, argv, "nlayer", &nlayer, 6);
...@@ -68,10 +74,10 @@ void AttDecoder::InitModel(int argc, char ** argv, ...@@ -68,10 +74,10 @@ void AttDecoder::InitModel(int argc, char ** argv,
LoadParamFloat(argc, argv, "dropout", &dropoutP, 0); LoadParamFloat(argc, argv, "dropout", &dropoutP, 0);
CheckNTErrors(nlayer >= 1, "We have one encoding layer at least!"); CheckNTErrors(nlayer >= 1, "We have one encoding layer at least!");
CheckNTErrors(vSize > 1, "set vocabulary size by \"-vsize\""); CheckNTErrors(vSize > 1, "set vocabulary size by \"-vsizetgt\"");
/* embedding model */ /* embedding model */
embedder.InitModel(argc, argv, devID, mem, false); embedder.InitModel(argc, argv, devID, false);
attentions = new T2TAttention[nlayer]; attentions = new T2TAttention[nlayer];
fnns = new T2TFNN[nlayer]; fnns = new T2TFNN[nlayer];
...@@ -82,12 +88,12 @@ void AttDecoder::InitModel(int argc, char ** argv, ...@@ -82,12 +88,12 @@ void AttDecoder::InitModel(int argc, char ** argv,
/* initialize the stacked layers */ /* initialize the stacked layers */
for (int i = 0; i < nlayer; i++) { for (int i = 0; i < nlayer; i++) {
attentions[i].InitModel(argc, argv, myIsMasked, myIgnored, myDevID, myMem); attentions[i].InitModel(argc, argv, myIsMasked, myIgnored, myDevID);
fnns[i].InitModel(argc, argv, myDevID, myMem); fnns[i].InitModel(argc, argv, myDevID);
attLayerNorms[i].InitModel(argc, argv, myDevID, myMem); attLayerNorms[i].InitModel(argc, argv, myDevID);
fnnLayerNorms[i].InitModel(argc, argv, myDevID, myMem); fnnLayerNorms[i].InitModel(argc, argv, myDevID);
attentionsEnde[i].InitModel(argc, argv, true, myIgnored, myDevID, myMem); attentionsEnde[i].InitModel(argc, argv, true, myIgnored, myDevID);
attEndeLayerNorms[i].InitModel(argc, argv, myDevID, myMem); attEndeLayerNorms[i].InitModel(argc, argv, myDevID);
} }
} }
...@@ -159,6 +165,8 @@ XTensor AttDecoder::Make(XTensor &inputDec, XTensor &outputEnc, XTensor &mask, X ...@@ -159,6 +165,8 @@ XTensor AttDecoder::Make(XTensor &inputDec, XTensor &outputEnc, XTensor &mask, X
/* layer normalization */ /* layer normalization */
x = fnnLayerNorms[i].Make(res); x = fnnLayerNorms[i].Make(res);
} }
x.SetName(DECODING_NAME);
return x; return x;
} }
......
...@@ -26,6 +26,9 @@ ...@@ -26,6 +26,9 @@
namespace transformer namespace transformer
{ {
#define DECODING_NAME "decoding"
#define DECODING_INPUT_NAME "decoding_input"
class AttDecoder class AttDecoder
{ {
...@@ -34,9 +37,6 @@ public: ...@@ -34,9 +37,6 @@ public:
/* device id */ /* device id */
int devID; int devID;
/* memory pool */
XMem * mem;
/* layer number */ /* layer number */
int nlayer; int nlayer;
...@@ -92,7 +92,7 @@ public: ...@@ -92,7 +92,7 @@ public:
/* initialize the model */ /* initialize the model */
void InitModel(int argc, char ** argv, void InitModel(int argc, char ** argv,
bool myIsMasked, int myIgnored, bool myIsMasked, int myIgnored,
int myDevID = -1, XMem * myMem = NULL); int myDevID = -1);
/* make the decoding network */ /* make the decoding network */
XTensor Make(XTensor &inputDec, XTensor &outputEnc, XTensor &mask, XTensor &maskEncDec, bool isTraining); XTensor Make(XTensor &inputDec, XTensor &outputEnc, XTensor &mask, XTensor &maskEncDec, bool isTraining);
......
...@@ -31,7 +31,6 @@ namespace transformer ...@@ -31,7 +31,6 @@ namespace transformer
T2TEmbedder::T2TEmbedder() T2TEmbedder::T2TEmbedder()
{ {
devID = -1; devID = -1;
mem = NULL;
vSize = -1; vSize = -1;
maxLength = -1; maxLength = -1;
} }
...@@ -46,12 +45,10 @@ initialize the model ...@@ -46,12 +45,10 @@ initialize the model
>> argc - number of arguments >> argc - number of arguments
>> argv - list of pointers to the arguments >> argv - list of pointers to the arguments
>> myDevID - device id >> myDevID - device id
>> myMem - the memory pool
*/ */
void T2TEmbedder::InitModel(int argc, char ** argv, int myDevID, XMem * myMem, bool isEnc) void T2TEmbedder::InitModel(int argc, char ** argv, int myDevID, bool isEnc)
{ {
devID = myDevID; devID = myDevID;
mem = myMem;
if(isEnc){ if(isEnc){
LoadParamInt(argc, argv, "vsize", &vSize, -1); LoadParamInt(argc, argv, "vsize", &vSize, -1);
...@@ -64,7 +61,7 @@ void T2TEmbedder::InitModel(int argc, char ** argv, int myDevID, XMem * myMem, b ...@@ -64,7 +61,7 @@ void T2TEmbedder::InitModel(int argc, char ** argv, int myDevID, XMem * myMem, b
LoadParamInt(argc, argv, "d", &eSize, DEFAULT_EMBEDDING_SIZE); LoadParamInt(argc, argv, "d", &eSize, DEFAULT_EMBEDDING_SIZE);
LoadParamInt(argc, argv, "d", &d, DEFAULT_EMBEDDING_SIZE); LoadParamInt(argc, argv, "d", &d, DEFAULT_EMBEDDING_SIZE);
InitTensor2D(&w, vSize, eSize, X_FLOAT, devID, mem); InitTensor2DV2(&w, vSize, eSize, X_FLOAT, devID);
DTYPE v = 1.0F/(float)sqrt((float)eSize); DTYPE v = 1.0F/(float)sqrt((float)eSize);
w.SetDataRandn(0, v); w.SetDataRandn(0, v);
...@@ -81,7 +78,7 @@ make positional embeddings (of size eSize * length) ...@@ -81,7 +78,7 @@ make positional embeddings (of size eSize * length)
*/ */
void T2TEmbedder::MakePosEmbedding(int eSize, int d, int length) void T2TEmbedder::MakePosEmbedding(int eSize, int d, int length)
{ {
InitTensor2D(&posEmbeddingBase, length, eSize, X_FLOAT, devID, mem); InitTensor2DV2(&posEmbeddingBase, length, eSize, X_FLOAT, devID);
float * data = new float[posEmbeddingBase.unitNum]; float * data = new float[posEmbeddingBase.unitNum];
...@@ -145,9 +142,9 @@ XTensor T2TEmbedder::Make(XTensor &input) ...@@ -145,9 +142,9 @@ XTensor T2TEmbedder::Make(XTensor &input)
/* we make positional embeddings first */ /* we make positional embeddings first */
//if(!match){ //if(!match){
if(true){ if(true){
InitTensor(&posEmbedding, input.order + 1, dims, X_FLOAT, 1.0F, devID, mem); InitTensorV2(&posEmbedding, input.order + 1, dims, X_FLOAT, devID);
XTensor * posTMP = NewTensorBuf(2, dims + 1, X_FLOAT, 1.0F, devID, mem); XTensor * posTMP = NewTensorBufV2(2, dims + 1, X_FLOAT, devID);
_CopyValues(&posEmbeddingBase, 0, posTMP->unitNum, posTMP, 0); _CopyValues(&posEmbeddingBase, 0, posTMP->unitNum, posTMP, 0);
_Unsqueeze(posTMP, &posEmbedding, 0, dims[0]); _Unsqueeze(posTMP, &posEmbedding, 0, dims[0]);
......
...@@ -41,9 +41,6 @@ public: ...@@ -41,9 +41,6 @@ public:
/* device id */ /* device id */
int devID; int devID;
/* memory pool */
XMem * mem;
/* vocabulary size */ /* vocabulary size */
int vSize; int vSize;
...@@ -71,7 +68,7 @@ public: ...@@ -71,7 +68,7 @@ public:
~T2TEmbedder(); ~T2TEmbedder();
/* initialize the model */ /* initialize the model */
void InitModel(int argc, char ** argv, int myDevID = -1, XMem * myMem = NULL, bool isEnc = true); void InitModel(int argc, char ** argv, int myDevID = -1, bool isEnc = true);
/* make positional embeddings */ /* make positional embeddings */
void MakePosEmbedding(int eSize, int d, int length); void MakePosEmbedding(int eSize, int d, int length);
......
...@@ -52,15 +52,12 @@ initialize the model ...@@ -52,15 +52,12 @@ initialize the model
>> argv - list of pointers to the arguments >> argv - list of pointers to the arguments
>> myIsMasked - indicates whether the masked attention is employed >> myIsMasked - indicates whether the masked attention is employed
>> myIgnored - number of positions ignored in attention (from the start) >> myIgnored - number of positions ignored in attention (from the start)
>> myDevID - device id >> myDevID - device id*/
>> myMem - the memory pool
*/
void AttEncoder::InitModel(int argc, char ** argv, void AttEncoder::InitModel(int argc, char ** argv,
bool myIsMasked, int myIgnored, bool myIsMasked, int myIgnored,
int myDevID, XMem * myMem) int myDevID)
{ {
devID = myDevID; devID = myDevID;
mem = myMem;
ignored = myIgnored; ignored = myIgnored;
LoadParamInt(argc, argv, "nlayer", &nlayer, 6); LoadParamInt(argc, argv, "nlayer", &nlayer, 6);
...@@ -73,7 +70,7 @@ void AttEncoder::InitModel(int argc, char ** argv, ...@@ -73,7 +70,7 @@ void AttEncoder::InitModel(int argc, char ** argv,
CheckNTErrors(vSize > 1, "set vocabulary size by \"-vsize\""); CheckNTErrors(vSize > 1, "set vocabulary size by \"-vsize\"");
/* embedding model */ /* embedding model */
embedder.InitModel(argc, argv, devID, mem); embedder.InitModel(argc, argv, devID);
attentions = new T2TAttention[nlayer]; attentions = new T2TAttention[nlayer];
fnns = new T2TFNN[nlayer]; fnns = new T2TFNN[nlayer];
...@@ -82,10 +79,10 @@ void AttEncoder::InitModel(int argc, char ** argv, ...@@ -82,10 +79,10 @@ void AttEncoder::InitModel(int argc, char ** argv,
/* initialize the stacked layers */ /* initialize the stacked layers */
for(int i = 0; i < nlayer; i++){ for(int i = 0; i < nlayer; i++){
attentions[i].InitModel(argc, argv, myIsMasked, myIgnored, myDevID, myMem); attentions[i].InitModel(argc, argv, myIsMasked, myIgnored, myDevID);
fnns[i].InitModel(argc, argv, myDevID, myMem); fnns[i].InitModel(argc, argv, myDevID);
attLayerNorms[i].InitModel(argc, argv, myDevID, myMem); attLayerNorms[i].InitModel(argc, argv, myDevID);
fnnLayerNorms[i].InitModel(argc, argv, myDevID, myMem); fnnLayerNorms[i].InitModel(argc, argv, myDevID);
} }
} }
...@@ -139,6 +136,9 @@ XTensor AttEncoder::Make(XTensor &input, XTensor &mask, XTensor &maskEncDec, boo ...@@ -139,6 +136,9 @@ XTensor AttEncoder::Make(XTensor &input, XTensor &mask, XTensor &maskEncDec, boo
/* layer normalization */ /* layer normalization */
x = fnnLayerNorms[i].Make(res); x = fnnLayerNorms[i].Make(res);
} }
x.SetName(ENCODING_NAME);
input.SetName(ENCODING_INPUT_NAME);
return x; return x;
} }
......
...@@ -32,6 +32,9 @@ using namespace nts; ...@@ -32,6 +32,9 @@ using namespace nts;
namespace transformer namespace transformer
{ {
#define ENCODING_NAME "encoding"
#define ENCODING_INPUT_NAME "encoding_input"
/* /*
base class of the encoder base class of the encoder
...@@ -62,9 +65,6 @@ public: ...@@ -62,9 +65,6 @@ public:
/* device id */ /* device id */
int devID; int devID;
/* memory pool */
XMem * mem;
/* layer number */ /* layer number */
int nlayer; int nlayer;
...@@ -115,7 +115,7 @@ public: ...@@ -115,7 +115,7 @@ public:
/* initialize the model */ /* initialize the model */
void InitModel(int argc, char ** argv, void InitModel(int argc, char ** argv,
bool myIsMasked, int myIgnored, bool myIsMasked, int myIgnored,
int myDevID = -1, XMem * myMem = NULL); int myDevID = -1);
/* make the encoding network */ /* make the encoding network */
XTensor Make(XTensor &input, XTensor &mask, XTensor &maskEncDec, bool isTraining); XTensor Make(XTensor &input, XTensor &mask, XTensor &maskEncDec, bool isTraining);
......
...@@ -47,12 +47,10 @@ initialize the model ...@@ -47,12 +47,10 @@ initialize the model
>> argc - number of arguments >> argc - number of arguments
>> argv - list of pointers to the arguments >> argv - list of pointers to the arguments
>> myDevID - device id >> myDevID - device id
>> myMem - the memory pool
*/ */
void T2TFNN::InitModel(int argc, char ** argv, int myDevID, XMem * myMem) void T2TFNN::InitModel(int argc, char ** argv, int myDevID)
{ {
devID = myDevID; devID = myDevID;
mem = myMem;
float minmax = 0; float minmax = 0;
...@@ -62,11 +60,11 @@ void T2TFNN::InitModel(int argc, char ** argv, int myDevID, XMem * myMem) ...@@ -62,11 +60,11 @@ void T2TFNN::InitModel(int argc, char ** argv, int myDevID, XMem * myMem)
LoadParamFloat(argc, argv, "fnnminmax", &minmax, 0.1F); LoadParamFloat(argc, argv, "fnnminmax", &minmax, 0.1F);
LoadParamFloat(argc, argv, "dropoutfnn", &dropoutP, 0); LoadParamFloat(argc, argv, "dropoutfnn", &dropoutP, 0);
InitTensor2D(&w1, inSize, hSize, X_FLOAT, devID, mem); InitTensor2DV2(&w1, inSize, hSize, X_FLOAT, devID);
InitTensor1D(&b1, hSize, X_FLOAT, devID, mem); InitTensor1DV2(&b1, hSize, X_FLOAT, devID);
InitTensor2D(&w2, hSize, outSize, X_FLOAT, devID, mem); InitTensor2DV2(&w2, hSize, outSize, X_FLOAT, devID);
InitTensor1D(&b2, outSize, X_FLOAT, devID, mem); InitTensor1DV2(&b2, outSize, X_FLOAT, devID);
float scale = 1.0F; float scale = 1.0F;
float finfout1 = (float)sqrt(6.0F * scale/(inSize + hSize)); float finfout1 = (float)sqrt(6.0F * scale/(inSize + hSize));
......
...@@ -36,9 +36,6 @@ public: ...@@ -36,9 +36,6 @@ public:
/* device id */ /* device id */
int devID; int devID;
/* memory pool */
XMem * mem;
/* size of input vector */ /* size of input vector */
int inSize; int inSize;
...@@ -72,7 +69,7 @@ public: ...@@ -72,7 +69,7 @@ public:
~T2TFNN(); ~T2TFNN();
/* initialize the model */ /* initialize the model */
void InitModel(int argc, char ** argv, int myDevID = -1, XMem * myMem = NULL); void InitModel(int argc, char ** argv, int myDevID = -1);
/* make the network */ /* make the network */
XTensor Make(XTensor &input, bool isTraining); XTensor Make(XTensor &input, bool isTraining);
......
...@@ -32,7 +32,6 @@ namespace transformer ...@@ -32,7 +32,6 @@ namespace transformer
T2TLN::T2TLN() T2TLN::T2TLN()
{ {
devID = -1; devID = -1;
mem = NULL;
d = 0; d = 0;
} }
...@@ -46,18 +45,16 @@ initialize the model ...@@ -46,18 +45,16 @@ initialize the model
>> argc - number of arguments >> argc - number of arguments
>> argv - list of pointers to the arguments >> argv - list of pointers to the arguments
>> myDevID - device id >> myDevID - device id
>> myMem - the memory pool
*/ */
void T2TLN::InitModel(int argc, char ** argv, int myDevID, XMem * myMem) void T2TLN::InitModel(int argc, char ** argv, int myDevID)
{ {
devID = myDevID; devID = myDevID;
mem = myMem;
d = 0; d = 0;
LoadParamInt(argc, argv, "d", &d, DEFAULT_EMBEDDING_SIZE); LoadParamInt(argc, argv, "d", &d, DEFAULT_EMBEDDING_SIZE);
InitTensor1D(&w, d, X_FLOAT, devID, mem); InitTensor1DV2(&w, d, X_FLOAT, devID);
InitTensor1D(&b, d, X_FLOAT, devID, mem); InitTensor1DV2(&b, d, X_FLOAT, devID);
w.SetDataRand(1.0F, 1.0F); w.SetDataRand(1.0F, 1.0F);
b.SetZeroAll(); b.SetZeroAll();
......
...@@ -36,9 +36,6 @@ class T2TLN ...@@ -36,9 +36,6 @@ class T2TLN
public: public:
/* device id */ /* device id */
int devID; int devID;
/* memory pool */
XMem * mem;
/* the transformation matrix w */ /* the transformation matrix w */
XTensor w; XTensor w;
...@@ -57,7 +54,7 @@ public: ...@@ -57,7 +54,7 @@ public:
~T2TLN(); ~T2TLN();
/* initialize the model */ /* initialize the model */
void InitModel(int argc, char ** argv, int myDevID = -1, XMem * myMem = NULL); void InitModel(int argc, char ** argv, int myDevID = -1);
/* make the network */ /* make the network */
XTensor Make(XTensor &input); XTensor Make(XTensor &input);
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2019, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "../../tensor/core/CHeader.h"
#include "T2TLengthPenalty.h"
using namespace nts;
namespace transformer
{
/*
length penalty in the GNMT style: pl = ((5 + n)/(5 + 1))^\alpha
where n is the length of the sequence
>> length - length of the sequence (for each entry)
>> alpha - the parameter that controls the length preference
<< return - length penalty of the sequence (for each entry)
*/
XTensor T2TLengthPenalizer::GNMT(const XTensor & length, float alpha)
{
    XTensor ratio;
    XTensor penalty;

    /* (n + 5)/6 for every entry; an equivalent ScaleAndShift-based form was
       considered earlier: ScaleAndShift(ScaleAndShift(length, 0, 5.0F), 1.0F/(5 + 1)) */
    ratio = (length + 5)/(1 + 5);

    /* raise the ratio to the power of alpha */
    penalty = Power(ratio, alpha);

    return penalty;
}
}
/* NiuTrans.Tensor - an open-source tensor library /* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University. * Copyright (C) 2019, Natural Language Processing Lab, Northestern University.
* All rights reserved. * All rights reserved.
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.
* You may obtain a copy of the License at * You may obtain a copy of the License at
* *
* http://www.apache.org/licenses/LICENSE-2.0 * http://www.apache.org/licenses/LICENSE-2.0
* *
* Unless required by applicable law or agreed to in writing, software * Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, * distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */
/* /*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2019-04-08
*/ * Start of a new week - I just finished several documents.
* Writing document is harder than writing code :)
*/
#ifndef __SUMBYCOLUMNTV_H__ #ifndef __T2TLENGTHPENALTY_H__
#define __SUMBYCOLUMNTV_H__ #define __T2TLENGTHPENALTY_H__
#include "../../XTensor.h" #include "../../tensor/XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) using namespace nts;
/* sum of a tensor and a (column) vector */ namespace transformer
void _SumByColumnTV(const XTensor * a, const XTensor * b, XTensor * c = NULL, DTYPE beta = (DTYPE)1.0); {
} // namespace nts(NiuTrans.Tensor) /* We intend to penalize short sequences because they have higher score
in product of a sequence of probability-like terms and have more chances
to beat others in search. */
class T2TLengthPenalizer
{
public:
/* GNMT-like length penalty: pl = ((5 + n)/(5 + 1))^\alpha
where n = length of the sequence */
static
XTensor GNMT(const XTensor & length, float alpha);
};
#endif // __SUMBYCOLUMNTV_H__ }
#endif
...@@ -40,9 +40,6 @@ public: ...@@ -40,9 +40,6 @@ public:
/* device id */ /* device id */
int devID; int devID;
/* memory pool */
XMem * mem;
/* the encoder */ /* the encoder */
AttEncoder * encoder; AttEncoder * encoder;
...@@ -98,7 +95,7 @@ public: ...@@ -98,7 +95,7 @@ public:
XTensor &maskDec, XTensor &maskEncDec); XTensor &maskDec, XTensor &maskEncDec);
/* get parameter matrics */ /* get parameter matrics */
void GetParams(XList &list); void GetParams(TensorList &list);
/* dump the parameters */ /* dump the parameters */
void Dump(const char * fn); void Dump(const char * fn);
......
...@@ -31,7 +31,6 @@ namespace transformer ...@@ -31,7 +31,6 @@ namespace transformer
T2TOutput::T2TOutput() T2TOutput::T2TOutput()
{ {
devID = -1; devID = -1;
mem = NULL;
vSize = -1; vSize = -1;
inSize = -1; inSize = -1;
hSize = -1; hSize = -1;
...@@ -47,12 +46,10 @@ initialize the model ...@@ -47,12 +46,10 @@ initialize the model
>> argc - number of arguments >> argc - number of arguments
>> argv - list of pointers to the arguments >> argv - list of pointers to the arguments
>> myDevID - device id >> myDevID - device id
>> myMem - the memory pool
*/ */
void T2TOutput::InitModel(int argc, char ** argv, int myDevID, XMem * myMem) void T2TOutput::InitModel(int argc, char ** argv, int myDevID)
{ {
devID = myDevID; devID = myDevID;
mem = myMem;
float minmax = 0; float minmax = 0;
...@@ -61,7 +58,7 @@ void T2TOutput::InitModel(int argc, char ** argv, int myDevID, XMem * myMem) ...@@ -61,7 +58,7 @@ void T2TOutput::InitModel(int argc, char ** argv, int myDevID, XMem * myMem)
LoadParamInt(argc, argv, "d", &hSize, DEFAULT_EMBEDDING_SIZE); LoadParamInt(argc, argv, "d", &hSize, DEFAULT_EMBEDDING_SIZE);
LoadParamFloat(argc, argv, "outputminmax", &minmax, 0.08F); LoadParamFloat(argc, argv, "outputminmax", &minmax, 0.08F);
InitTensor2D(&w, hSize, vSize, X_FLOAT, devID, mem); InitTensor2DV2(&w, hSize, vSize, X_FLOAT, devID);
float scale = 1.0F; float scale = 1.0F;
float finfout = (float)sqrt(6.0F * scale/(hSize + vSize)); float finfout = (float)sqrt(6.0F * scale/(hSize + vSize));
...@@ -95,6 +92,7 @@ void T2TOutput::Make(XTensor &input, XTensor &output) ...@@ -95,6 +92,7 @@ void T2TOutput::Make(XTensor &input, XTensor &output)
//output = LogSoftmax(MMul(x, w), -1); //output = LogSoftmax(MMul(x, w), -1);
output = Softmax(MMul(x, w), -1); output = Softmax(MMul(x, w), -1);
output.SetName(OUTPUT_NAME);
} }
} }
...@@ -28,6 +28,8 @@ using namespace nts; ...@@ -28,6 +28,8 @@ using namespace nts;
namespace transformer namespace transformer
{ {
#define OUTPUT_NAME "output"
/* output layer */ /* output layer */
class T2TOutput class T2TOutput
...@@ -36,9 +38,6 @@ public: ...@@ -36,9 +38,6 @@ public:
/* device id */ /* device id */
int devID; int devID;
/* memory pool */
XMem * mem;
/* vocabulary size */ /* vocabulary size */
int vSize; int vSize;
...@@ -59,7 +58,7 @@ public: ...@@ -59,7 +58,7 @@ public:
~T2TOutput(); ~T2TOutput();
/* initialize the model */ /* initialize the model */
void InitModel(int argc, char ** argv, int myDevID = -1, XMem * myMem = NULL); void InitModel(int argc, char ** argv, int myDevID = -1);
/* make the network */ /* make the network */
XTensor Make(XTensor &input); XTensor Make(XTensor &input);
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2019, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2019-03-13
*/
#include "T2TPredictor.h"
#include "../../tensor/core/CHeader.h"
using namespace nts;
namespace transformer
{
/* constructor: the bundle starts with no state allocated */
T2TStateBundle::T2TStateBundle()
{
    states = NULL;

    /* stateNum is read by T2TPredictor::GeneratePaths, so it must agree with
       the (empty) state array instead of holding an indeterminate value */
    stateNum = 0;

    isStart = false;
}
/* de-constructor: release the state array (if any) */
T2TStateBundle::~T2TStateBundle()
{
    /* delete[] on a null pointer does nothing, so no explicit test is needed */
    delete[] states;
}
/*
create states
>> num - number of states
*/
void T2TStateBundle::MakeStates(int num)
{
CheckNTErrors(num > 0, "invalid number");
if(states != NULL)
delete[] states;
states = new T2TState[num];
for(int i = 0; i < num; i++){
states[i].prediction = -1;
states[i].pid = T2T_PID_EMPTY;
states[i].isEnd = false;
states[i].isStart = false;
states[i].isCompleted = false;
states[i].prob = 0;
states[i].probPath = 0;
states[i].modelScore = 0;
states[i].nstep = 0;
states[i].last = NULL;
}
stateNum = num;
}
/* constructor */
T2TPredictor::T2TPredictor()
{
    /* no model or state bundle is attached until Read() is called; keep the
       pointers well-defined instead of leaving them indeterminate */
    m = NULL;
    s = NULL;

    /* -1 means that no start symbol has been set yet */
    startSymbol = -1;
}
/* de-constructor (empty: the model and state pointers received in Read() are not released here) */
T2TPredictor::~T2TPredictor()
{
}
/*
create an initial state
>> model - the t2t model (not used in this function)
>> top - the top-most layer of the network
>> input - input of the network (its order must be at least 1)
>> beamSize - beam size
>> state - the state to be initialized
*/
void T2TPredictor::Create(T2TModel * model, XTensor * top, const XTensor * input, int beamSize, T2TStateBundle * state)
{
    /* dims[input->order - 1] is written below, so a tensor of order 0 would
       lead to an out-of-bounds write */
    CheckNTErrors(input->order >= 1, "Wrong order of the tensor!");

    state->layersEnc.Clear();
    state->layersDec.Clear();

    /* look up the node named ENCODING_NAME starting from the top layer */
    XTensor * encoding = XLink::SearchNode(top, ENCODING_NAME);
    CheckNTErrors(encoding != NULL, "No encoding layers found!");

    state->layersEnc.Add(encoding);

    /* there is no decoder-side input yet; Predict() tests this entry against NULL */
    state->layersDec.Add(NULL);

    /* same shape as the input, except that the last dimension is the beam size */
    int dims[MAX_TENSOR_DIM_NUM];
    for (int i = 0; i < input->order - 1; i++)
        dims[i] = input->GetDim(i);
    dims[input->order - 1] = beamSize;

    InitTensorV2(&state->probPath, input->order, dims, X_FLOAT, input->devID);
    InitTensorV2(&state->nstep, input->order, dims, X_FLOAT, input->devID);
    InitTensorV2(&state->endMark, input->order, dims, X_INT, input->devID);

    state->probPath.SetZeroAll();
    state->nstep.SetZeroAll();
    state->endMark.SetZeroAll();

    state->stateNum = 0;
}
/*
set start symbol
>> symbol - the symbol (in integer)
*/
void T2TPredictor::SetStartSymbol(int symbol)
{
startSymbol = symbol;
}
/*
attach the model and the state bundle used by the following call of Predict()
(only the pointers are stored; nothing is copied)
>> model - the t2t model that keeps the network created so far
>> state - a set of states. It keeps
            1) hypotheses (states)
            2) probabilities of hypotheses
            3) parts of the network for expanding toward the next state
*/
void T2TPredictor::Read(T2TModel * model, T2TStateBundle * state)
{
    this->m = model;
    this->s = state;
}
/*
predict the next state
The model and the current state bundle must have been attached by Read()
beforehand: both pointers (m and s) are dereferenced here without a check.
>> next - next states (assuming that the current state has been read);
          its prob tensor receives the output of the output layer
>> encoding - encoder output
>> inputEnc - input of the encoder (order must be at least 2)
>> paddingEnc - padding of the encoder
*/
void T2TPredictor::Predict(T2TStateBundle * next, XTensor * encoding,
XTensor * inputEnc, XTensor * paddingEnc)
{
int dims[MAX_TENSOR_DIM_NUM];
next->layersEnc.Clear();
next->layersDec.Clear();
AttDecoder &decoder = *m->decoder;
/* word indices of previous positions (only tested against NULL below) */
XTensor * inputLast = (XTensor*)s->layersDec.GetItem(0);
/* word indices of positions up to next state */
XTensor inputDec;
/* the first token */
XTensor first;
CheckNTErrors(inputEnc->order >= 2, "Wrong order of the tensor!");
/* "first" has the shape of the encoder input with the last dimension set to 1 */
for(int i = 0; i < inputEnc->order - 1; i++)
dims[i] = inputEnc->GetDim(i);
dims[inputEnc->order - 1] = 1;
InitTensorV2(&first, inputEnc->order, dims, X_INT, inputEnc->devID);
_SetDataFixed(&first, startSymbol);
/* add a new word into the input sequence of the decoder side */
if (inputLast == NULL) {
/* first step: the decoder input is the start symbol only */
inputDec = Identity(first);
}
else{
/* later steps: start symbol followed by the paths generated so far */
inputDec = GeneratePaths(s);
inputDec.SetDevice(inputEnc->devID);
inputDec = Concatenate(first, inputDec, inputDec.order - 1);
}
/* prediction probabilities */
XTensor &output = next->prob;
XTensor decoding;
XTensor decodingStep;
/* the decoder padding has the same shape as the decoder input */
for(int i = 0; i < inputDec.order - 1; i++)
dims[i] = inputDec.GetDim(i);
dims[inputDec.order - 1] = inputDec.GetDim(-1);
XTensor paddingDec;
InitTensorV2(&paddingDec, inputDec.order, dims, X_INT, paddingEnc->devID);
SetDataFixed(paddingDec, 1);
XTensor maskDec;
XTensor maskEncDec;
/* decoder mask */
m->MakeMTMaskDec(*inputEnc, inputDec, *paddingEnc, paddingDec, maskDec, maskEncDec);
/* make the decoding network */
decoding = decoder.Make(inputDec, *encoding, maskDec, maskEncDec, false);
XTensor selectSrc;
XTensor selectTgt;
CheckNTErrors(decoding.order >= 2, "The tensor must be of order 2 or larger!");
/* size of the second-to-last dimension of the decoder output */
int stride = decoding.GetDim(decoding.order - 2);
/* one-element index tensors: source index (stride - 1) is copied to target index 0 */
InitTensor1DV2(&selectSrc, 1, X_INT);
InitTensor1DV2(&selectTgt, 1, X_INT);
selectSrc.SetInt(stride - 1, 0);
selectTgt.SetInt(0, 0);
selectSrc.SetDevice(decoding.devID);
selectTgt.SetDevice(decoding.devID);
/* the decoder output of the last position */
decodingStep = CopyIndexed(decoding, decoding.order - 2, selectSrc, selectTgt);
/* generate the output probabilities */
m->outputLayer->Make(decodingStep, output);
next->layersEnc.AddList(&s->layersEnc);
/* NOTE(review): inputDec is a local tensor, so the address stored here is no
   longer valid once this function returns. Within this function the stored
   entry is only compared with NULL (see inputLast above); it must not be
   dereferenced by callers - confirm that no other code does so. */
next->layersDec.Add(&inputDec);
next->layersDec.Add(&output);
}
/*
collect the predictions along the back-pointer chain of every state
>> state - state bundle of the current step
<< return - an integer tensor of size (stateNum * length of the longest chain).
            Row i keeps the predictions on the path that ends at state i; the
            latest prediction is in the last column and unused leading cells are 0
*/
XTensor T2TPredictor::GeneratePaths(T2TStateBundle * state)
{
    CheckNTErrors(state->stateNum >= 0, "Illegal state!");

    /* length of the longest back-pointer chain */
    int distance = -1;
    for(int i = 0; i < state->stateNum; i++){
        int nsteps = 0;
        for(T2TState * node = state->states + i; node != NULL; node = node->last)
            nsteps++;

        if(nsteps > distance)
            distance = nsteps;
    }

    XTensor path;
    InitTensor2DV2(&path, state->stateNum, distance, X_INT);
    path.SetZeroAll();

    /* walk every chain backwards and fill the row from right to left */
    for(int i = 0; i < state->stateNum; i++){
        int column = distance;
        for(T2TState * node = state->states + i; node != NULL; node = node->last)
            path.Set2DInt(node->prediction, i, --column);
    }

    return path;
}
}
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2019, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2019-03-13
* This is the first source file I create in 2019 - new start!
*/
#ifndef __T2TPREDICTOR_H__
#define __T2TPREDICTOR_H__
#include "T2TModel.h"
#include "T2TLengthPenalty.h"
namespace transformer
{
#define T2T_PID_EMPTY -1
/* state for search. It keeps the path (back-pointer), the prediction, its
probabilities, etc. It can be regarded as a hypothesis in translation. */
class T2TState
{
public:
/* we assume that the prediction is an integer */
int prediction;
/* id of the problem. One can regard it as the sentence id when we
translate a number of sentences in the batched manner. The hypothesis
is empty if id = T2T_PID_EMPTY (-1) */
int pid;
/* indicates whether the state is an end */
bool isEnd;
/* indicates whether the state is the start */
bool isStart;
/* indicates whether the state is completed */
bool isCompleted;
/* probability of every prediction (last state of the path) */
float prob;
/* probability of every path */
float probPath;
/* model score of every path. A model score = path probability + some other stuff */
float modelScore;
/* number of steps we go over so far */
int nstep;
/* pointer to the previous state (NULL if there is no previous state) */
T2TState * last;
};
/* a bundle of states: tensors that describe all hypotheses of one search step,
plus the array of individual states */
class T2TStateBundle
{
public:
/* predictions */
XTensor prediction;
/* id of the previous state that generates the current one */
XTensor preID;
/* mark that indicates whether each hypothesis is completed */
XTensor endMark;
/* probability of every prediction (last state of the path) */
XTensor prob;
/* probability of every path */
XTensor probPath;
/* model score of every path */
XTensor modelScore;
/* step number of each hypothesis */
XTensor nstep;
/* layers on the encoder side. We actually use the encoder output instead
of all hidden layers. */
TensorList layersEnc;
/* layers on the decoder side */
TensorList layersDec;
/* list of states (array allocated in MakeStates and released in the de-constructor) */
T2TState * states;
/* number of states (set by MakeStates) */
int stateNum;
/* indicates whether it is the first state */
bool isStart;
public:
/* constructor */
T2TStateBundle();
/* de-constructor */
~T2TStateBundle();
/* create states */
void MakeStates(int num);
};
/* The predictor reads the current state and then predicts the next.
It is exactly the same procedure of MT inference -
we get the state of previous words and then generate the next word.
Here, a state can be regarded as the representation of words (word
indices, hidden states, embeddings, etc.). */
class T2TPredictor
{
private:
/* pointer to the transformer model (set by Read) */
T2TModel * m;
/* current state (set by Read) */
T2TStateBundle * s;
/* start symbol */
int startSymbol;
public:
/* constructor */
T2TPredictor();
/* de-constructor */
~T2TPredictor();
/* create an initial state */
void Create(T2TModel * model, XTensor * top, const XTensor * input, int beamSize, T2TStateBundle * state);
/* set the start symbol */
void SetStartSymbol(int symbol);
/* read a state */
void Read(T2TModel * model, T2TStateBundle * state);
/* predict the next state */
void Predict(T2TStateBundle * next, XTensor * encoding, XTensor * inputEnc, XTensor * paddingEnc);
/* generate paths up to the states of the current step */
XTensor GeneratePaths(T2TStateBundle * state);
};
}
#endif
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2019, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2019-03-27
*/
#ifndef __T2TSEARCH_H__
#define __T2TSEARCH_H__
#include "T2TModel.h"
#include "T2TPredictor.h"
namespace transformer
{
/* The class organizes the search process. It calls "predictors" to generate
distributions of the predictions and prunes the search space by beam pruning.
This makes a graph where each path represents a translation hypothesis.
The output can be the path with the highest model score. */
class T2TSearch
{
private:
/* the alpha parameter controls the length preference */
float alpha;
/* predictor */
T2TPredictor predictor;
/* max length of the generated sequence */
int maxLength;
/* beam size */
int beamSize;
/* batch size */
int batchSize;
/* we keep the final hypotheses in a heap for each sentence in the batch. */
XHeap<MIN_HEAP, float> * fullHypos;
/* array of the end symbols */
int * endSymbols;
/* number of the end symbols */
int endSymbolNum;
/* start symbol */
int startSymbol;
public:
/* constructor */
T2TSearch();
/* de-constructor */
~T2TSearch();
/* initialize the model */
void Init(int argc, char ** argv);
/* search for the most promising states */
void Search(T2TModel * model, XTensor * input, XTensor * padding, XTensor * output);
/* preparation */
void Prepare(int myBatchSize,int myBeamSize);
/* compute the model score for each hypothesis */
void Score(T2TStateBundle * prev, T2TStateBundle * beam);
/* generate token indices via beam pruning */
void Generate(T2TStateBundle * beam);
/* expand the search graph */
void Expand(T2TStateBundle * prev, T2TStateBundle * beam);
/* collect hypotheses with ending symbol */
void Collect(T2TStateBundle * beam);
/* fill the hypothesis heap with incomplete hypotheses */
void FillHeap(T2TStateBundle * beam);
/* save the output sequences in a tensor */
void Dump(XTensor * output);
/* check if the token is an end symbol */
bool IsEnd(int token);
/* set end symbols for search */
void SetEnd(const int * tokens, const int tokenNum);
/* make a mask to prevent duplicated entries in beam expansion for the first position */
XTensor MakeFirstMask(T2TStateBundle * beam);
};
}
#endif
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2019, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2019-03-27
*/
#include <math.h>
#include "T2TUtility.h"
#include "T2TTester.h"
#include "T2TSearch.h"
#include "../../tensor/XUtility.h"
#include "../../tensor/core/CHeader.h"
#include "../../network/XNoder.h"
using namespace nts;
namespace transformer
{
/* constructor: does nothing by itself. vSize and vSizeTgt are left unset
   here and get their values in Init(), which must be called before Test(). */
T2TTester::T2TTester()
{
}
/* destructor: nothing is released explicitly here */
T2TTester::~T2TTester()
{
}
/*
set up the tester from the command-line style arguments
>> argc - number of arguments
>> argv - list of arguments
*/
void T2TTester::Init(int argc, char ** argv)
{
    /* source-side vocabulary size ("vsize"), 1 if not specified */
    LoadParamInt(argc, argv, "vsize", &vSize, 1);

    /* target-side vocabulary size ("vsizetgt"), the source-side size by default */
    const int tgtSizeByDefault = vSize;
    LoadParamInt(argc, argv, "vsizetgt", &vSizeTgt, tgtSizeByDefault);

    /* the batch loader and the searcher read their own settings from the same arguments */
    batchLoader.Init(argc, argv);
    seacher.Init(argc, argv);
}
/*
test the model
>> fn - test data file
>> ofn - output data file
>> model - model that is trained
*/
void T2TTester::Test(const char * fn, const char * ofn, T2TModel * model)
{
int wc = 0;
int ws = 0;
int wordCount = 0;
int wordCountTotal = 0;
int sentCount = 0;
int batchCount = 0;
float loss = 0;
/* data files */
FILE * file = fopen(fn, "rb");
CheckNTErrors(file, "Cannot read the test file");
FILE * ofile = fopen(ofn, "wb");
CheckNTErrors(ofile, "Cannot open the output file");
int devID = model->devID;
XNet net;
double startT = GetClockSec();
wordCount = 0;
/* batch of input sequences */
XTensor batchEnc;
XTensor batchDec;
/* label */
XTensor label;
/* padding */
XTensor paddingEnc;
XTensor paddingDec;
/* gold standard */
XTensor gold;
/* an array that keeps the sequences */
int * seqs = new int[MILLION];
batchLoader.SetRandomBatch(false);
batchLoader.ClearBuf();
while(batchLoader.LoadBatch(file, model->isLM,
&batchEnc, &paddingEnc, &paddingDec, &paddingDec, &gold, &label,
seqs, vSize, vSizeTgt,
1, 1, false, ws, wc, devID, false))
{
CheckNTErrors(batchEnc.order == 2, "wrong tensor order of the sequence batch!");
CheckNTErrors(!model->isLM, "Only MT model is supported!");
XTensor output;
seacher.Search(model, &batchEnc, &paddingEnc, &output);
Dump(ofile, &output);
float prob = 0;
loss += -prob;
wc = batchEnc.GetDim(-1);
wordCount += wc;
wordCountTotal += wc;
sentCount += batchEnc.GetDim(-2);
batchCount += 1;
if (batchCount % 1 == 0) {
double elapsed = GetClockSec() - startT;
XPRINT3(0, stderr,
"[INFO] elapsed=%.1fs, sentence=%d, sword=%d\n",
elapsed, sentCount, wordCount);
}
}
fclose(file);
fclose(ofile);
delete[] seqs;
double elapsed = GetClockSec() - startT;
XPRINT3(0, stderr, "[INFO] test finished (took %.1fs, word=%d, and ppl=%.3f)\n",
elapsed,wordCountTotal, exp(loss/wordCount));
}
/*
write the output sequences into the file, one sequence per line
>> file - data file
>> output - output tensor; its last dimension is taken as the sequence length

Every token id is followed by a space. A sequence is cut right after its
first negative id (the negative id itself is still written).
*/
void T2TTester::Dump(FILE * file, XTensor * output)
{
    const int width = output->GetDim(-1);

    int offset = 0;
    while (offset < output->unitNum) {
        int col = 0;
        while (col < width) {
            const int token = output->GetInt(offset + col);
            fprintf(file, "%d ", token);
            ++col;

            /* a negative id ends the sequence */
            if (token < 0)
                break;
        }
        fprintf(file, "\n");
        offset += width;
    }
}
}
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2019, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2019-03-27
* A week with no trips :)
*/
#ifndef __T2TTESTER_H__
#define __T2TTESTER_H__
#include "T2TSearch.h"
#include "T2TBatchLoader.h"
namespace transformer
{
/* This class translates test sentences with a trained model.
   Typical use: Init() to read the settings, then Test() on a data file. */
class T2TTester
{
public:
/* vocabulary size of the source side (set in Init) */
int vSize;
/* vocabulary size of the target side (set in Init) */
int vSizeTgt;
/* for batching */
T2TBatchLoader batchLoader;
/* decoder for inference (the searcher) */
T2TSearch seacher;
public:
/* constructor */
T2TTester();
/* destructor */
~T2TTester();
/* initialize the tester from the command-line style arguments */
void Init(int argc, char ** argv);
/* test the model
   >> fn - test data file
   >> ofn - output data file
   >> model - model that is trained */
void Test(const char * fn, const char * ofn, T2TModel * model);
/* dump the result into the file
   >> file - data file
   >> output - output tensor */
void Dump(FILE * file, XTensor * output);
};
}
#endif
\ No newline at end of file
...@@ -23,35 +23,14 @@ ...@@ -23,35 +23,14 @@
#define __T2TTRAINER_H__ #define __T2TTRAINER_H__
#include "T2TModel.h" #include "T2TModel.h"
#include "T2TBatchLoader.h"
#include "../../tensor/function/FHeader.h" #include "../../tensor/function/FHeader.h"
#define MAX_SEQUENCE_LENGTH 1024 * 4
using namespace nts; using namespace nts;
namespace transformer namespace transformer
{ {
/* node to keep batch information */
struct BatchNode
{
/* begining position */
int beg;
/* end position */
int end;
/* maximum word number on the encoder side */
int maxEnc;
/* maximum word number on the decoder side */
int maxDec;
/* a key for sorting */
int key;
};
/* trainer of the T2T model */ /* trainer of the T2T model */
class T2TTrainer class T2TTrainer
{ {
...@@ -61,42 +40,6 @@ public: ...@@ -61,42 +40,6 @@ public:
/* parameter array */ /* parameter array */
char ** argArray; char ** argArray;
/* buffer for loading words */
int * buf;
/* another buffer */
int * buf2;
/* batch buf */
BatchNode * bufBatch;
/* buffer size */
int bufSize;
/* size of batch buffer */
int bufBatchSize;
/* length of each sequence */
int * seqLen;
/* another array */
int * seqLen2;
/* offset of the first word for each sequence */
int * seqOffset;
/* number of sequences in the buffer */
int nseqBuf;
/* offset for next sequence in the buffer */
int nextSeq;
/* offset for next batch */
int nextBatch;
/* indicates whether the sequence is sorted by length */
bool isLenSorted;
/* dimension size of each inner layer */ /* dimension size of each inner layer */
int d; int d;
...@@ -139,10 +82,10 @@ public: ...@@ -139,10 +82,10 @@ public:
float adamBeta2T; float adamBeta2T;
/* list of the moment of the parameter matrics */ /* list of the moment of the parameter matrics */
XList moments; TensorList moments;
/* list of the 2nd order moment of the parameter matrics */ /* list of the 2nd order moment of the parameter matrics */
XList moments2nd; TensorList moments2nd;
/* indicates whether the data file is shuffled for training */ /* indicates whether the data file is shuffled for training */
bool isShuffled; bool isShuffled;
...@@ -158,26 +101,15 @@ public: ...@@ -158,26 +101,15 @@ public:
/* number of batches on which we do model update */ /* number of batches on which we do model update */
int updateStep; int updateStep;
/* indicates whether we double the </s> symbol for the output of lms */
bool isDoubledEnd;
/* indicates whether we use batchsize = max * sc
rather rather than batchsize = word-number, where max is the maximum
length and sc is the sentence number */
bool isSmallBatch;
/* counterpart of "isSmallBatch" */
bool isBigBatch;
/* randomize batches */
bool isRandomBatch;
/* indicates whether we intend to debug the net */ /* indicates whether we intend to debug the net */
bool isDebugged; bool isDebugged;
/* bucket size */ /* indicates whether the sequence is sorted by length */
int bucketSize; bool isLenSorted;
/* for batching */
T2TBatchLoader batchLoader;
public: public:
/* constructor */ /* constructor */
...@@ -197,46 +129,6 @@ public: ...@@ -197,46 +129,6 @@ public:
/* make a checkpoint */ /* make a checkpoint */
void MakeCheckpoint(T2TModel * model, const char * validFN, const char * modelFN, const char * label, int id); void MakeCheckpoint(T2TModel * model, const char * validFN, const char * modelFN, const char * label, int id);
/* load data to buffer */
int LoadBuf(FILE * file, bool isSorted, int step);
/* clear data buffer */
void ClearBuf();
/* load a batch of sequences */
int LoadBatch(FILE * file, bool isLM,
XTensor * batchEnc, XTensor * paddingEnc,
XTensor * batchDec, XTensor * paddingDec,
XTensor * gold, XTensor * label,
int * seqs,
int vsEnc, int vsDec, int sBatch, int wBatch,
bool isSorted, int &ws, int &wCount,
int devID, XMem * mem,
bool isTraining);
/* load a batch of sequences (for language modeling) */
int LoadBatchLM(FILE * file,
XTensor * batchEnc, XTensor * paddingEnc,
XTensor * batchDec, XTensor * paddingDec,
XTensor * gold, XTensor * label,
int * seqs, int vs, int sBatch, int wBatch,
bool isSorted, int &wCount,
int devID, XMem * mem,
bool isTraining);
/* load a batch of sequences (for machine translation) */
int LoadBatchMT(FILE * file,
XTensor * batchEnc, XTensor * paddingEnc,
XTensor * batchDec, XTensor * paddingDec,
XTensor * gold, XTensor * label,
int * seqs, int vsEnc, int vsDec, int sBatch, int wBatch,
bool isSorted, int &ws, int &wCount,
int devID, XMem * mem,
bool isTraining);
/* shuffle the data file */
void Shuffle(const char * srcFile, const char * tgtFile);
/* get word probabilities for a batch of sequences */ /* get word probabilities for a batch of sequences */
float GetProb(XTensor * output, XTensor * gold, XTensor * wordProbs); float GetProb(XTensor * output, XTensor * gold, XTensor * wordProbs);
......
...@@ -25,6 +25,8 @@ ...@@ -25,6 +25,8 @@
#include "T2TModel.h" #include "T2TModel.h"
#include "T2TUtility.h" #include "T2TUtility.h"
#include "T2TTrainer.h" #include "T2TTrainer.h"
#include "T2TPredictor.h"
#include "T2TTester.h"
#include "../../tensor/XDevice.h" #include "../../tensor/XDevice.h"
#include "../../tensor/XUtility.h" #include "../../tensor/XUtility.h"
#include "../../tensor/XGlobal.h" #include "../../tensor/XGlobal.h"
...@@ -47,6 +49,7 @@ int TransformerMain(int argc, const char ** argv) ...@@ -47,6 +49,7 @@ int TransformerMain(int argc, const char ** argv)
ShowParams(argc, args); ShowParams(argc, args);
bool isBeamSearch = false;
char * trainFN = new char[MAX_LINE_LENGTH]; char * trainFN = new char[MAX_LINE_LENGTH];
char * modelFN = new char[MAX_LINE_LENGTH]; char * modelFN = new char[MAX_LINE_LENGTH];
char * testFN = new char[MAX_LINE_LENGTH]; char * testFN = new char[MAX_LINE_LENGTH];
...@@ -56,6 +59,7 @@ int TransformerMain(int argc, const char ** argv) ...@@ -56,6 +59,7 @@ int TransformerMain(int argc, const char ** argv)
LoadParamString(argc, args, "model", modelFN, ""); LoadParamString(argc, args, "model", modelFN, "");
LoadParamString(argc, args, "test", testFN, ""); LoadParamString(argc, args, "test", testFN, "");
LoadParamString(argc, args, "output", outputFN, ""); LoadParamString(argc, args, "output", outputFN, "");
LoadParamBool(argc, args, "beamsearch", &isBeamSearch, false);
srand((unsigned int)time(NULL)); srand((unsigned int)time(NULL));
...@@ -64,28 +68,35 @@ int TransformerMain(int argc, const char ** argv) ...@@ -64,28 +68,35 @@ int TransformerMain(int argc, const char ** argv)
T2TModel model; T2TModel model;
model.InitModel(argc, args); model.InitModel(argc, args);
//if(strcmp(modelFN, ""))
// model.Read(modelFN);
/* learn model parameters */ /* learn model parameters */
if(strcmp(trainFN, "")) if(strcmp(trainFN, ""))
trainer.Train(trainFN, testFN, strcmp(modelFN, "") ? modelFN : "checkpoint.model", &model); trainer.Train(trainFN, testFN, strcmp(modelFN, "") ? modelFN : "checkpoint.model", &model);
/* save the final model */ /* save the final model */
//if(strcmp(modelFN, "") && strcmp(trainFN, "")) if(strcmp(modelFN, "") && strcmp(trainFN, ""))
//model.Dump(modelFN); model.Dump(modelFN);
/* load the model if neccessary */ /* load the model if neccessary */
//if(strcmp(modelFN, "")) if(strcmp(modelFN, ""))
//model.Read(modelFN); model.Read(modelFN);
T2TTrainer tester;
tester.Init(argc, args);
/* test the model on the new data */ /* test the model on the new data */
if(strcmp(testFN, "") && strcmp(outputFN, "")) if(strcmp(testFN, "") && strcmp(outputFN, "")){
tester.Test(testFN, outputFN, &model); /* beam search */
if(isBeamSearch){
T2TTester searcher;
searcher.Init(argc, args);
searcher.Test(testFN, outputFN, &model);
}
/* forced decoding */
else{
T2TTrainer tester;
tester.Init(argc, args);
tester.Test(testFN, outputFN, &model);
}
}
delete[] trainFN; delete[] trainFN;
delete[] modelFN; delete[] modelFN;
......
...@@ -274,7 +274,7 @@ void T2TTest2() ...@@ -274,7 +274,7 @@ void T2TTest2()
//XTensor * probs = NewTensor(3, dimSize, X_FLOAT, 1.0F, -1); //XTensor * probs = NewTensor(3, dimSize, X_FLOAT, 1.0F, -1);
//myRead(probs, "probs.txt", " "); //myRead(probs, "probs.txt", " ");
_SetDataFixedFloat(probs, 1.0F); _SetDataFixed(probs, 1.0F);
probs->Reshape(1, probs->unitNum); probs->Reshape(1, probs->unitNum);
......
...@@ -60,7 +60,7 @@ TENSOR_DATA_TYPE GetDataType(const char * typeName) ...@@ -60,7 +60,7 @@ TENSOR_DATA_TYPE GetDataType(const char * typeName)
} }
} }
/**************************************************** /*
Below is for calling CPU BLAS for fast matrix operations Below is for calling CPU BLAS for fast matrix operations
I'm not sure how fast it is. But it seems that other I'm not sure how fast it is. But it seems that other
guys are crazy about this. So I decided to have a try. guys are crazy about this. So I decided to have a try.
...@@ -81,35 +81,4 @@ _XINLINE_ float Float16ToFloat(unsigned short h) ...@@ -81,35 +81,4 @@ _XINLINE_ float Float16ToFloat(unsigned short h)
return f; return f;
} }
/*
data type conversion
>> devID - device id
>> s - source data array
>> typeS - source data type
>> t - target data array
>> typeT - target data type
>> size - number of the items in s (and t)
*/
void ConvertDataType(int devID, void * s, TENSOR_DATA_TYPE typeS, void * t, TENSOR_DATA_TYPE typeT, int size)
{
CheckNTErrors((devID < 0), "This code must be run on CPUs!");
if(typeS == typeT)
return;
if(typeS == X_FLOAT && typeT == X_FLOAT16){
for(int i = 0; i < size; i++){
((unsigned short*)t)[i] = FloatToFloat16(((float*)s)[i]);
}
}
else if(typeS == X_FLOAT16 && typeT == X_FLOAT){
for(int i = 0; i < size; i++){
((float*)t)[i] = Float16ToFloat(((unsigned short*)s)[i]);
}
}
else{
ShowNTErrors("Unsupported data types for conversion!");
}
}
} /* end of the nts (NiuTrans.Tensor) namespace */ } /* end of the nts (NiuTrans.Tensor) namespace */
...@@ -49,15 +49,6 @@ extern TENSOR_DATA_TYPE GetDataType(const char * typeName); ...@@ -49,15 +49,6 @@ extern TENSOR_DATA_TYPE GetDataType(const char * typeName);
/* data conversion (for lower precision computation) */ /* data conversion (for lower precision computation) */
unsigned short FloatToFloat16(float f); unsigned short FloatToFloat16(float f);
float Float16ToFloat(unsigned short h); float Float16ToFloat(unsigned short h);
void ConvertDataType(int devID,
void * s, TENSOR_DATA_TYPE typeS,
void * t, TENSOR_DATA_TYPE typeT, int size);
#ifdef USE_CUDA
void CudaConvertDataType(int devID,
void * s, TENSOR_DATA_TYPE typeS,
void * t, TENSOR_DATA_TYPE typeT, int size);
#endif
} /* end of the nts (NiuTrans.Tensor) namespace */ } /* end of the nts (NiuTrans.Tensor) namespace */
......
...@@ -201,7 +201,8 @@ void XDevice::SetGPUDevice(int devID) ...@@ -201,7 +201,8 @@ void XDevice::SetGPUDevice(int devID)
cudaError_t error = cudaSetDevice(devID); cudaError_t error = cudaSetDevice(devID);
if (error != cudaSuccess){ if (error != cudaSuccess){
fprintf(stderr, "Error! Calling cudaSetDevice(%d) fails(%d:%s)\n", devID, error, cudaGetErrorString(error)); fprintf(stderr, "Error! Calling cudaSetDevice(%d) fails(%d:%s)\n",
devID, error, cudaGetErrorString(error));
exit(1); exit(1);
} }
#else #else
...@@ -216,7 +217,7 @@ void XDevice::SetGPUDeviceFast(int devID) ...@@ -216,7 +217,7 @@ void XDevice::SetGPUDeviceFast(int devID)
SetFastFlags(); SetFastFlags();
} }
/* switch to a get current dev */ /* get the id of the current GPU device */
int XDevice::GetGPUDevice() int XDevice::GetGPUDevice()
{ {
#ifdef USE_CUDA #ifdef USE_CUDA
...@@ -224,7 +225,8 @@ int XDevice::GetGPUDevice() ...@@ -224,7 +225,8 @@ int XDevice::GetGPUDevice()
cudaError_t error = cudaGetDevice(&devID); cudaError_t error = cudaGetDevice(&devID);
if (error != cudaSuccess){ if (error != cudaSuccess){
fprintf(stderr, "Error! Calling cudaGetDevice(%d) fails(%d:%s)\n", devID, error, cudaGetErrorString(error)); fprintf(stderr, "Error! Calling cudaGetDevice(%d) fails(%d:%s)\n",
devID, error, cudaGetErrorString(error));
exit(1); exit(1);
} }
...@@ -248,7 +250,7 @@ void XDevice::SetFastFlags() ...@@ -248,7 +250,7 @@ void XDevice::SetFastFlags()
#endif #endif
} }
/* reset cuda flag for more efficient cuda execution (all devices) */ /* reset the cuda flag for more efficient cuda execution (all devices) */
void XDevice::SetFastFlagsAllDevices() void XDevice::SetFastFlagsAllDevices()
{ {
#ifdef USE_CUDA #ifdef USE_CUDA
...@@ -266,10 +268,6 @@ XDevManager::XDevManager() ...@@ -266,10 +268,6 @@ XDevManager::XDevManager()
{ {
Clear(); Clear();
Init(); Init();
#ifndef USE_CPP11
fprintf(stderr, "Warning!!! c++ 11 is RECOMMENDED for compilation.\n");
#endif
} }
/* de-constructor */ /* de-constructor */
...@@ -278,7 +276,7 @@ XDevManager::~XDevManager() ...@@ -278,7 +276,7 @@ XDevManager::~XDevManager()
} }
/* initialize it and get the CPU and GPU information */ /* initialization */
void XDevManager::Init() void XDevManager::Init()
{ {
srand((unsigned int)time(NULL)); srand((unsigned int)time(NULL));
...@@ -322,7 +320,7 @@ void XDevManager::Clear() ...@@ -322,7 +320,7 @@ void XDevManager::Clear()
#ifdef USE_CUDA #ifdef USE_CUDA
/* get the handle of GPU */ /* get the handle of a given GPU */
cublasHandle_t * XDevManager::GetCudaHandle(const int devID) cublasHandle_t * XDevManager::GetCudaHandle(const int devID)
{ {
CheckNTErrors(devID < nGPU, "index of GPU is out of range."); CheckNTErrors(devID < nGPU, "index of GPU is out of range.");
...@@ -330,7 +328,7 @@ cublasHandle_t * XDevManager::GetCudaHandle(const int devID) ...@@ -330,7 +328,7 @@ cublasHandle_t * XDevManager::GetCudaHandle(const int devID)
return GPUs[devID].GetCublasHandle(); return GPUs[devID].GetCublasHandle();
} }
/* get the stream of cuda */ /* get the stream of a given GPU */
cudaStream_t * XDevManager::GetCudaStream(const int devID) cudaStream_t * XDevManager::GetCudaStream(const int devID)
{ {
CheckNTErrors(devID < nGPU, "index of GPU is out of range."); CheckNTErrors(devID < nGPU, "index of GPU is out of range.");
...@@ -478,7 +476,7 @@ split a string ...@@ -478,7 +476,7 @@ split a string
>> items - splitting result >> items - splitting result
<< return - how many items are there << return - how many items are there
*/ */
int SplitALine(char * inputString, const char * seperator, XList * items) int SplitALine(char * inputString, const char * seperator, StrList* items)
{ {
items->Clear(); items->Clear();
...@@ -527,12 +525,12 @@ get device ids for the given device information ...@@ -527,12 +525,12 @@ get device ids for the given device information
devInfo = "0:CPU-1 1:GPU-0 2:CPU-1" devInfo = "0:CPU-1 1:GPU-0 2:CPU-1"
means that the first device is CPU, the second device means that the first device is CPU, the second device
is GPU-0, the third device is CPU. is GPU-0, the third device is CPU.
>> devIDs - device sequence specified by devInfo >> devIDs - device IDs specified by devInfo
<< return - number of devices << return - number of devices
*/ */
int XDevManager::GetDeviceIDs(char * devInfo, int * devIDs) int XDevManager::GetDeviceIDs(char * devInfo, int * devIDs)
{ {
XList * terms = new XList(1); StrList* terms = new StrList(1);
SplitALine(devInfo, " ", terms); SplitALine(devInfo, " ", terms);
for(int i = 0; i < terms->count; i++){ for(int i = 0; i < terms->count; i++){
...@@ -569,7 +567,7 @@ int XDevManager::GetDeviceIDs(char * devInfo, int * devIDs) ...@@ -569,7 +567,7 @@ int XDevManager::GetDeviceIDs(char * devInfo, int * devIDs)
return devCount; return devCount;
} }
/* show id sequence */ /* show device IDs */
void XDevManager::ShowDeviceIDs(char * devInfo, char * msg) void XDevManager::ShowDeviceIDs(char * devInfo, char * msg)
{ {
msg[0] = 0; msg[0] = 0;
......
...@@ -236,6 +236,18 @@ extern XDevManager GDevs; ...@@ -236,6 +236,18 @@ extern XDevManager GDevs;
cudaSetDevice(devIDBackup); \ cudaSetDevice(devIDBackup); \
} \ } \
#define CheckDev(a, b) \
{ \
if((a < 0 && b >= 0) || (a >= 0 && b < 0)){ \
fprintf(stderr, "[ERROR] (%s line %d): we must run the code on the same device (%d vs %d)\n", __FILENAME__, __LINE__, a, b); \
exit(1); \
} \
else if (a >= 0 && b >= 0 && a != b) { \
fprintf(stderr, "[ERROR] (%s line %d): we must run the code on the same device (%d vs %d)\n", __FILENAME__, __LINE__, a, b); \
exit(1); \
} \
} \
} /* end of the nts (NiuTrans.Tensor) namespace */ } /* end of the nts (NiuTrans.Tensor) namespace */
#endif #endif
...@@ -51,7 +51,13 @@ bool CONST_TRUE = true; ...@@ -51,7 +51,13 @@ bool CONST_TRUE = true;
int verboseLevel = 0; int verboseLevel = 0;
bool useBLAS = false; bool useBLAS = false;
bool useCUDA = false;
#ifdef USE_CUDA
bool useCUDA = true;
#else
bool useCUDA = false;
#endif
FILE * tmpLog = NULL; FILE * tmpLog = NULL;
double myTime = 0; double myTime = 0;
......
...@@ -45,10 +45,6 @@ typedef int8_t __int8; ...@@ -45,10 +45,6 @@ typedef int8_t __int8;
/* the nts (NiuTrans.Tensor) namespace */ /* the nts (NiuTrans.Tensor) namespace */
namespace nts { namespace nts {
#if (__cplusplus >= 201103L || _MSC_VER >= 1700)
#define USE_CPP11
#endif
#define _XINLINE_ #define _XINLINE_
//#define DOUBELPRICSION //#define DOUBELPRICSION
...@@ -159,7 +155,9 @@ extern bool useCUDA; ...@@ -159,7 +155,9 @@ extern bool useCUDA;
#define XPRINT7(VERBOSE,FILEH,STR,ARG,ARG2,ARG3,ARG4,ARG5,ARG6,ARG7) {if(VERBOSE<=verboseLevel) {fprintf(FILEH,STR,ARG,ARG2,ARG3,ARG4,ARG5,ARG6,ARG7);FFLUSH(FILEH);}} #define XPRINT7(VERBOSE,FILEH,STR,ARG,ARG2,ARG3,ARG4,ARG5,ARG6,ARG7) {if(VERBOSE<=verboseLevel) {fprintf(FILEH,STR,ARG,ARG2,ARG3,ARG4,ARG5,ARG6,ARG7);FFLUSH(FILEH);}}
#define XPRINT8(VERBOSE,FILEH,STR,ARG,ARG2,ARG3,ARG4,ARG5,ARG6,ARG7,ARG8) {if(VERBOSE<=verboseLevel) {fprintf(FILEH,STR,ARG,ARG2,ARG3,ARG4,ARG5,ARG6,ARG7,ARG8);FFLUSH(FILEH);}} #define XPRINT8(VERBOSE,FILEH,STR,ARG,ARG2,ARG3,ARG4,ARG5,ARG6,ARG7,ARG8) {if(VERBOSE<=verboseLevel) {fprintf(FILEH,STR,ARG,ARG2,ARG3,ARG4,ARG5,ARG6,ARG7,ARG8);FFLUSH(FILEH);}}
#define B2I(V) V==0?false:true #define B2I(V) V == 0 ? false : true
#define MODX(a, b) int(b == 0 ? a : a - floor(double(a)/b) * b)
/* BLAS interfaces */ /* BLAS interfaces */
#ifdef DOUBELPRICSION #ifdef DOUBELPRICSION
......
...@@ -31,15 +31,15 @@ namespace nts{ ...@@ -31,15 +31,15 @@ namespace nts{
/* constructor */ /* constructor */
template<HeapType hType, typename T> template<HeapType hType, typename T>
XHeap<hType, T>::XHeap()
{
}
/* constructor */
template<HeapType hType, typename T>
XHeap<hType, T>::XHeap(int mySize, XMem * myMem) XHeap<hType, T>::XHeap(int mySize, XMem * myMem)
{ {
mem = myMem; Init(mySize, myMem);
size = mySize;
count = 0;
if (mem == NULL)
items = new HeapNode<T>[mySize];
else
mem->Alloc(mem->devID, mySize * sizeof(T));
} }
/* deconstructor */ /* deconstructor */
...@@ -50,6 +50,19 @@ XHeap<hType, T>::~XHeap() ...@@ -50,6 +50,19 @@ XHeap<hType, T>::~XHeap()
} }
template<HeapType hType, typename T> template<HeapType hType, typename T>
void XHeap<hType, T>::Init(int mySize, XMem * myMem)
{
mem = myMem;
size = mySize;
count = 0;
if (mem == NULL)
items = new HeapNode<T>[mySize];
else
mem->Alloc(mem->devID, mySize * sizeof(T));
}
template<HeapType hType, typename T>
void XHeap<hType, T>::Clear(T initValue) void XHeap<hType, T>::Clear(T initValue)
{ {
count = 0; count = 0;
...@@ -89,10 +102,24 @@ _XINLINE_ HeapNode<T> XHeap<hType, T>::End() ...@@ -89,10 +102,24 @@ _XINLINE_ HeapNode<T> XHeap<hType, T>::End()
template<HeapType hType, typename T> template<HeapType hType, typename T>
_XINLINE_ void XHeap<hType, T>::Push(HeapNode<T> node) _XINLINE_ void XHeap<hType, T>::Push(HeapNode<T> node)
{ {
//CheckNTErrors((count < size), "Heap is full!"); if (count < size) {
items[count] = node; items[count] = node;
Up(count); Up(count);
count++; count++;
}
else if(count == size){
HeapNode<T> & item0 = items[0];
if (hType == MIN_HEAP && item0.value >= node.value)
return;
else if (hType == MAX_HEAP && item0.value <= node.value)
return;
items[0] = node;
Down(0);
}
else {
ShowNTErrors("Overflow of the heap!");
}
} }
/* replace the top-most item and update the heap */ /* replace the top-most item and update the heap */
...@@ -107,7 +134,7 @@ _XINLINE_ void XHeap<hType, T>::ReplaceTop(HeapNode<T> node) ...@@ -107,7 +134,7 @@ _XINLINE_ void XHeap<hType, T>::ReplaceTop(HeapNode<T> node)
template<HeapType hType, typename T> template<HeapType hType, typename T>
_XINLINE_ HeapNode<T> XHeap<hType, T>::Pop() _XINLINE_ HeapNode<T> XHeap<hType, T>::Pop()
{ {
//CheckNTErrors((size > 0), "Empty heap!"); CheckNTErrors(count > 0, "Empty heap!");
HeapNode<T> node = items[0]; HeapNode<T> node = items[0];
items[0] = items[count - 1]; items[0] = items[count - 1];
count--; count--;
......
...@@ -39,7 +39,7 @@ template <typename T> ...@@ -39,7 +39,7 @@ template <typename T>
struct HeapNode struct HeapNode
{ {
/* node index */ /* node index */
int index; long long index;
/* value of the node */ /* value of the node */
T value; T value;
...@@ -52,9 +52,16 @@ struct HeapNode ...@@ -52,9 +52,16 @@ struct HeapNode
HeapNode(int i, T v) HeapNode(int i, T v)
{ {
index = i; index = (long long)i;
value = v; value = v;
}; };
HeapNode(void * i, T v)
{
index = (long long)i;
value = v;
}
}; };
/* a heap that keeps a data array of T */ /* a heap that keeps a data array of T */
...@@ -76,11 +83,17 @@ public: ...@@ -76,11 +83,17 @@ public:
public: public:
/* constructor */ /* constructor */
XHeap();
/* constructor */
XHeap(int mySize, XMem * myMem = NULL); XHeap(int mySize, XMem * myMem = NULL);
/* deconstructor */ /* deconstructor */
~XHeap(); ~XHeap();
/* initialization */
void Init(int mySize, XMem * myMem = NULL);
/* clear the data */ /* clear the data */
void Clear(T initValue); void Clear(T initValue);
...@@ -107,6 +120,9 @@ public: ...@@ -107,6 +120,9 @@ public:
/* move item k up the tree */ /* move item k up the tree */
void Up(int k); void Up(int k);
/* how many items are kept in the heap */
inline int Count() { return count; };
}; };
} /* end of the nts (NiuTrans.Tensor) namespace */ } /* end of the nts (NiuTrans.Tensor) namespace */
......
...@@ -300,9 +300,9 @@ void XLink::MakeLink(const XTensor * t1, const XTensor * t2, XTensor * h, int id ...@@ -300,9 +300,9 @@ void XLink::MakeLink(const XTensor * t1, const XTensor * t2, XTensor * h, int id
if(h == NULL) if(h == NULL)
return; return;
XList list(2); TensorList list(2);
list.Add(t1); list.Add((XTensor*)t1);
list.Add(t2); list.Add((XTensor*)t2);
MakeLink(&list, h, id); MakeLink(&list, h, id);
} }
...@@ -320,10 +320,10 @@ void XLink::MakeLink(const XTensor * t1, const XTensor * t2, const XTensor * t3, ...@@ -320,10 +320,10 @@ void XLink::MakeLink(const XTensor * t1, const XTensor * t2, const XTensor * t3,
if (h == NULL) if (h == NULL)
return; return;
XList list(3); TensorList list(3);
list.Add(t1); list.Add((XTensor*)t1);
list.Add(t2); list.Add((XTensor*)t2);
list.Add(t3); list.Add((XTensor*)t3);
MakeLink(&list, h, id); MakeLink(&list, h, id);
} }
...@@ -334,7 +334,7 @@ create a hyper edge with a list of tensors and a output tensor ...@@ -334,7 +334,7 @@ create a hyper edge with a list of tensors and a output tensor
>> h - head tensor >> h - head tensor
>> id - id of the edge type >> id - id of the edge type
*/ */
void XLink::MakeLink(const XList * list, XTensor * h, int id) void XLink::MakeLink(const TensorList * list, XTensor * h, int id)
{ {
/* forward */ /* forward */
XLink &income = h->income; XLink &income = h->income;
...@@ -368,7 +368,7 @@ create a hyper edge with a input tensors and a list of output tensors ...@@ -368,7 +368,7 @@ create a hyper edge with a input tensors and a list of output tensors
>> list - a list of output tensors >> list - a list of output tensors
>> id - id of the edge type >> id - id of the edge type
*/ */
void XLink::MakeLink(XTensor * t, XList * list, int id) void XLink::MakeLink(XTensor * t, TensorList * list, int id)
{ {
/* forward */ /* forward */
for(int i = 0; i < list->count; i++){ for(int i = 0; i < list->count; i++){
...@@ -624,7 +624,7 @@ void XLink::CopyIncoming(const XTensor * reference, XTensor * target) ...@@ -624,7 +624,7 @@ void XLink::CopyIncoming(const XTensor * reference, XTensor * target)
ClearIncoming(target); ClearIncoming(target);
int tailNum = reference->income.tailNum; int tailNum = reference->income.tailNum;
XList tails(tailNum); TensorList tails(tailNum);
for(int i = 0; i < tailNum; i++){ for(int i = 0; i < tailNum; i++){
XTensor * tail = (XTensor*)reference->income.tails[i]; XTensor * tail = (XTensor*)reference->income.tails[i];
tails.Add(tail); tails.Add(tail);
...@@ -743,7 +743,7 @@ search for a node in a top-down manner by its name ...@@ -743,7 +743,7 @@ search for a node in a top-down manner by its name
>> top - the top most node >> top - the top most node
<< return - the node we found << return - the node we found
*/ */
/*XTensor * XLink::SearchNode(XTensor * top, const char * name) XTensor * XLink::SearchNode(XTensor * top, const char * name)
{ {
if(!strcmp(top->name, name)) if(!strcmp(top->name, name))
return top; return top;
...@@ -758,7 +758,7 @@ search for a node in a top-down manner by its name ...@@ -758,7 +758,7 @@ search for a node in a top-down manner by its name
} }
return NULL; return NULL;
}*/ }
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -138,17 +138,17 @@ struct XLink ...@@ -138,17 +138,17 @@ struct XLink
static static
void MakeLink(const XTensor * t1, const XTensor * t2, XTensor * h, int id); void MakeLink(const XTensor * t1, const XTensor * t2, XTensor * h, int id);
/* create a hyper edge with two input tensors and a output tensor */ /* create a hyper edge with three input tensors and a output tensor */
static static
void MakeLink(const XTensor * t1, const XTensor * t2, const XTensor * t3, XTensor * h, int id); void MakeLink(const XTensor * t1, const XTensor * t2, const XTensor * t3, XTensor * h, int id);
/* create a hyper edge with a list of input tensors and a output tensor */ /* create a hyper edge with a list of input tensors and a output tensor */
static static
void MakeLink(const XList * list, XTensor * h, int id); void MakeLink(const TensorList * list, XTensor * h, int id);
/* create a hyper edge with a input tensors and a list of output tensors */ /* create a hyper edge with a input tensors and a list of output tensors */
static static
void MakeLink(XTensor * h, XList * list, int id); void MakeLink(XTensor * h, TensorList * list, int id);
/* add a parameter */ /* add a parameter */
static static
...@@ -191,8 +191,8 @@ struct XLink ...@@ -191,8 +191,8 @@ struct XLink
void ShowNode(FILE * file, XTensor * node); void ShowNode(FILE * file, XTensor * node);
/* search a node in a top-down manner by its name */ /* search a node in a top-down manner by its name */
//static static
//XTensor * SearchNode(XTensor * top, const char * name); XTensor * SearchNode(XTensor * top, const char * name);
}; };
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
/* NiuTrans.Tensor - an open-source tensor library /* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University. * Copyright (C) 2019, Natural Language Processing Lab, Northestern University.
* All rights reserved. * All rights reserved.
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
...@@ -15,32 +15,31 @@ ...@@ -15,32 +15,31 @@
* limitations under the License. * limitations under the License.
*/ */
/* /*
* *
* Implementation of list that keeps data items * Implementation of template list that keeps data items
* *
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-04-17 * $Created by: HU Chi (huchinlp@foxmail.com)
* The first coding job this year! *
* */
*/
#ifndef __XLIST_H__
#define __XLIST_H__
#include "XMem.h" #include "XMem.h"
#include "XGlobal.h" #include "XGlobal.h"
/* the nts (NiuTrans.Tensor) namespace */ #ifndef __TensorList_H__
namespace nts{ #define __TensorList_H__
typedef int (* ListCompare)(const void * item1, const void * item2);
/* the XList class */ /* the nts (NiuTrans.Tensor) namespace */
class XList namespace nts {
{
/* the TensorListBase class */
template <typename T>
struct TensorListBase {
public: public:
/* data items */ /* data items */
void ** items; T *items;
/* number of items */ /* number of items */
int count; int count;
...@@ -49,56 +48,88 @@ public: ...@@ -49,56 +48,88 @@ public:
int maxNum; int maxNum;
/* the memory pool for data array allocation */ /* the memory pool for data array allocation */
XMem * mem; XMem* mem;
/* indicates whether data items are integers */
bool isIntList;
public: public:
/* constructor */ /* constructor */
XList(); TensorListBase();
/* constructor */ /* constructor */
XList(int myMaxNum, bool isIntListOrNot = false); TensorListBase(int myMaxNum);
/* constructor */ /* constructor */
XList(int myMaxNum, XMem * myMem, bool isIntListOrNot = false); TensorListBase(int myMaxNum, XMem* myMem);
/* de-constructor */ /* de-constructor */
~XList(); ~TensorListBase();
/* utilities */ /* add an item into the list */
void Create(int myMaxNum, XMem * myMem); void Add(T&& item);
void Add(const void * item);
void Add(void ** inputItems, int inputItemCount); /* add an item into the list */
void AddList(XList * l); void Add(const T& item);
void AddInt(int i);
void Insert(int pos, void * item); /* add a number of items into the list */
void * GetItem(int i) const; void Add(T* inputItems, int inputItemCount);
int GetItemInt(int i);
void SetItem(int i, void * item); /* append a list to the current list */
void SetItemInt(int i, int item); void AddList(TensorListBase* l);
int FindFirst(void * item); /* insert an item to the given position of the list */
void Insert(int pos, const T& item);
/* insert an item to the given position of the list */
void Insert(int pos, T&& item);
/* get the item at position i */
T& GetItem(int i) const;
/* set the item at position i */
void SetItem(int i, const T& item);
/* set the item at position i */
void SetItem(int i, T&& item);
/* find the position of the first matched item */
int FindFirst(const T& item);
/* clear the data array */
void Clear(); void Clear();
void ClearStringList();
void Sort(int itemSize, ListCompare comp); /* sort the list */
void Sort(int itemSize);
/* reverse the list */
void Reverse(); void Reverse();
/* remove the item at position i */
void Remove(int i); void Remove(int i);
XList * Copy(XMem * myMem);
/* copy the list */
TensorListBase* Copy(XMem* myMem);
/* shuffle the list */
void Shuffle(int nround = 10, int beg = -1, int len = 0); void Shuffle(int nround = 10, int beg = -1, int len = 0);
/* short */ /* short */
_XINLINE_ void * Get(int i) {return GetItem(i);}; T& operator[] (int i) {
_XINLINE_ int GetInt(int i) {return GetItemInt(i);}; return GetItem(i);
_XINLINE_ void Set(int i, void * item) {SetItem(i, item);}; };
_XINLINE_ void SetInt(int i, int item) {SetItemInt(i, item);}; T& Get(int i) { return GetItem(i); };
void Set(int i, T item) { SetItem(i, item); };
}; };
extern XList NULLList; struct XTensor;
typedef TensorListBase<int> IntList;
typedef TensorListBase<char> CharList;
typedef TensorListBase<char*> StrList;
typedef TensorListBase<long> LongList;
typedef TensorListBase<float> FloatList;
typedef TensorListBase<short> ShortList;
typedef TensorListBase<void*> XList;
typedef TensorListBase<XTensor*> TensorList;
} } /* end of the nts (NiuTrans.Tensor) namespace */
/* end of the nts (NiuTrans.Tensor) namespace */
#endif #endif // __TensorList_H__
...@@ -34,6 +34,11 @@ namespace nts{ ...@@ -34,6 +34,11 @@ namespace nts{
int testxmemid = 0; int testxmemid = 0;
void * recordp = NULL; void * recordp = NULL;
/*
for managing the memories
*/
XMemManager GMems;
XMem * GMem; XMem * GMem;
/* constructor */ /* constructor */
...@@ -48,6 +53,7 @@ XMem::XMem() ...@@ -48,6 +53,7 @@ XMem::XMem()
strcpy(name, "xmem"); strcpy(name, "xmem");
signature = 0; signature = 0;
mergeFreeOTF = true; mergeFreeOTF = true;
isInitialized = false;
} }
/* /*
...@@ -58,7 +64,7 @@ constructor ...@@ -58,7 +64,7 @@ constructor
>> myMode - mode of running the memory pool >> myMode - mode of running the memory pool
UNI_FREE: free all the space at the end of using the memory pool UNI_FREE: free all the space at the end of using the memory pool
FREE_ON_THE_FLY: normal "malloc" and "free" mode FREE_ON_THE_FLY: normal "malloc" and "free" mode
>> myBlockSize - size of memory block >> myBlockSize - size of a memory block
>> myBlockNum - number of memory blocks >> myBlockNum - number of memory blocks
>> myBufSize - size of buffer >> myBufSize - size of buffer
*/ */
...@@ -103,7 +109,7 @@ initialize it ...@@ -103,7 +109,7 @@ initialize it
>> myMode - mode of running the memory pool >> myMode - mode of running the memory pool
UNI_FREE: free all the space at the end of using the memory pool UNI_FREE: free all the space at the end of using the memory pool
FREE_ON_THE_FLY: normal "malloc" and "free" mode FREE_ON_THE_FLY: normal "malloc" and "free" mode
>> myBlockSize - size of memory block >> myBlockSize - size of a memory block
>> myBlockNum - number of memory blocks >> myBlockNum - number of memory blocks
>> myBufSize - size of buffer >> myBufSize - size of buffer
*/ */
...@@ -164,6 +170,7 @@ void XMem::Initialize(int myDevID, MEMPOOL_MODE myMode, MTYPE myBlockSize, int m ...@@ -164,6 +170,7 @@ void XMem::Initialize(int myDevID, MEMPOOL_MODE myMode, MTYPE myBlockSize, int m
#endif #endif
signature++; signature++;
isInitialized = true;
} }
/* free memory */ /* free memory */
...@@ -216,9 +223,9 @@ void XMem::Free(int myDevID, void * mem) ...@@ -216,9 +223,9 @@ void XMem::Free(int myDevID, void * mem)
} }
} }
/* /*
get signature get the signature
<< return - return the signature << return - the signature
*/ */
MTYPE XMem::GetSignature() MTYPE XMem::GetSignature()
{ {
...@@ -226,7 +233,7 @@ MTYPE XMem::GetSignature() ...@@ -226,7 +233,7 @@ MTYPE XMem::GetSignature()
} }
/* /*
use string as the name of the memory pool set the name of the memory pool
>> myName - name of the memory pool >> myName - name of the memory pool
*/ */
void XMem::SetName(const char * myName) void XMem::SetName(const char * myName)
...@@ -259,7 +266,7 @@ void XMem::SetDevice(int myDevID) ...@@ -259,7 +266,7 @@ void XMem::SetDevice(int myDevID)
} }
/* /*
switch to the device (with fast cuda execution mode) we want to work switch to the device (with fast cuda execution mode) we intend to work on
>> myDevID - device id(-1: CPU memory, >=0: GPU device ID) >> myDevID - device id(-1: CPU memory, >=0: GPU device ID)
*/ */
void XMem::SetDeviceFast(int myDevID) void XMem::SetDeviceFast(int myDevID)
...@@ -275,7 +282,7 @@ void XMem::SetDeviceFast(int myDevID) ...@@ -275,7 +282,7 @@ void XMem::SetDeviceFast(int myDevID)
} }
/* /*
run in static mode run in the static mode
>> myIsStatic - specify if the memory allocation is static >> myIsStatic - specify if the memory allocation is static
*/ */
void XMem::SetStaticMode(bool myIsStatic) void XMem::SetStaticMode(bool myIsStatic)
...@@ -300,7 +307,7 @@ void XMem::SetComputationMode(bool myIsForComputation) ...@@ -300,7 +307,7 @@ void XMem::SetComputationMode(bool myIsForComputation)
cublasDestroy(cublasHandle); cublasDestroy(cublasHandle);
if(myIsForComputation) if(myIsForComputation)
CheckNTErrors((enum curandStatus)cublasCreate(&cublasHandle) == CURAND_STATUS_SUCCESS, CheckNTErrors((enum curandStatus)cublasCreate(&cublasHandle) == CURAND_STATUS_SUCCESS,
"Cannot create the cublas handle."); "Cannot create the cublas handle.");
SetDevice(devIDBackup); SetDevice(devIDBackup);
#endif #endif
...@@ -316,11 +323,11 @@ void XMem::SetIndex(INT_64 indexSize, MTYPE minSizeFirst, int minSizeNum) ...@@ -316,11 +323,11 @@ void XMem::SetIndex(INT_64 indexSize, MTYPE minSizeFirst, int minSizeNum)
{ {
delete[] memIndex; delete[] memIndex;
delete[] memIndex2; delete[] memIndex2;
delete[] minSizeIndex; delete[] minSizeIndex;
nodeNum = indexSize; nodeNum = indexSize;
nodeNumUsed = minSizeNum * 2; nodeNumUsed = minSizeNum * 2;
indexEntryNum = minSizeNum; indexEntryNum = minSizeNum;
memIndex = new MPieceNode[nodeNum]; memIndex = new MPieceNode[nodeNum];
memset(memIndex, 0, sizeof(MPieceNode) * nodeNum); memset(memIndex, 0, sizeof(MPieceNode) * nodeNum);
...@@ -328,12 +335,12 @@ void XMem::SetIndex(INT_64 indexSize, MTYPE minSizeFirst, int minSizeNum) ...@@ -328,12 +335,12 @@ void XMem::SetIndex(INT_64 indexSize, MTYPE minSizeFirst, int minSizeNum)
memIndex2 = new MPieceNode[nodeNum]; memIndex2 = new MPieceNode[nodeNum];
memset(memIndex2, 0, sizeof(MPieceNode) * nodeNum); memset(memIndex2, 0, sizeof(MPieceNode) * nodeNum);
minSizeIndex = new MTYPE[indexEntryNum]; minSizeIndex = new MTYPE[indexEntryNum];
memset(minSizeIndex, 0, sizeof(MTYPE) * indexEntryNum); memset(minSizeIndex, 0, sizeof(MTYPE) * indexEntryNum);
minSizeIndex[0] = minSizeFirst; minSizeIndex[0] = minSizeFirst;
for(int i = 1; i < indexEntryNum; i++) for(int i = 1; i < indexEntryNum; i++)
minSizeIndex[i] = minSizeIndex[i - 1] * 2; minSizeIndex[i] = minSizeIndex[i - 1] * 2;
indexOffset = GetMSB(minSizeFirst); indexOffset = GetMSB(minSizeFirst);
} }
...@@ -752,8 +759,8 @@ void * XMem::AllocStandard(int myDevID, MTYPE mySize, bool myIsRebuiltIndex) ...@@ -752,8 +759,8 @@ void * XMem::AllocStandard(int myDevID, MTYPE mySize, bool myIsRebuiltIndex)
/* if all index nodes are used, we rebuild the index to release the nodes that are free */ /* if all index nodes are used, we rebuild the index to release the nodes that are free */
if(nodeNumUsed == nodeNum){ if(nodeNumUsed == nodeNum){
RebuildIndex(); RebuildIndex();
CheckNTErrors(nodeNumUsed < nodeNum, "No enough index nodes for the memory pool!"); CheckNTErrors(nodeNumUsed < nodeNum, "No enough index nodes for the memory pool!");
} }
/*if(testxmemid == 30){ /*if(testxmemid == 30){
...@@ -956,8 +963,8 @@ release a piece of memory as "free" ...@@ -956,8 +963,8 @@ release a piece of memory as "free"
*/ */
void XMem::ReleaseStandard(int myDevID, void * p, MTYPE size) void XMem::ReleaseStandard(int myDevID, void * p, MTYPE size)
{ {
if(p == NULL) if(p == NULL)
return; return;
if(size <= minSizeIndex[0]) if(size <= minSizeIndex[0])
size = minSizeIndex[0]; size = minSizeIndex[0];
...@@ -1087,7 +1094,7 @@ void XMem::RebuildIndex() ...@@ -1087,7 +1094,7 @@ void XMem::RebuildIndex()
block->mem = NULL; block->mem = NULL;
} }
else{ else{
/* if the block is in use, we build the index */ /* if the block is in use, we build the index */
int pieceCount = 0; int pieceCount = 0;
MTYPE size = 0; MTYPE size = 0;
MHeader * newLast = NULL; MHeader * newLast = NULL;
...@@ -1488,4 +1495,179 @@ cublasHandle_t * XMem::GetCublasHandle() ...@@ -1488,4 +1495,179 @@ cublasHandle_t * XMem::GetCublasHandle()
#endif #endif
/*
constructor
all the setup is delegated to Initialize()
*/
XMemManager::XMemManager()
{
Initialize();
}
/*
de-constructor
the body is empty: nothing is released explicitly here
*/
XMemManager::~XMemManager()
{
}
/*
get the size of the host (CPU) memory
  - Windows: the available physical memory reported by GlobalMemoryStatusEx
  - macOS:   the value of the HW_MEMSIZE sysctl
             NOTE(review): HW_MEMSIZE is presumably the installed memory rather
             than the free memory - confirm
  - others:  (number of available physical pages) * (page size), via sysconf
<< return - the memory size, or 0 if the underlying system call fails
*/
MTYPE XMemManager::GetAvailableMemory()
{
    /* a 64-bit counter is required here: "unsigned long" has only 32 bits on
       Windows and would truncate any size of 4GB or more */
    unsigned long long freeMem = 0;

#if __APPLE__
    int mib[2] = {CTL_HW, HW_MEMSIZE};
    unsigned int namelen = sizeof(mib) / sizeof(mib[0]);
    unsigned long long size;
    size_t len = sizeof(size);
    if (sysctl(mib, namelen, &size, &len, NULL, 0) < 0){
        ShowNTErrors("Cannot get memory size on Mac!");
    }
    else{
        return size;
    }
#elif _WIN32
    MEMORYSTATUSEX memoryStatus;
    memoryStatus.dwLength = sizeof(memoryStatus);
    if (GlobalMemoryStatusEx(&memoryStatus)){
        freeMem = memoryStatus.ullAvailPhys;
    }
#else
    long pages = sysconf(_SC_AVPHYS_PAGES);
    long page_size = sysconf(_SC_PAGE_SIZE);

    /* sysconf returns -1 on error; multiply in 64 bits so that the product
       cannot overflow where "long" is a 32-bit type */
    if (pages > 0 && page_size > 0)
        freeMem = (unsigned long long)pages * (unsigned long long)page_size;
#endif

    return (MTYPE)freeMem;
}
/*
get the amount of free memory on a GPU
>> devID - id of the GPU device to query
<< return - the free memory reported by cudaMemGetInfo,
            or 0 if the code is compiled without USE_CUDA
*/
MTYPE XMemManager::GetAvailableGPUMemory(int devID)
{
    size_t available = 0;

#ifdef USE_CUDA
    size_t capacity = 0;

    /* make devID the current device before asking for its memory status */
    cudaSetDevice(devID);

    const bool queried = (cudaMemGetInfo(&available, &capacity) == cudaSuccess);
    if (!queried){
        XPRINT(0, stderr, "cannot get GPU memory information.");
        exit(1);
    }
#endif

    return (MTYPE)available;
}
/*
choose the buffer size of a memory pool according to the free memory
the larger the free memory, the larger the buffer:
    freeMem >= 2048 * MILLION  ->  512 * MILLION
    freeMem >= 1024 * MILLION  ->  256 * MILLION
    freeMem >=  512 * MILLION  ->  128 * MILLION
    freeMem >=  256 * MILLION  ->   64 * MILLION
    freeMem >=  128 * MILLION  ->   32 * MILLION
    otherwise                  ->    0
>> freeMem - size of the free memory
>> myBufSize - where the chosen buffer size is written
*/
void XMemManager::GetBufferSize(MTYPE freeMem, MTYPE * myBufSize)
{
    /* test the thresholds from the largest to the smallest; the first one
       that is reached decides the result */
    if (freeMem >= MILLION * 2048)
        *myBufSize = MILLION * 512;
    else if (freeMem >= MILLION * 1024)
        *myBufSize = MILLION * 256;
    else if (freeMem >= MILLION * 512)
        *myBufSize = MILLION * 128;
    else if (freeMem >= MILLION * 256)
        *myBufSize = MILLION * 64;
    else if (freeMem >= MILLION * 128)
        *myBufSize = MILLION * 32;
    else
        *myBufSize = 0;
}
/*
initialize the manager and set the global memory information
it seeds rand() with the current time, frees every memory pool and
counts the devices (the program exits if the GPUs cannot be counted)
*/
void XMemManager::Initialize()
{
    srand((unsigned int)time(NULL));

    Free();

    /* one entry is kept for the CPU side (we actually do not care about
       how many CPUs are in use) */
    nCPUMem = 1;

    /* the number of GPUs stays 0 unless CUDA is enabled */
    nGPUMem = 0;

#ifdef USE_CUDA
    const bool counted = (cudaGetDeviceCount(&nGPUMem) == cudaSuccess);
    if (!counted) {
        XPRINT(0, stderr, "cannot get GPU information.");
        exit(1);
    }
#endif
}
/*
free all the memory pools
Free() is called on every slot of the CPU array and then of the GPU array
*/
void XMemManager::Free()
{
    int slot = 0;
    while (slot < MAX_CPU_MEM_NUM) {
        CPUMems[slot].Free();
        slot++;
    }

    slot = 0;
    while (slot < MAX_GPU_MEM_NUM) {
        GPUMems[slot].Free();
        slot++;
    }
}
/*
get the global memory pool of a device
the pool is initialized on its first use, with a buffer size chosen
by GetBufferSize from the memory that is currently free
>> devID - device id (<0: CPU memory, >=0: GPU device ID)
<< return - the memory pool, or NULL if devID is not a valid GPU id
*/
XMem * XMemManager::GetMem(const int devID)
{
    XMem * mem = NULL;

    if (devID < 0){
        /* all CPU requests share the first CPU pool */
        if(!CPUMems[0].isInitialized){
            MTYPE freeMem = GetAvailableMemory();
            MTYPE myBufSize = 0;
            GetBufferSize(freeMem, &myBufSize);
            CPUMems[0].Initialize(-1, FREE_ON_THE_FLY,
                                  MIN_BLOCK_SIZE_FOR_MEMPOOL,
                                  MIN_BLOCK_NUM_FOR_MEMPOOL,
                                  myBufSize);
        }
        mem = CPUMems;
    }
    else if (devID < nGPUMem && devID < MAX_GPU_MEM_NUM){
        /* devID must also fit in the GPU pool array: nGPUMem is the number of
           devices found by CUDA and is not bounded by MAX_GPU_MEM_NUM */
        if(!GPUMems[devID].isInitialized){
            MTYPE freeMem = GetAvailableGPUMemory(devID);
            MTYPE myBufSize = 0;
            GetBufferSize(freeMem, &myBufSize);
            GPUMems[devID].Initialize(devID, FREE_ON_THE_FLY,
                                      MIN_BLOCK_SIZE_FOR_MEMPOOL,
                                      MIN_BLOCK_NUM_FOR_MEMPOOL,
                                      myBufSize);
        }
        mem = GPUMems + devID;
    }
    else{
        XPRINT1(0, stderr, "Cannot get the memory (%d). Please check your device id!", devID);
    }

    return mem;
}
/* get global memory size */
int XMemManager::GetMemSize(const int devID, MTYPE * myBlockSize, int * myBlockNum, MTYPE * myBufSize)
{
XMem * mem = GetMem(devID);
int result = 0;
if (mem != NULL){
*myBlockSize = mem->maxBlockSize;
*myBlockNum = mem->blockNum;
*myBufSize = mem->bufSize;
result = 1;
}
return result;
}
/* show memory information */
void XMemManager::ShowMemInfo()
{
XPRINT(1, stderr, "Memory Information:\n");
MTYPE myBlockSize, myBufSize;
int myBlockNum;
for(int i = 0; i < nCPUMem; i++){
GetMemSize(-1, &myBlockSize, &myBlockNum, &myBufSize);
XPRINT3(1, stderr, " - id:-1 CPU, blockSize:%lld, blockNum:%d, bufSize:%lld\n", myBlockSize, myBlockNum, myBufSize);
}
for(int i = 0; i < nGPUMem; i++){
GetMemSize(i, &myBlockSize, &myBlockNum, &myBufSize);
XPRINT4(1, stderr, " - id:%2d GPU, blockSize:%lld, blockNum:%d, bufSize:%lld\n", i, myBlockSize, myBlockNum, myBufSize);
}
}
} /* end of the nts (NiuTrans.Tensor) namespace */ } /* end of the nts (NiuTrans.Tensor) namespace */
...@@ -39,6 +39,15 @@ ...@@ -39,6 +39,15 @@
#include <curand.h> #include <curand.h>
#endif #endif
#ifdef __APPLE__
#include <sys/types.h>
#include <sys/sysctl.h>
#elif WIN32
#include <windows.h>
#else
#include <unistd.h>
#endif
/* the nts (NiuTrans.Tensor) namespace */ /* the nts (NiuTrans.Tensor) namespace */
namespace nts{ namespace nts{
...@@ -51,8 +60,10 @@ typedef long long INT_64; ...@@ -51,8 +60,10 @@ typedef long long INT_64;
#define CUDA_HOST_MALLOC 1 #define CUDA_HOST_MALLOC 1
#define MY_PITCH CUDA_PITCH #define MY_PITCH CUDA_PITCH
#define BUF_PITCH 256 #define BUF_PITCH 256
#define MIN_BLOCK_SIZE_FOR_MEMPOOL 128 * 1024 * 1024 #define MIN_BLOCK_SIZE_FOR_MEMPOOL 256 * 1024 * 1024
#define MIN_BLOCK_NUM_FOR_MEMPOOL 1024 #define MIN_BLOCK_NUM_FOR_MEMPOOL 1024
#define MAX_CPU_MEM_NUM 16
#define MAX_GPU_MEM_NUM 16
/* /*
mode of runnig a memory pool mode of runnig a memory pool
...@@ -202,6 +213,9 @@ public: ...@@ -202,6 +213,9 @@ public:
MTYPE curUsedPin; MTYPE curUsedPin;
MTYPE bufUsedPin; MTYPE bufUsedPin;
/* indicates whether the memory pool is initialized */
bool isInitialized;
#ifdef USE_CUDA #ifdef USE_CUDA
/* handle used for cublas */ /* handle used for cublas */
cublasHandle_t cublasHandle; cublasHandle_t cublasHandle;
...@@ -413,6 +427,61 @@ public: ...@@ -413,6 +427,61 @@ public:
}; };
/*
a class for the management of memory
it keeps an array of memory pools for CPUs and an array of memory pools
for GPUs, and returns the pool of a device on request (see GetMem)
*/
class XMemManager
{
private:
/* cpu memory pools (an array of MAX_CPU_MEM_NUM pools) */
XMem CPUMems[MAX_CPU_MEM_NUM];
/* number of cpu memory pools (set in Initialize) */
int nCPUMem;
/* gpu memory pools (an array of MAX_GPU_MEM_NUM pools, indexed by device id) */
XMem GPUMems[MAX_GPU_MEM_NUM];
/* number of gpu memory pools (set in Initialize) */
int nGPUMem;
public:
/* constructor */
XMemManager();
/* de-constructor */
~XMemManager();
/* get the size of the host memory */
MTYPE GetAvailableMemory();
/* get the size of the free memory on the GPU devID */
MTYPE GetAvailableGPUMemory(int devID);
/* choose a buffer size (written to myBufSize) according to the free memory */
void GetBufferSize(MTYPE freeMem, MTYPE * myBufSize);
/* initialize it and set the global memory information */
void Initialize();
/* free all the cpu and gpu memory pools */
void Free();
/* get the global memory pool of a device (devID < 0: CPU, devID >= 0: GPU) */
XMem * GetMem(const int devID);
/* get the block size, block number and buffer size of the global memory pool
   of a device; returns 1 on success and 0 otherwise */
int GetMemSize(const int devID, MTYPE * myBlockSize, int * myBlockNum, MTYPE * myBufSize);
/* show memory information */
void ShowMemInfo();
};
/* managing the memories */
extern XMemManager GMems;
extern XMem * GMem; extern XMem * GMem;
extern int testxmemid; extern int testxmemid;
......
...@@ -59,6 +59,8 @@ const char * GetOPName(int type) ...@@ -59,6 +59,8 @@ const char * GetOPName(int type)
return "M_DIV"; return "M_DIV";
else if (type == MATH_DIVDIM) else if (type == MATH_DIVDIM)
return "M_DIVDIM"; return "M_DIVDIM";
else if (type == MATH_MASK)
return "M_MASK";
else if (type == MATH_MATRIXMUL) else if (type == MATH_MATRIXMUL)
return "M_MATRIXMUL"; return "M_MATRIXMUL";
else if (type == MATH_MATRIXMULBATCHED) else if (type == MATH_MATRIXMULBATCHED)
...@@ -108,7 +110,7 @@ const char * GetOPName(int type) ...@@ -108,7 +110,7 @@ const char * GetOPName(int type)
else if (type == REDUCE_REDUCEVARIANCE) else if (type == REDUCE_REDUCEVARIANCE)
return "R_REDUCEVARIANCE"; return "R_REDUCEVARIANCE";
} }
else if ((type & DATA_BASE) != 0) { else if ((type & DATA_BASE) != 0) {
if (type == GETANDSET_CONVERTDATATYPE) if (type == GETANDSET_CONVERTDATATYPE)
return "G_CONVERTDATATYPE"; return "G_CONVERTDATATYPE";
else if (type == GETANDSET_INDEXTOONEHOT) else if (type == GETANDSET_INDEXTOONEHOT)
...@@ -118,8 +120,10 @@ const char * GetOPName(int type) ...@@ -118,8 +120,10 @@ const char * GetOPName(int type)
else if (type == GETANDSET_SELECT) else if (type == GETANDSET_SELECT)
return "G_SELECT"; return "G_SELECT";
} }
else if ((type & SHAPE_BASE) != 0) { else if ((type & SHAPE_BASE) != 0){
if (type == MOVEMENT_COPYINDEXED) if (type == GETANDSET_SELECT)
return "G_SELECT";
else if (type == MOVEMENT_COPYINDEXED)
return "M_COPYINDEXED"; return "M_COPYINDEXED";
else if (type == MOVEMENT_COPYVALUES) else if (type == MOVEMENT_COPYVALUES)
return "M_COPYVALUES"; return "M_COPYVALUES";
......
...@@ -48,7 +48,8 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -48,7 +48,8 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#define MATH_CLIP MATH_ROUND + 1 #define MATH_CLIP MATH_ROUND + 1
#define MATH_DIV MATH_CLIP + 1 #define MATH_DIV MATH_CLIP + 1
#define MATH_DIVDIM MATH_DIV + 1 #define MATH_DIVDIM MATH_DIV + 1
#define MATH_MATRIXMUL MATH_DIVDIM + 1 #define MATH_MASK MATH_DIVDIM + 1
#define MATH_MATRIXMUL MATH_MASK + 1
#define MATH_MATRIXMULBATCHED MATH_MATRIXMUL + 1 #define MATH_MATRIXMULBATCHED MATH_MATRIXMUL + 1
#define MATH_MULTIPLY MATH_MATRIXMULBATCHED + 1 #define MATH_MULTIPLY MATH_MATRIXMULBATCHED + 1
#define MATH_MULTIPLYDIM MATH_MULTIPLY + 1 #define MATH_MULTIPLYDIM MATH_MULTIPLY + 1
...@@ -85,6 +86,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -85,6 +86,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#define GETANDSET_SELECT GETANDSET_ONEHOTTOINDEX + 1 #define GETANDSET_SELECT GETANDSET_ONEHOTTOINDEX + 1
#define SHAPE_BASE DATA_BASE * 2 #define SHAPE_BASE DATA_BASE * 2
#define MOVEMENT SHAPE_BASE + 1 #define MOVEMENT SHAPE_BASE + 1
#define MOVEMENT_COPYINDEXED MOVEMENT + 1 #define MOVEMENT_COPYINDEXED MOVEMENT + 1
#define MOVEMENT_COPYVALUES MOVEMENT_COPYINDEXED + 1 #define MOVEMENT_COPYVALUES MOVEMENT_COPYINDEXED + 1
......
...@@ -146,7 +146,7 @@ run a set of jobs in parallel ...@@ -146,7 +146,7 @@ run a set of jobs in parallel
>> jobArgs - the list of arguments for each job >> jobArgs - the list of arguments for each job
>> sleepTime - time to sleep (in ms) for each round >> sleepTime - time to sleep (in ms) for each round
*/ */
void XPRunner::Run(XList * jobFunctions, XList * jobArgs, float sleepTime) void XPRunner::Run(TensorList * jobFunctions, TensorList * jobArgs, float sleepTime)
{ {
if(threadNum <= 0){ if(threadNum <= 0){
XPRINT(1, stderr, "Error! No threads were created!\n"); XPRINT(1, stderr, "Error! No threads were created!\n");
...@@ -195,7 +195,7 @@ void XPRunner::Run(XList * jobFunctions, XList * jobArgs, float sleepTime) ...@@ -195,7 +195,7 @@ void XPRunner::Run(XList * jobFunctions, XList * jobArgs, float sleepTime)
TFunction function = (TFunction)jobFunctions->GetItem(jobArgs->count - c); TFunction function = (TFunction)jobFunctions->GetItem(jobArgs->count - c);
/* the arguments that are passed to the function */ /* the arguments that are passed to the function */
volatile XList * args = (XList*)jobArgs->GetItem(jobArgs->count - c); volatile TensorList * args = (TensorList*)jobArgs->GetItem(jobArgs->count - c);
/* thread */ /* thread */
XThread * thread = threads + availableThreads[i]; XThread * thread = threads + availableThreads[i];
......
...@@ -106,7 +106,7 @@ public: ...@@ -106,7 +106,7 @@ public:
void KillThreads(); void KillThreads();
/* run a set of jobs in parallel */ /* run a set of jobs in parallel */
void Run(XList * jobFunctions, XList * jobArgs, float sleepTime = 0); void Run(TensorList * jobFunctions, TensorList * jobArgs, float sleepTime = 0);
/* get the number of parallel jobs to run */ /* get the number of parallel jobs to run */
int GetJobNum(int size); int GetJobNum(int size);
......
...@@ -42,7 +42,7 @@ job item used in queues ...@@ -42,7 +42,7 @@ job item used in queues
JobQueueNode::JobQueueNode() JobQueueNode::JobQueueNode()
{ {
job = NULL; job = NULL;
args = new XList(1); args = new TensorList(1);
} }
/* de-constructor */ /* de-constructor */
...@@ -67,7 +67,7 @@ XQueue::XQueue(int mySize) ...@@ -67,7 +67,7 @@ XQueue::XQueue(int mySize)
head = 0; head = 0;
tail = 0; tail = 0;
isJobQueue = false; isJobQueue = false;
jobDequeuerArgs = new XList(1); jobDequeuerArgs = new TensorList(1);
jobDequeuerBreak = false; jobDequeuerBreak = false;
runningJobCount = 0; runningJobCount = 0;
jobStream = NULL; jobStream = NULL;
...@@ -188,8 +188,10 @@ void XQueue::RunJobConsumer(int jobDevID) ...@@ -188,8 +188,10 @@ void XQueue::RunJobConsumer(int jobDevID)
isJobQueue = true; isJobQueue = true;
jobDequeuerArgs->Clear(); jobDequeuerArgs->Clear();
jobDequeuerArgs->Add(this);
jobDequeuerArgs->Add(jobDevID >= 0 ? devids + jobDevID : &cpuid); // warning: this may cause unknown error
jobDequeuerArgs->Add((XTensor*)this);
jobDequeuerArgs->Add(jobDevID >= 0 ? (XTensor*)(devids + jobDevID) : (XTensor*)&cpuid);
jobDequeuer.function = (TFunction)DequeueJobs; jobDequeuer.function = (TFunction)DequeueJobs;
jobDequeuer.argv = jobDequeuerArgs; jobDequeuer.argv = jobDequeuerArgs;
...@@ -211,7 +213,7 @@ void XQueue::StopJobConsumer() ...@@ -211,7 +213,7 @@ void XQueue::StopJobConsumer()
} }
/* add a job item to process */ /* add a job item to process */
void XQueue::EnqueueJob(void * job, XList * jobArgs) void XQueue::EnqueueJob(void * job, TensorList * jobArgs)
{ {
MUTEX_LOCK(jobQueueMutex); MUTEX_LOCK(jobQueueMutex);
runningJobCount++; runningJobCount++;
...@@ -225,7 +227,7 @@ void XQueue::EnqueueJob(void * job, XList * jobArgs) ...@@ -225,7 +227,7 @@ void XQueue::EnqueueJob(void * job, XList * jobArgs)
} }
/* job item consumer */ /* job item consumer */
void XQueue::DequeueJobs(XList * args) void XQueue::DequeueJobs(TensorList * args)
{ {
CheckNTErrors((args->count == 2), "Illegal arguments!"); CheckNTErrors((args->count == 2), "Illegal arguments!");
......
...@@ -52,7 +52,7 @@ public: ...@@ -52,7 +52,7 @@ public:
void * job; void * job;
/* arguments of the job */ /* arguments of the job */
XList * args; TensorList * args;
public: public:
/* constructor */ /* constructor */
...@@ -102,7 +102,7 @@ private: ...@@ -102,7 +102,7 @@ private:
XThread jobDequeuer; XThread jobDequeuer;
/* argument list of jobDequeuer */ /* argument list of jobDequeuer */
XList * jobDequeuerArgs; TensorList * jobDequeuerArgs;
/* indicates whether jobDequeuer stops */ /* indicates whether jobDequeuer stops */
bool jobDequeuerBreak; bool jobDequeuerBreak;
...@@ -141,11 +141,11 @@ public: ...@@ -141,11 +141,11 @@ public:
void StopJobConsumer(); void StopJobConsumer();
/* add a job item to process */ /* add a job item to process */
void EnqueueJob(void * job, XList * jobArgs); void EnqueueJob(void * job, TensorList * jobArgs);
/* job item consumer */ /* job item consumer */
static static
void DequeueJobs(XList * args); void DequeueJobs(TensorList * args);
/* get the break flag */ /* get the break flag */
bool GetJobBreak(); bool GetJobBreak();
......
...@@ -85,7 +85,7 @@ namespace nts{ ...@@ -85,7 +85,7 @@ namespace nts{
#endif #endif
typedef void (*TFunction) (volatile XList*); typedef void (*TFunction) (volatile TensorList*);
/* /*
This is a class that wraps the standard implementation of threading This is a class that wraps the standard implementation of threading
...@@ -133,7 +133,7 @@ public: ...@@ -133,7 +133,7 @@ public:
/* arguments (for the function to run) */ /* arguments (for the function to run) */
volatile volatile
XList * argv; TensorList * argv;
/* a flag to break */ /* a flag to break */
volatile volatile
......
...@@ -36,13 +36,9 @@ ...@@ -36,13 +36,9 @@
#include "arithmetic/MatrixMulBatched.h" #include "arithmetic/MatrixMulBatched.h"
#include "arithmetic/Multiply.h" #include "arithmetic/Multiply.h"
#include "arithmetic/MultiplyDim.h" #include "arithmetic/MultiplyDim.h"
#include "arithmetic/Negate.h"
#include "arithmetic/Sign.h"
#include "arithmetic/Sub.h" #include "arithmetic/Sub.h"
#include "arithmetic/SubDim.h" #include "arithmetic/SubDim.h"
#include "arithmetic/Sum.h" #include "arithmetic/Sum.h"
#include "arithmetic/SumByColumnTV.h"
#include "arithmetic/SumByColumnVT.h"
#include "arithmetic/SumDim.h" #include "arithmetic/SumDim.h"
#include "arithmetic/XTensorBLAS.h" #include "arithmetic/XTensorBLAS.h"
#include "arithmetic/MulAndShift.h" #include "arithmetic/MulAndShift.h"
...@@ -56,7 +52,6 @@ ...@@ -56,7 +52,6 @@
#include "math/Clip.h" #include "math/Clip.h"
#include "math/Compare.h" #include "math/Compare.h"
#include "math/Normalize.h" #include "math/Normalize.h"
#include "math/Power.h"
#include "math/ScaleAndShift.h" #include "math/ScaleAndShift.h"
#include "math/Unary.h" #include "math/Unary.h"
......
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
#include "../../XTensor.h" #include "../../XTensor.h"
#include "../../XName.h" #include "../../XName.h"
#include "../../XUtility.h"
#include "Div.h" #include "Div.h"
#include "Div.cuh" #include "Div.cuh"
#include "DivDim.h" #include "DivDim.h"
...@@ -41,12 +42,15 @@ where i is the index of the item ...@@ -41,12 +42,15 @@ where i is the index of the item
*/ */
void _Div(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, int leadingDim) void _Div(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, int leadingDim)
{ {
int leadingDimRDI = a->order - leadingDim - 1;
CheckNTErrors((a->unitNum <= c->unitNum && b->unitNum <= c->unitNum), CheckNTErrors((a->unitNum <= c->unitNum && b->unitNum <= c->unitNum),
"Unmatched tensors in multiplication!"); "Unmatched tensors in multiplication!");
CheckNTErrors((a->order == b->order && a->order == c->order), CheckNTErrors((a->order == b->order && a->order == c->order),
"Unmatched tensors!"); "Unmatched tensors!");
CheckDev(a->devID, b->devID);
int leadingDimRDI = a->order - leadingDim - 1;
#ifdef USE_CUDA #ifdef USE_CUDA
if (a->devID >= 0 || b->devID >= 0 || c->devID >= 0) { if (a->devID >= 0 || b->devID >= 0 || c->devID >= 0) {
_CudaDiv(a, b, c, alpha, leadingDim); _CudaDiv(a, b, c, alpha, leadingDim);
...@@ -138,6 +142,23 @@ void _DivMe(XTensor * a, const XTensor * b, DTYPE alpha, int leadingDim) ...@@ -138,6 +142,23 @@ void _DivMe(XTensor * a, const XTensor * b, DTYPE alpha, int leadingDim)
_Div(a, b, a, alpha, leadingDim); _Div(a, b, a, alpha, leadingDim);
} }
/*
element-wise division of two tensors (do it on site)
keep the result in the input tensor a and return nothing
a(i) = a(i)/b(i) + \alpha * a(i)
where i is the index of the item
this is the reference-based counterpart of _DivMe: it passes the addresses
of a and b to _Div, with a being both the first input and the output
>> a - tensor a (where we keep the result)
>> b - tensor b
>> alpha - the coefficient
>> leadingDim - the dimension along which we perform broadcasting
*/
void DivMe(XTensor& a, const XTensor& b, DTYPE alpha, int leadingDim)
{
_Div(&a, &b, &a, alpha, leadingDim);
}
/* /*
return a dimension if the division is performed as DivDim (in more details in DivDim.h) return a dimension if the division is performed as DivDim (in more details in DivDim.h)
>> a - a tensor >> a - a tensor
...@@ -225,9 +246,8 @@ where i is the index of the item ...@@ -225,9 +246,8 @@ where i is the index of the item
>> c - result tensor >> c - result tensor
>> alpha - the coefficient >> alpha - the coefficient
>> leadingDim - the dimension along which we perform broadcasting >> leadingDim - the dimension along which we perform broadcasting
>> requireLink - if add operation to network
*/ */
void Div(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int leadingDim, bool requireLink) void Div(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int leadingDim)
{ {
if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) { if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
InitTensor(&c, &a); InitTensor(&c, &a);
...@@ -241,7 +261,7 @@ void Div(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int leadin ...@@ -241,7 +261,7 @@ void Div(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int leadin
/* call _Div function */ /* call _Div function */
_Div(&a, &b, &c, 0, leadingDim); _Div(&a, &b, &c, 0, leadingDim);
if (requireLink) { if (c.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_DIV); XLink::MakeLink(&a, &b, &c, MATH_DIV);
XLink::AddParamToHead(&c, alpha); XLink::AddParamToHead(&c, alpha);
...@@ -252,7 +272,7 @@ void Div(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int leadin ...@@ -252,7 +272,7 @@ void Div(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int leadin
/* call _DivDim function */ /* call _DivDim function */
_DivDim(&a, &b, &c, n, alpha); _DivDim(&a, &b, &c, n, alpha);
if (requireLink) { if (c.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_DIVDIM); XLink::MakeLink(&a, &b, &c, MATH_DIVDIM);
XLink::AddParamToHeadInt(&c, n); XLink::AddParamToHeadInt(&c, n);
......
...@@ -229,4 +229,4 @@ void _CudaDiv(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, in ...@@ -229,4 +229,4 @@ void _CudaDiv(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, in
#endif // USE_CUDA #endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
...@@ -40,6 +40,7 @@ a(i) = a(i)/b(i) + \alpha * a(i) ...@@ -40,6 +40,7 @@ a(i) = a(i)/b(i) + \alpha * a(i)
where i is the index of the element where i is the index of the element
*/ */
void _DivMe(XTensor * a, const XTensor * b, DTYPE alpha = 0.0, int leadingDim = 0); void _DivMe(XTensor * a, const XTensor * b, DTYPE alpha = 0.0, int leadingDim = 0);
void DivMe(XTensor & a, const XTensor & b, DTYPE alpha = 0.0, int leadingDim = 0);
/* /*
element-wise division of two tensors (return an XTensor structure) element-wise division of two tensors (return an XTensor structure)
...@@ -54,7 +55,7 @@ element-wise division of two tensors: ...@@ -54,7 +55,7 @@ element-wise division of two tensors:
c(i) = a(i)/b(i) + \alpha * c(i) c(i) = a(i)/b(i) + \alpha * c(i)
where i is the index of the element where i is the index of the element
*/ */
void Div(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha = 0.0, int leadingDim = 0, bool requireLink = false); void Div(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha = 0.0, int leadingDim = 0);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -19,10 +19,12 @@ ...@@ -19,10 +19,12 @@
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-08-15 * $Created by: Xu Chen (email: hello_master1954@163.com) 2018-08-15
*/ */
#include <math.h>
#include "Div.h" #include "Div.h"
#include "DivDim.h" #include "DivDim.h"
#include "DivDim.cuh" #include "DivDim.cuh"
#include "../../XName.h" #include "../../XName.h"
#include "../../XUtility.h"
#include "../movement/CopyValues.h" #include "../movement/CopyValues.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
...@@ -42,6 +44,8 @@ i.e., a is divided with b by broadcasting ...@@ -42,6 +44,8 @@ i.e., a is divided with b by broadcasting
*/ */
void _DivDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYPE alpha) void _DivDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYPE alpha)
{ {
n = MODX(n, a->order);
CheckNTErrors(a && b && c, "Empty tensor input!"); CheckNTErrors(a && b && c, "Empty tensor input!");
CheckNTErrors(a->unitNum == c->unitNum, "Unmatched tensors in division!"); CheckNTErrors(a->unitNum == c->unitNum, "Unmatched tensors in division!");
CheckNTErrors(a->dataType == b->dataType && a->dataType == c->dataType, CheckNTErrors(a->dataType == b->dataType && a->dataType == c->dataType,
...@@ -50,6 +54,8 @@ void _DivDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYPE alp ...@@ -50,6 +54,8 @@ void _DivDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYPE alp
CheckNTErrors(!a->isSparse && !b->isSparse && !c->isSparse, "Dense tensors are required!"); CheckNTErrors(!a->isSparse && !b->isSparse && !c->isSparse, "Dense tensors are required!");
CheckNTErrors(a->dimSize[n] == b->unitNum, "Wrong tensor size!"); CheckNTErrors(a->dimSize[n] == b->unitNum, "Wrong tensor size!");
CheckDev(a->devID, b->devID);
if(XTensor::IsSameShaped(a, b)){ if(XTensor::IsSameShaped(a, b)){
_Div(a, b, c, alpha); _Div(a, b, c, alpha);
return; return;
...@@ -151,6 +157,8 @@ XTensor DivDim(const XTensor &a, const XTensor &b, int n, DTYPE alpha) ...@@ -151,6 +157,8 @@ XTensor DivDim(const XTensor &a, const XTensor &b, int n, DTYPE alpha)
{ {
XTensor c(&a); XTensor c(&a);
c.SetTMPFlag(); c.SetTMPFlag();
n = MODX(n, a.order);
/* call _Div function */ /* call _Div function */
_DivDim(&a, &b, &c, n, alpha); _DivDim(&a, &b, &c, n, alpha);
...@@ -175,9 +183,8 @@ i.e., a is divided with b by broadcasting ...@@ -175,9 +183,8 @@ i.e., a is divided with b by broadcasting
>> c - where we put result. we save it in a if c is NULL >> c - where we put result. we save it in a if c is NULL
>> n - the dimension index >> n - the dimension index
>> alpha - the scaling factor >> alpha - the scaling factor
>> requireLink - if add operation to network
*/ */
void DivDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE alpha, bool requireLink) void DivDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE alpha)
{ {
if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) { if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
InitTensor(&c, &a); InitTensor(&c, &a);
...@@ -186,7 +193,7 @@ void DivDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE alpha, ...@@ -186,7 +193,7 @@ void DivDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE alpha,
/* call _Div function */ /* call _Div function */
_DivDim(&a, &b, &c, n, alpha); _DivDim(&a, &b, &c, n, alpha);
if (requireLink) { if (c.enableGrad == true) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_DIVDIM); XLink::MakeLink(&a, &b, &c, MATH_DIVDIM);
XLink::AddParamToHeadInt(&c, n); XLink::AddParamToHeadInt(&c, n);
......
...@@ -59,7 +59,7 @@ c(i) = a/b + \alpha * c ...@@ -59,7 +59,7 @@ c(i) = a/b + \alpha * c
where the size of b is equal to the n-th dimension of a, where the size of b is equal to the n-th dimension of a,
i.e., a is divided with b by broadcasting i.e., a is divided with b by broadcasting
*/ */
void DivDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE alpha = (DTYPE)0.0, bool requireLink = false); void DivDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE alpha = (DTYPE)0.0);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -130,6 +130,17 @@ void _MaskMe(XTensor * a, const XTensor * mask, DTYPE alpha) ...@@ -130,6 +130,17 @@ void _MaskMe(XTensor * a, const XTensor * mask, DTYPE alpha)
} }
/* /*
mask entries of a given tensor (on site):
a(i) = a(i) if mask(i) is non-zero
a(i) = alpha if mask(i) = 0
where i is the index of the element
*/
void MaskMe(XTensor& a, const XTensor& mask, DTYPE alpha)
{
    /* on-site masking: the tensor a is passed to _Mask as both the input and the output */
    XTensor * target = &a;

    /* call _Mask function */
    _Mask(target, &mask, target, alpha);
}
/*
mask entries of a given tensor (return an XTensor structure): mask entries of a given tensor (return an XTensor structure):
a(i) = a(i) if mask(i) is non-zero a(i) = a(i) if mask(i) is non-zero
a(i) = alpha if mask(i) = 0 a(i) = alpha if mask(i) = 0
...@@ -140,16 +151,35 @@ XTensor Mask(const XTensor &a, const XTensor &mask, DTYPE alpha) ...@@ -140,16 +151,35 @@ XTensor Mask(const XTensor &a, const XTensor &mask, DTYPE alpha)
XTensor c(&a); XTensor c(&a);
c.SetTMPFlag(); c.SetTMPFlag();
/* call _Sum function */ /* call _Mask function */
_Mask(&a, &mask, &c, alpha); _Mask(&a, &mask, &c, alpha);
/* tensor connections */ /* tensor connections */
//XLink::MakeLink(&a, &mask, &c, MATH_SUM); XLink::MakeLink(&a, &mask, &c, MATH_MASK);
//XLink::AddParamToHead(&c, alpha); XLink::AddParamToHead(&c, alpha);
// TODO!!
ShowNTErrors("TODO!");
return c; return c;
} }
/*
mask entries of a given tensor (keep the result in the tensor c):
c(i) = a(i) if mask(i) is non-zero
c(i) = alpha if mask(i) = 0
where i is the index of the element
>> a - the input tensor
>> mask - the mask tensor
>> c - where we put the result (it is initialized from a if it is
       not initialized yet or is not of the same shape as a)
>> alpha - the value that is used where mask(i) = 0
*/
void Mask(const XTensor &a, const XTensor &mask, XTensor &c, DTYPE alpha)
{
    /* (re)build c from a when it cannot hold the result as it is */
    bool isReady = c.isInit && XTensor::IsSameShaped(&a, &c);
    if (!isReady)
        InitTensor(&c, &a);

    /* call _Mask function */
    _Mask(&a, &mask, &c, alpha);

    /* no link is made when c.enableGrad is off */
    if (!c.enableGrad)
        return;

    /* tensor connections */
    XLink::MakeLink(&a, &mask, &c, MATH_MASK);
    XLink::AddParamToHead(&c, alpha);
}
} }
\ No newline at end of file
...@@ -16,10 +16,10 @@ ...@@ -16,10 +16,10 @@
*/ */
/* /*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2019-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2019-04-24
* I'll attend several conferences and workshops in the following weeks - * I'll attend several conferences and workshops in the following weeks -
* busy days :( * busy days :(
*/ */
#ifndef __MASK_H__ #ifndef __MASK_H__
#define __MASK_H__ #define __MASK_H__
...@@ -28,21 +28,22 @@ ...@@ -28,21 +28,22 @@
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* /*
mask entries of a given tensor: mask entries of a given tensor:
c(i) = a(i) if mask(i) is non-zero c(i) = a(i) if mask(i) is non-zero
c(i) = alpha if mask(i) = 0 c(i) = alpha if mask(i) = 0
where i is the index of the element where i is the index of the element
*/ */
void _Mask(const XTensor * a, const XTensor * mask, XTensor * c, DTYPE alpha); void _Mask(const XTensor * a, const XTensor * mask, XTensor * c, DTYPE alpha = 0.0);
/* /*
mask entries of a given tensor (on site): mask entries of a given tensor (on site):
a(i) = a(i) if mask(i) is non-zero a(i) = a(i) if mask(i) is non-zero
a(i) = alpha if mask(i) = 0 a(i) = alpha if mask(i) = 0
where i is the index of the element where i is the index of the element
*/ */
void _MaskMe(XTensor * a, const XTensor * mask, DTYPE alpha); void _MaskMe(XTensor * a, const XTensor * mask, DTYPE alpha = 0.0);
void MaskMe(XTensor & a, const XTensor & mask, DTYPE alpha = 0.0);
/* /*
mask entries of a given tensor (return an XTensor structure): mask entries of a given tensor (return an XTensor structure):
...@@ -52,7 +53,14 @@ where i is the index of the element ...@@ -52,7 +53,14 @@ where i is the index of the element
*/ */
XTensor Mask(const XTensor &a, const XTensor &mask, DTYPE alpha = 0.0); XTensor Mask(const XTensor &a, const XTensor &mask, DTYPE alpha = 0.0);
/*
mask entries of a given tensor (keep the result in the tensor c):
c(i) = a(i) if mask(i) is non-zero
c(i) = alpha if mask(i) = 0
where i is the index of the element
*/
void Mask(const XTensor &a, const XTensor &mask, XTensor &c, DTYPE alpha = 0.0);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
#endif // __MASK_H__ #endif // __MASK_H__
...@@ -106,9 +106,9 @@ void _MatrixMul(const XTensor * a, MATRIX_TRANS_TYPE transposedA, ...@@ -106,9 +106,9 @@ void _MatrixMul(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
cBlockNum *= b->dimSizeRDI[i]; cBlockNum *= b->dimSizeRDI[i];
} }
XList * aList = new XList(10); TensorList * aList = new TensorList(10);
XList * bList = new XList(10); TensorList * bList = new TensorList(10);
XList * cList = new XList(10); TensorList * cList = new TensorList(10);
int aDimSize[2] = { -a->dimSizeRDI[1], a->dimSizeRDI[0] }; int aDimSize[2] = { -a->dimSizeRDI[1], a->dimSizeRDI[0] };
int bDimSize[2] = { -b->dimSizeRDI[1], b->dimSizeRDI[0] }; int bDimSize[2] = { -b->dimSizeRDI[1], b->dimSizeRDI[0] };
int cDimSize[2] = { -c->dimSizeRDI[1], c->dimSizeRDI[0] }; int cDimSize[2] = { -c->dimSizeRDI[1], c->dimSizeRDI[0] };
...@@ -200,7 +200,9 @@ void _MatrixMul(const XTensor * a, MATRIX_TRANS_TYPE transposedA, ...@@ -200,7 +200,9 @@ void _MatrixMul(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
delete cList; delete cList;
} }
bool CheckMMulShape(const XTensor * a, MATRIX_TRANS_TYPE transposedA, const XTensor * b, MATRIX_TRANS_TYPE transposedB, XTensor * c) bool CheckMMulShape(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
const XTensor * b, MATRIX_TRANS_TYPE transposedB,
XTensor * c)
{ {
if (!(a && b && c)) if (!(a && b && c))
return false; return false;
...@@ -229,10 +231,13 @@ bool CheckMMulShape(const XTensor * a, MATRIX_TRANS_TYPE transposedA, const XTen ...@@ -229,10 +231,13 @@ bool CheckMMulShape(const XTensor * a, MATRIX_TRANS_TYPE transposedA, const XTen
dimSize[sub++] = bm; dimSize[sub++] = bm;
for (int i = 0; i < order; i++) { for (int i = 0; i < order; i++) {
if (dimSize[i] != c->dimSize[i]) if (dimSize[i] != c->dimSize[i]) {
delete[] dimSize;
return false; return false;
}
} }
delete[] dimSize;
return true; return true;
} }
...@@ -355,11 +360,9 @@ XTensor MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA, ...@@ -355,11 +360,9 @@ XTensor MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA,
return c; return c;
} }
void MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA, void MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA,
const XTensor &b, MATRIX_TRANS_TYPE transposedB, XTensor &c, const XTensor &b, MATRIX_TRANS_TYPE transposedB, XTensor &c,
DTYPE alpha, XPRunner * parallelRunner, bool requireLink) DTYPE alpha, DTYPE beta, XPRunner * parallelRunner)
{ {
CheckNTErrors(a.dataType == b.dataType, "Input tensors should have the same data type!"); CheckNTErrors(a.dataType == b.dataType, "Input tensors should have the same data type!");
CheckNTErrors(a.order >= 2 && b.order >= 2, "Input tensors must have a order >= 2!"); CheckNTErrors(a.order >= 2 && b.order >= 2, "Input tensors must have a order >= 2!");
...@@ -392,9 +395,9 @@ void MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA, ...@@ -392,9 +395,9 @@ void MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA,
} }
/* call _MatrixMul function */ /* call _MatrixMul function */
_MatrixMul(&a, transposedA, &b, transposedB, &c, alpha, 0, parallelRunner); _MatrixMul(&a, transposedA, &b, transposedB, &c, alpha, beta, parallelRunner);
if (requireLink) { if (c.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_MATRIXMUL); XLink::MakeLink(&a, &b, &c, MATH_MATRIXMUL);
XLink::AddParamToHeadTrans(&c, transposedA); XLink::AddParamToHeadTrans(&c, transposedA);
...@@ -455,7 +458,7 @@ XTensor MatrixMul(const XTensor &a, const XTensor &b, ...@@ -455,7 +458,7 @@ XTensor MatrixMul(const XTensor &a, const XTensor &b,
} }
void MatrixMul(const XTensor &a, const XTensor &b, XTensor &c, void MatrixMul(const XTensor &a, const XTensor &b, XTensor &c,
DTYPE alpha, XPRunner * parallelRunner, bool requireLink) DTYPE alpha, XPRunner * parallelRunner)
{ {
CheckNTErrors(a.dataType == b.dataType, "Input tensors should have the same data type!"); CheckNTErrors(a.dataType == b.dataType, "Input tensors should have the same data type!");
CheckNTErrors(a.order >= 2 && b.order >= 2, "Input tensors must have a order >= 2!"); CheckNTErrors(a.order >= 2 && b.order >= 2, "Input tensors must have a order >= 2!");
...@@ -490,7 +493,7 @@ void MatrixMul(const XTensor &a, const XTensor &b, XTensor &c, ...@@ -490,7 +493,7 @@ void MatrixMul(const XTensor &a, const XTensor &b, XTensor &c,
/* call _MatrixMul function */ /* call _MatrixMul function */
_MatrixMul(&a, X_NOTRANS, &b, X_NOTRANS, &c, alpha, 0, parallelRunner); _MatrixMul(&a, X_NOTRANS, &b, X_NOTRANS, &c, alpha, 0, parallelRunner);
if (requireLink) { if (c.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_MATRIXMUL); XLink::MakeLink(&a, &b, &c, MATH_MATRIXMUL);
XLink::AddParamToHeadTrans(&c, X_NOTRANS); XLink::AddParamToHeadTrans(&c, X_NOTRANS);
......
...@@ -40,8 +40,11 @@ bj is the j-th element tensor of B, and c_{i,j} is the (i,j) elementtensor of th ...@@ -40,8 +40,11 @@ bj is the j-th element tensor of B, and c_{i,j} is the (i,j) elementtensor of th
C should be a tensor of z * x * n * m. C should be a tensor of z * x * n * m.
Obviously C = A * B performs normal matrix multiplication if A = y * z and B = x * y. Obviously C = A * B performs normal matrix multiplication if A = y * z and B = x * y.
*/ */
void _MatrixMul(const XTensor * a, MATRIX_TRANS_TYPE transposedA, const XTensor * b, MATRIX_TRANS_TYPE transposedB, XTensor * c, void _MatrixMul(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0, XPRunner * parallelRunner = NULL); const XTensor * b, MATRIX_TRANS_TYPE transposedB,
XTensor * c,
DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0,
XPRunner * parallelRunner = NULL);
/* /*
matrix multiplication (return an XTensor structure) c = trans(a) * trans(b) * alpha matrix multiplication (return an XTensor structure) c = trans(a) * trans(b) * alpha
...@@ -56,22 +59,28 @@ bj is the j-th element tensor of B, and c_{i,j} is the (i,j) elementtensor of th ...@@ -56,22 +59,28 @@ bj is the j-th element tensor of B, and c_{i,j} is the (i,j) elementtensor of th
C should be a tensor of z * x * n * m. C should be a tensor of z * x * n * m.
Obviously C = A * B performs normal matrix multiplication if A = y * z and B = x * y. Obviously C = A * B performs normal matrix multiplication if A = y * z and B = x * y.
*/ */
XTensor MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA, const XTensor &b, MATRIX_TRANS_TYPE transposedB, XTensor MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA,
DTYPE alpha = (DTYPE)1.0, XPRunner * parallelRunner = NULL); const XTensor &b, MATRIX_TRANS_TYPE transposedB,
DTYPE alpha = (DTYPE)1.0,
XTensor MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA, const XTensor &b, MATRIX_TRANS_TYPE transposedB, XPRunner * parallelRunner = NULL);
TENSOR_DATA_TYPE dataType, DTYPE alpha = (DTYPE)1.0, XPRunner * parallelRunner = NULL);
XTensor MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA,
void MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA, const XTensor &b, MATRIX_TRANS_TYPE transposedB, const XTensor &b, MATRIX_TRANS_TYPE transposedB,
XTensor &c, DTYPE alpha = (DTYPE)1.0, XPRunner * parallelRunner = NULL, bool requireLink = false); TENSOR_DATA_TYPE dataType, DTYPE alpha = (DTYPE)1.0,
XPRunner * parallelRunner = NULL);
void MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA,
const XTensor &b, MATRIX_TRANS_TYPE transposedB,
XTensor &c,
DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0,
XPRunner * parallelRunner = NULL);
/* matrix multiplication with no transposition c = a * b * alpha*/ /* matrix multiplication with no transposition c = a * b * alpha*/
XTensor MatrixMul(const XTensor &a, const XTensor &b, XTensor MatrixMul(const XTensor &a, const XTensor &b,
DTYPE alpha = (DTYPE)1.0, XPRunner * parallelRunner = NULL); DTYPE alpha = (DTYPE)1.0, XPRunner * parallelRunner = NULL);
void MatrixMul(const XTensor &a, const XTensor &b, XTensor &c, void MatrixMul(const XTensor &a, const XTensor &b, XTensor &c,
DTYPE alpha = (DTYPE)1.0, XPRunner * parallelRunner = NULL, bool requireLink = false); DTYPE alpha = (DTYPE)1.0, XPRunner * parallelRunner = NULL);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -54,15 +54,15 @@ void _MatrixMul2D(const XTensor * a, MATRIX_TRANS_TYPE transposedA, ...@@ -54,15 +54,15 @@ void _MatrixMul2D(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
CheckNTErrors((a->order == 2 && b->order == 2 && c->order == 2), CheckNTErrors((a->order == 2 && b->order == 2 && c->order == 2),
"Input tensors must have a order = 2!"); "Input tensors must have a order = 2!");
int an = a->dimSize[0], am = a->dimSize[1]; int an = a->dimSize[0], am = a->dimSize[1];
int bn = b->dimSize[0], bm = b->dimSize[1]; int bn = b->dimSize[0], bm = b->dimSize[1];
int cn = c->dimSize[0], cm = c->dimSize[1]; int cn = c->dimSize[0], cm = c->dimSize[1];
int am2 = transposedA == X_TRANS ? an : am; int am2 = transposedA == X_TRANS ? an : am;
int an2 = transposedA == X_TRANS ? am : an; int an2 = transposedA == X_TRANS ? am : an;
int bm2 = transposedB == X_TRANS ? bn : bm; int bm2 = transposedB == X_TRANS ? bn : bm;
int bn2 = transposedB == X_TRANS ? bm : bn; int bn2 = transposedB == X_TRANS ? bm : bn;
int cm2 = cm; int cm2 = cm;
int cn2 = cn; int cn2 = cn;
CheckNTErrors((am2 == bn2 && an2 == cn2 && bm2 == cm2), CheckNTErrors((am2 == bn2 && an2 == cn2 && bm2 == cm2),
"Unmatched tensors in multiplication!"); "Unmatched tensors in multiplication!");
......
...@@ -38,17 +38,23 @@ argument5: matrix a ...@@ -38,17 +38,23 @@ argument5: matrix a
argument6: matrix b argument6: matrix b
argument7: matrix c (c=a*b*\alpha + c*beta) argument7: matrix c (c=a*b*\alpha + c*beta)
*/ */
void _MatrixMul2DMultiTheading(XList * args) void _MatrixMul2DMultiTheading(TensorList * args)
{ {
int x1 = *(int*)args->GetItem(0); CheckNTErrors(args->count == 2, "invalid argument number!");
int y1 = *(int*)args->GetItem(1); IntList * indexArgs = (IntList*)args->GetItem(0);
int x2 = *(int*)args->GetItem(2); TensorList * matrixArgs = (TensorList*)args->GetItem(1);
int y2 = *(int*)args->GetItem(3); CheckNTErrors(indexArgs->count == 4, "invalid argument number!");
XTensor * a = (XTensor*)args->GetItem(4); CheckNTErrors(matrixArgs->count == 5, "invalid argument number!");
XTensor * b = (XTensor*)args->GetItem(5);
XTensor * c = (XTensor*)args->GetItem(6); XTensor * a = matrixArgs->GetItem(0);
DTYPE alpha = *(DTYPE*)args->GetItem(7); XTensor * b = matrixArgs->GetItem(1);
DTYPE beta = *(DTYPE*)args->GetItem(8); XTensor * c = matrixArgs->GetItem(2);
DTYPE alpha = *(DTYPE*)(matrixArgs->GetItem(3));
DTYPE beta = *(DTYPE*)(matrixArgs->GetItem(4));
int x1 = indexArgs->GetItem(0);
int y1 = indexArgs->GetItem(1);
int x2 = indexArgs->GetItem(2);
int y2 = indexArgs->GetItem(3);
#ifdef FAST_MATRIX #ifdef FAST_MATRIX
int am = a->dimSize[1]; int am = a->dimSize[1];
......
...@@ -30,7 +30,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -30,7 +30,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
matrix multiplication for a block (x1,y1) - (x2,y2) matrix multiplication for a block (x1,y1) - (x2,y2)
where (x1,y1) is the upper-left corner and (x2,y2) is the bottom-right corner where (x1,y1) is the upper-left corner and (x2,y2) is the bottom-right corner
*/ */
void _MatrixMul2DMultiTheading(XList * args); void _MatrixMul2DMultiTheading(TensorList * args);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -154,7 +154,7 @@ void _MatrixMulBatchedCPU(const XTensor * a, MATRIX_TRANS_TYPE transposedA, ...@@ -154,7 +154,7 @@ void _MatrixMulBatchedCPU(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
const XTensor * b, MATRIX_TRANS_TYPE transposedB, const XTensor * b, MATRIX_TRANS_TYPE transposedB,
XTensor * c, DTYPE alpha, DTYPE beta) XTensor * c, DTYPE alpha, DTYPE beta)
{ {
CheckNTErrors((a && b && c), "Empty input tensors!"); CheckNTErrors(a && b && c, "Empty input tensors!");
CheckNTErrors(a->dataType == b->dataType && a->dataType == c->dataType, CheckNTErrors(a->dataType == b->dataType && a->dataType == c->dataType,
"Input tensors should have the same data type!"); "Input tensors should have the same data type!");
CheckNTErrors(a->order >= 2 && b->order >= 2 && c->order >= 2, CheckNTErrors(a->order >= 2 && b->order >= 2 && c->order >= 2,
...@@ -227,9 +227,9 @@ c_i = trans(a_i) * trans(b_i) * \alpha + c_i * \beta for each i in [0,count-1] ...@@ -227,9 +227,9 @@ c_i = trans(a_i) * trans(b_i) * \alpha + c_i * \beta for each i in [0,count-1]
>> alpha - scalar >> alpha - scalar
>> beta - scalar >> beta - scalar
*/ */
void _MatrixMulBatchedCPU(const XList * a, MATRIX_TRANS_TYPE transposedA, void _MatrixMulBatchedCPU(const TensorList * a, MATRIX_TRANS_TYPE transposedA,
const XList * b, MATRIX_TRANS_TYPE transposedB, const TensorList * b, MATRIX_TRANS_TYPE transposedB,
XList * c, DTYPE alpha, DTYPE beta) TensorList * c, DTYPE alpha, DTYPE beta)
{ {
CheckNTErrors(a && b && c, "Empty input lists!"); CheckNTErrors(a && b && c, "Empty input lists!");
CheckNTErrors(a->count == b->count && a->count == c->count, "Input lists must be of the same size!"); CheckNTErrors(a->count == b->count && a->count == c->count, "Input lists must be of the same size!");
......
...@@ -58,8 +58,8 @@ void _MatrixMulBatchedCPU(const XTensor * a, MATRIX_TRANS_TYPE transposedA, cons ...@@ -58,8 +58,8 @@ void _MatrixMulBatchedCPU(const XTensor * a, MATRIX_TRANS_TYPE transposedA, cons
matrix multiplication of the two tensors c = trans(a) * trans(b) * alpha + c * beta (for list inputs) matrix multiplication of the two tensors c = trans(a) * trans(b) * alpha + c * beta (for list inputs)
optimized for GPU optimized for GPU
*/ */
void _MatrixMulBatchedCPU(const XList * a, MATRIX_TRANS_TYPE transposedA, const XList * b, MATRIX_TRANS_TYPE transposedB, void _MatrixMulBatchedCPU(const TensorList * a, MATRIX_TRANS_TYPE transposedA, const TensorList * b, MATRIX_TRANS_TYPE transposedB,
XList * c, DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0); TensorList * c, DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0);
/* /*
matrix multiplication of the two tensors (return an XTensor structure) c = trans(a) * trans(b) * alpha matrix multiplication of the two tensors (return an XTensor structure) c = trans(a) * trans(b) * alpha
......
...@@ -66,7 +66,7 @@ operation c = x * w + b MulAndShift ...@@ -66,7 +66,7 @@ operation c = x * w + b MulAndShift
<< return - the result of matrix multiplication << return - the result of matrix multiplication
*/ */
XTensor MulAndShift(const XTensor &x, const XTensor &w, const XTensor &b, XTensor MulAndShift(const XTensor &x, const XTensor &w, const XTensor &b,
DTYPE alpha, XPRunner * parallelRunner) DTYPE alpha, XPRunner * parallelRunner)
{ {
CheckNTErrors(x.dataType == w.dataType, "Input tensors should have the same data type!"); CheckNTErrors(x.dataType == w.dataType, "Input tensors should have the same data type!");
CheckNTErrors(x.order >= 2 && w.order >= 2, "Input tensors must have a order >= 2!"); CheckNTErrors(x.order >= 2 && w.order >= 2, "Input tensors must have a order >= 2!");
...@@ -129,9 +129,6 @@ XTensor MulAndShift(const XTensor &x, const XTensor &w, const XTensor &b, ...@@ -129,9 +129,6 @@ XTensor MulAndShift(const XTensor &x, const XTensor &w, const XTensor &b,
DelTensorBuf(tmp); DelTensorBuf(tmp);
return c; return c;
} }
} }
\ No newline at end of file
...@@ -29,7 +29,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -29,7 +29,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
XTensor MulAndShift(const XTensor &x, const XTensor &w, const XTensor &b, XTensor MulAndShift(const XTensor &x, const XTensor &w, const XTensor &b,
DTYPE alpha = (DTYPE)1.0, XPRunner * parallelRunner = NULL); DTYPE alpha = (DTYPE)1.0, XPRunner * parallelRunner = NULL);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
#include "../../XTensor.h" #include "../../XTensor.h"
#include "../../XName.h" #include "../../XName.h"
#include "../../XUtility.h"
#include "Multiply.h" #include "Multiply.h"
#include "Multiply.cuh" #include "Multiply.cuh"
#include "MultiplyDim.h" #include "MultiplyDim.h"
...@@ -41,12 +42,15 @@ where i is the index of the item ...@@ -41,12 +42,15 @@ where i is the index of the item
*/ */
void _Multiply(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, int leadingDim) void _Multiply(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, int leadingDim)
{ {
int leadingDimRDI = a->order - leadingDim - 1;
CheckNTErrors((a->unitNum <= c->unitNum && b->unitNum <= c->unitNum), CheckNTErrors((a->unitNum <= c->unitNum && b->unitNum <= c->unitNum),
"Unmatched tensors in multiplication!"); "Unmatched tensors in multiplication!");
CheckNTErrors((a->order == b->order && a->order == c->order), CheckNTErrors((a->order == b->order && a->order == c->order),
"Unmatched tensors!"); "Unmatched tensors!");
CheckDev(a->devID, b->devID);
int leadingDimRDI = a->order - leadingDim - 1;
#ifdef USE_CUDA #ifdef USE_CUDA
if (a->devID >= 0 || b->devID >= 0 || c->devID >= 0) { if (a->devID >= 0 || b->devID >= 0 || c->devID >= 0) {
_CudaMultiply(a, b, c, alpha, leadingDim); _CudaMultiply(a, b, c, alpha, leadingDim);
...@@ -139,6 +143,23 @@ void _MultiplyMe(XTensor * a, const XTensor * b, DTYPE alpha, int leadingDim) ...@@ -139,6 +143,23 @@ void _MultiplyMe(XTensor * a, const XTensor * b, DTYPE alpha, int leadingDim)
_Multiply(a, b, a, alpha, leadingDim); _Multiply(a, b, a, alpha, leadingDim);
} }
/*
element-wise product of two tensors (on-site version)
the result overwrites the input tensor a and nothing is returned
a(i) = a(i)*b(i) + \alpha * a(i)
where i is the index of the item

>> a - tensor a (it also receives the result)
>> b - tensor b
>> alpha - the coefficient
>> leadingDim - the dimension along which we perform broadcasting
*/
void MultiplyMe(XTensor& a, const XTensor& b, DTYPE alpha, int leadingDim)
{
    /* the tensor a is passed to _Multiply as both the first operand and the output */
    XTensor * self = &a;

    /* call _Multiply function */
    _Multiply(self, &b, self, alpha, leadingDim);
}
/* /*
return a dimension if the multiplication is performed as MultiplyDim (in more details in MultiplyDim.h) return a dimension if the multiplication is performed as MultiplyDim (in more details in MultiplyDim.h)
>> a - a tensor >> a - a tensor
...@@ -226,9 +247,8 @@ where i is the index of the item ...@@ -226,9 +247,8 @@ where i is the index of the item
>> c - result tensor >> c - result tensor
>> alpha - the coefficient >> alpha - the coefficient
>> leadingDim - the dimension along which we perform broadcasting >> leadingDim - the dimension along which we perform broadcasting
>> requireLink - if add operation to network
*/ */
void Multiply(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int leadingDim, bool requireLink) void Multiply(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int leadingDim)
{ {
if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) { if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
InitTensor(&c, &a); InitTensor(&c, &a);
...@@ -242,7 +262,7 @@ void Multiply(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int l ...@@ -242,7 +262,7 @@ void Multiply(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int l
/* call _Multiply function */ /* call _Multiply function */
_Multiply(&a, &b, &c, 0, leadingDim); _Multiply(&a, &b, &c, 0, leadingDim);
if (requireLink) { if (c.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_MULTIPLY); XLink::MakeLink(&a, &b, &c, MATH_MULTIPLY);
XLink::AddParamToHead(&c, alpha); XLink::AddParamToHead(&c, alpha);
...@@ -253,7 +273,7 @@ void Multiply(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int l ...@@ -253,7 +273,7 @@ void Multiply(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int l
/* call _MultiplyDim function */ /* call _MultiplyDim function */
_MultiplyDim(&a, &b, &c, n, alpha); _MultiplyDim(&a, &b, &c, n, alpha);
if (requireLink) { if (c.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_MULTIPLYDIM); XLink::MakeLink(&a, &b, &c, MATH_MULTIPLYDIM);
XLink::AddParamToHeadInt(&c, n); XLink::AddParamToHeadInt(&c, n);
......
...@@ -122,10 +122,10 @@ where i is the item index ...@@ -122,10 +122,10 @@ where i is the item index
*/ */
void _CudaMultiply(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, int leadingDim) void _CudaMultiply(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, int leadingDim)
{ {
int leadingDimRDI = a->order - leadingDim - 1; int leadingDimRDI = a->order - leadingDim - 1;
CheckNTErrors((a->unitNum <= c->unitNum && b->unitNum <= c->unitNum), CheckNTErrors(a->unitNum <= c->unitNum && b->unitNum <= c->unitNum,
"Unmatched tensors in multiplication!"); "Unmatched tensors in multiplication!");
CheckNTErrors((a->order == b->order && a->order == c->order), "Unmatched tensors!"); CheckNTErrors(a->order == b->order && a->order == c->order, "Unmatched tensors!");
int stride = 1; int stride = 1;
int blockSizeA = 1; int blockSizeA = 1;
......
...@@ -40,6 +40,7 @@ a(i) = a(i)*b(i) + \alpha * a(i) ...@@ -40,6 +40,7 @@ a(i) = a(i)*b(i) + \alpha * a(i)
where i is the index of the element where i is the index of the element
*/ */
void _MultiplyMe(XTensor * a, const XTensor * b, DTYPE alpha = 0.0, int leadingDim = 0); void _MultiplyMe(XTensor * a, const XTensor * b, DTYPE alpha = 0.0, int leadingDim = 0);
void MultiplyMe(XTensor & a, const XTensor & b, DTYPE alpha = 0.0, int leadingDim = 0);
/* /*
element-wise product of two tensors (return an XTensor structure) element-wise product of two tensors (return an XTensor structure)
...@@ -54,7 +55,7 @@ element-wise product of two tensors: ...@@ -54,7 +55,7 @@ element-wise product of two tensors:
c(i) = a(i)*b(i) + \alpha * c(i) c(i) = a(i)*b(i) + \alpha * c(i)
where i is the index of the element where i is the index of the element
*/ */
void Multiply(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha = 0.0, int leadingDim = 0, bool requireLink = false); void Multiply(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha = 0.0, int leadingDim = 0);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
* $Created by: JIANG Yufan (email: jiangyufan2018@outlook.com) 2018-08-14 * $Created by: JIANG Yufan (email: jiangyufan2018@outlook.com) 2018-08-14
*/ */
#include <math.h>
#include "Multiply.h" #include "Multiply.h"
#include "MultiplyDim.h" #include "MultiplyDim.h"
#include "MultiplyDim.cuh" #include "MultiplyDim.cuh"
...@@ -42,8 +43,10 @@ i.e., a is multiplied with b by broadcasting ...@@ -42,8 +43,10 @@ i.e., a is multiplied with b by broadcasting
>> n - the dimension index >> n - the dimension index
>> alpha - the scaling factor >> alpha - the scaling factor
*/ */
void _MultiplyDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYPE alpha) { void _MultiplyDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYPE alpha)
{
n = MODX(n, a->order);
CheckNTErrors(a && b && c, "Empty tensor input!"); CheckNTErrors(a && b && c, "Empty tensor input!");
CheckNTErrors(a->unitNum == c->unitNum, "Unmatched tensors in multiplication!"); CheckNTErrors(a->unitNum == c->unitNum, "Unmatched tensors in multiplication!");
CheckNTErrors(a->dataType == b->dataType && a->dataType == c->dataType, CheckNTErrors(a->dataType == b->dataType && a->dataType == c->dataType,
...@@ -52,6 +55,8 @@ void _MultiplyDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYP ...@@ -52,6 +55,8 @@ void _MultiplyDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYP
CheckNTErrors(!a->isSparse && !b->isSparse && !c->isSparse, "Dense tensors are required!"); CheckNTErrors(!a->isSparse && !b->isSparse && !c->isSparse, "Dense tensors are required!");
CheckNTErrors(a->dimSize[n] == b->unitNum, "Wrong tensor size!"); CheckNTErrors(a->dimSize[n] == b->unitNum, "Wrong tensor size!");
CheckDev(a->devID, b->devID);
if(XTensor::IsSameShaped(a, b)){ if(XTensor::IsSameShaped(a, b)){
_Multiply(a, b, c, alpha); _Multiply(a, b, c, alpha);
return; return;
...@@ -134,6 +139,24 @@ void _MultiplyDimMe(XTensor * a, const XTensor * b, int n, DTYPE alpha) ...@@ -134,6 +139,24 @@ void _MultiplyDimMe(XTensor * a, const XTensor * b, int n, DTYPE alpha)
} }
/* /*
tensor multiplication(do it on site)
keep the result in the input tensor a and return nothing
a = a * b + \alpha * a
where the size of b is equal to the n-th dimension of a,
i.e., a is multiplied with b by broadcasting
>> a - a tensor
>> b - another tensor whose size is equal to that of dimension n of a
>> n - the dimension index
>> alpha - the scaling factor
*/
void MultiplyDimMe(XTensor& a, const XTensor& b, int n, DTYPE alpha)
{
    /* in-place variant: tensor a is passed both as the first operand
       and as the destination of _MultiplyDim */
    XTensor * target = &a;
    const XTensor * factor = &b;

    _MultiplyDim(target, factor, target, n, alpha);
}
/*
tensor multiplication (return an XTensor structure and make tensor connections) tensor multiplication (return an XTensor structure and make tensor connections)
make a new tensor to keep the result and return it make a new tensor to keep the result and return it
...@@ -151,6 +174,8 @@ XTensor MultiplyDim(const XTensor &a, const XTensor &b, int n) ...@@ -151,6 +174,8 @@ XTensor MultiplyDim(const XTensor &a, const XTensor &b, int n)
XTensor c(&a); XTensor c(&a);
c.SetTMPFlag(); c.SetTMPFlag();
n = MODX(n, a.order);
/* call _Multiply function */ /* call _Multiply function */
_MultiplyDim(&a, &b, &c, n, 0); _MultiplyDim(&a, &b, &c, n, 0);
...@@ -173,9 +198,8 @@ i.e., a is multiplied with b by broadcasting ...@@ -173,9 +198,8 @@ i.e., a is multiplied with b by broadcasting
>> b - another tensor whose size is equal to that of dimension n of a >> b - another tensor whose size is equal to that of dimension n of a
>> c - where we put a * b + \alpha * c. we save it in a if c is NULL >> c - where we put a * b + \alpha * c. we save it in a if c is NULL
>> n - the dimension index >> n - the dimension index
>> requireLink - if add operation to network
*/ */
void MultiplyDim(const XTensor &a, const XTensor &b, XTensor &c, int n, bool requireLink) void MultiplyDim(const XTensor &a, const XTensor &b, XTensor &c, int n)
{ {
if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) { if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
InitTensor(&c, &a); InitTensor(&c, &a);
...@@ -184,7 +208,7 @@ void MultiplyDim(const XTensor &a, const XTensor &b, XTensor &c, int n, bool req ...@@ -184,7 +208,7 @@ void MultiplyDim(const XTensor &a, const XTensor &b, XTensor &c, int n, bool req
/* call _Multiply function */ /* call _Multiply function */
_MultiplyDim(&a, &b, &c, n, 0); _MultiplyDim(&a, &b, &c, n, 0);
if (requireLink) { if (c.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_MULTIPLYDIM); XLink::MakeLink(&a, &b, &c, MATH_MULTIPLYDIM);
XLink::AddParamToHeadInt(&c, n); XLink::AddParamToHeadInt(&c, n);
...@@ -340,9 +364,8 @@ where some of dimensions of b can be of size 1 ...@@ -340,9 +364,8 @@ where some of dimensions of b can be of size 1
>> a - a tensor >> a - a tensor
>> b - another tensor that would be broadcasted >> b - another tensor that would be broadcasted
>> c - the resulting tensor >> c - the resulting tensor
>> requireLink - if add operation to network
*/ */
void MultiplyBroadcast(const XTensor &a, const XTensor &b, XTensor &c, bool requireLink) void MultiplyBroadcast(const XTensor &a, const XTensor &b, XTensor &c)
{ {
if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) { if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
InitTensor(&c, &a); InitTensor(&c, &a);
...@@ -351,7 +374,7 @@ void MultiplyBroadcast(const XTensor &a, const XTensor &b, XTensor &c, bool requ ...@@ -351,7 +374,7 @@ void MultiplyBroadcast(const XTensor &a, const XTensor &b, XTensor &c, bool requ
/* call _SumBroadcast function */ /* call _SumBroadcast function */
_MultiplyBroadcast(&a, &b, &c, 0); _MultiplyBroadcast(&a, &b, &c, 0);
if (requireLink) { if (c.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_MULTIPLYBROADCAST); XLink::MakeLink(&a, &b, &c, MATH_MULTIPLYBROADCAST);
XLink::AddParamToHead(&c, 0); XLink::AddParamToHead(&c, 0);
......
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
/* /*
* $Created by: JIANG Yufan (email: jiangyufan2018@outlook.com) 2018-08-14 * $Created by: JIANG Yufan (email: jiangyufan2018@outlook.com) 2018-08-14
* $Updated by: LinYe (email: linye2015@outlook.com) 2019-07-30 float16 added
*/ */
#include "../../XDevice.h" #include "../../XDevice.h"
......
...@@ -33,6 +33,7 @@ void _MultiplyDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYP ...@@ -33,6 +33,7 @@ void _MultiplyDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYP
/* tensor multiplication a = a * b + \alpha * c where the size of b is equal to the n-th dimension of a, /* tensor multiplication a = a * b + \alpha * c where the size of b is equal to the n-th dimension of a,
i.e., a is multiplied with b by broadcasting. we keep the result in the input tensor a and return nothing */ i.e., a is multiplied with b by broadcasting. we keep the result in the input tensor a and return nothing */
void _MultiplyDimMe(XTensor * a, const XTensor * b, int n, DTYPE alpha = 0.0); void _MultiplyDimMe(XTensor * a, const XTensor * b, int n, DTYPE alpha = 0.0);
void MultiplyDimMe(XTensor & a, const XTensor & b, int n, DTYPE alpha = 0.0);
/* tensor multiplication c = a * b where the size of b is equal to the n-th dimension of a, /* tensor multiplication c = a * b where the size of b is equal to the n-th dimension of a,
i.e., a is multiplied with b by broadcasting. We make a new tensor c to keep the result and return it */ i.e., a is multiplied with b by broadcasting. We make a new tensor c to keep the result and return it */
...@@ -40,7 +41,7 @@ XTensor MultiplyDim(const XTensor &a, const XTensor &b, int n); ...@@ -40,7 +41,7 @@ XTensor MultiplyDim(const XTensor &a, const XTensor &b, int n);
/* tensor multiplication c = a * b + \alpha * c where the size of b is equal to the n-th dimension of a, /* tensor multiplication c = a * b + \alpha * c where the size of b is equal to the n-th dimension of a,
i.e., a is multiplied with b by broadcasting */ i.e., a is multiplied with b by broadcasting */
void MultiplyDim(const XTensor &a, const XTensor &b, XTensor &c, int n, bool requireLink = false); void MultiplyDim(const XTensor &a, const XTensor &b, XTensor &c, int n);
/* tensor multiplication summation c = a * b + c * \beta where some of dimensions of b can be of size 1 */ /* tensor multiplication summation c = a * b + c * \beta where some of dimensions of b can be of size 1 */
void _MultiplyBroadcast(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta = (DTYPE)1.0); void _MultiplyBroadcast(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta = (DTYPE)1.0);
...@@ -50,7 +51,7 @@ void _MultiplyBroadcast(const XTensor * a, const XTensor * b, XTensor * c, DTYPE ...@@ -50,7 +51,7 @@ void _MultiplyBroadcast(const XTensor * a, const XTensor * b, XTensor * c, DTYPE
XTensor MultiplyBroadcast(const XTensor &a, const XTensor &b); XTensor MultiplyBroadcast(const XTensor &a, const XTensor &b);
/* tensor multiplication summation c = a * b + c * \beta where some of dimensions of b can be of size 1 */ /* tensor multiplication summation c = a * b + c * \beta where some of dimensions of b can be of size 1 */
void MultiplyBroadcast(const XTensor &a, const XTensor &b, XTensor &c, bool requireLink = false); void MultiplyBroadcast(const XTensor &a, const XTensor &b, XTensor &c);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../../XTensor.h"
#include "../../XName.h"
#include "Negate.h"
#include "Negate.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
set every entry to its minus value
>> a - input tensor we are processing
>> b - output tensor we are processing
*/
void _Negate(const XTensor * a, XTensor * b)
{
#ifdef USE_CUDA
/* run it on GPUs */
if (a->devID >= 0) {
_CudaNegate(a, b);
return;
}
#endif
CheckNTErrors((XTensor::IsSameShaped(a, b)), "Input tensors should have the same type!");
CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!");
DTYPE * d = (DTYPE*)a->data;
DTYPE * db = (DTYPE*)b->data;
for (int i = 0; i < a->unitNum; i++)
db[i] = -d[i];
}
/*
set every entry to its minus value (do it on site)
keep the result in the input tensor a and return nothing
>> a - the tensor we are processing
*/
void _NegateMe(XTensor * a)
{
    /* the same tensor acts as input and output, so a is overwritten in place */
    XTensor * inPlace = a;
    _Negate(inPlace, inPlace);
}
/*
set every entry to its minus value (return an XTensor structure)
make a new tensor to keep the result and return it
>> a - input tensor we are processing
<< return - the minus value of input tensor
*/
XTensor Negate(const XTensor & a)
{
    /* build the output tensor from a and flag it as temporary */
    XTensor result(&a);
    result.SetTMPFlag();

    /* result = -a */
    _Negate(&a, &result);

    /* record the operation (single input, no second operand) */
    XLink::MakeLink(&a, NULL, &result, MATH_NEGATE);

    return result;
}
/*
set every entry to its minus value
>> a - input tensor we are processing
>> b - output tensor we are processing
>> requireLink - if add operation to network
*/
void Negate(const XTensor & a, XTensor & b, bool requireLink)
{
    /* (re)initialize b from a unless it is already initialized with a matching shape */
    const bool reusable = b.isInit && XTensor::IsSameShaped(&a, &b);
    if (!reusable)
        InitTensor(&b, &a);

    /* b = -a */
    _Negate(&a, &b);

    /* nothing more to do when the caller does not want the operation recorded */
    if (!requireLink)
        return;

    /* tensor connections */
    XLink::MakeLink(&a, NULL, &b, MATH_NEGATE);
}
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../../XDevice.h"
#include "../../XTensor.h"
#include "Negate.h"
#include "Negate.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/*
set each entry to its negative value (CUDA Kernel)
>> a - pointer to the input data array
>> b - pointer to the output data array
>> size - size of the data array
*/
template <class T>
__global__
void KernelNegate(T * a, T * b, int size)
{
    /* flat index of this thread across the 1-D launch */
    const int pos = blockIdx.x * blockDim.x + threadIdx.x;

    /* threads beyond the end of the array have nothing to do */
    if (pos >= size)
        return;

    b[pos] = -a[pos];
}
/*
set each entry to its negative value
>> a - input tensor
>> b - output tensor
*/
void _CudaNegate(const XTensor * a, XTensor * b)
{
CheckNTErrors((XTensor::IsSameShaped(a, b)), "Input tensors should have the same type!");
CheckNTErrors((a->isSparse == false), "TODO!");
int gridSize[3];
int blockSize[3];
GDevs.GetCudaThread(a->devID, a->unitNum, gridSize, blockSize);
dim3 blocks(gridSize[0]);
dim3 threads(blockSize[0]);
int devIDBackup;
ProtectCudaDev(a->devID, devIDBackup);
if (a->dataType == DEFAULT_DTYPE) {
KernelNegate << <blocks, threads >> >((DTYPE*)a->data, (DTYPE*)b->data, a->unitNum);
}
else if (a->dataType == X_FLOAT16) {
KernelNegate << <blocks, threads >> >((__half*)a->data, (__half*)b->data, a->unitNum);
}
else {
ShowNTErrors("TODO!");
}
BacktoCudaDev(a->devID, devIDBackup);
}
#endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#ifndef __NEGATE_CUH__
#define __NEGATE_CUH__
#include "Negate.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/* set each entry to its negative value (CUDA Kernel) */
template <class T>
__global__
void KernelNegate(T * a, T * b, int size);
/* set each entry to its negative value */
void _CudaNegate(const XTensor * a, XTensor * b);
#endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor)
#endif // __NEGATE_CUH__
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#ifndef __NEGATE_H__
#define __NEGATE_H__
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* set every entry to its minus value */
void _Negate(const XTensor * a, XTensor * b);
/*
set every entry to its minus value (do it on site)
keep the result in the input tensor a and return nothing
*/
void _NegateMe(XTensor * a);
/*
set every entry to its minus value (return an XTensor structure)
make a new tensor to keep the result and return it
*/
XTensor Negate(const XTensor & a);
/*
set every entry to its minus value and keep the result in b
>> a - input tensor
>> b - output tensor
>> requireLink - if add operation to network (false by default)
*/
void Negate(const XTensor & a, XTensor & b, bool requireLink = false);
} // namespace nts(NiuTrans.Tensor)
#endif // __NEGATE_H__
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-7-11
*/
#include "../../XTensor.h"
#include "../../XName.h"
#include "Sign.h"
#include "Sign.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
set every entry to its sign value
>> a - input tensor we are processing
>> b - output tensor we are processing
*/
void _Sign(const XTensor * a, XTensor * b)
{
#ifdef USE_CUDA
/* run it on GPUs */
if (a->devID >= 0) {
_CudaSign(a, b);
return;
}
#endif
CheckNTErrors((XTensor::IsSameShaped(a, b)), "Input tensors should have the same type!");
CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!");
DTYPE * d = (DTYPE*)a->data;
DTYPE * db = (DTYPE*)b->data;
for (int i = 0; i < a->unitNum; i++) {
if (d[i] > 0)
db[i] = 1.0F;
else if (d[i] == 0)
db[i] = 0.0F;
else
db[i] = -1.0F;
}
}
/*
set every entry to its sign value (do it on site)
keep the result in the input tensor a and return nothing
>> a - the tensor we are processing
*/
void _SignMe(XTensor * a)
{
    /* the same tensor acts as input and output, so a is overwritten in place */
    XTensor * inPlace = a;
    _Sign(inPlace, inPlace);
}
/*
set every entry to its sign value (return an XTensor structure)
make a new tensor to keep the result and return it
>> a - input tensor we are processing
<< return - the sign value of the input tensor
*/
XTensor Sign(const XTensor & a)
{
    /* build the output tensor from a and flag it as temporary */
    XTensor result(&a);
    result.SetTMPFlag();

    /* result = sign(a) */
    _Sign(&a, &result);

    /* record the operation (single input, no second operand) */
    XLink::MakeLink(&a, NULL, &result, MATH_SIGN);

    return result;
}
/*
set every entry to its sign value
>> a - input tensor we are processing
>> b - output tensor we are processing
>> requireLink - if add operation to network
*/
void Sign(const XTensor & a, XTensor & b, bool requireLink)
{
    /* (re)initialize b from a unless it is already initialized with a matching shape */
    const bool reusable = b.isInit && XTensor::IsSameShaped(&a, &b);
    if (!reusable)
        InitTensor(&b, &a);

    /* b = sign(a) */
    _Sign(&a, &b);

    /* nothing more to do when the caller does not want the operation recorded */
    if (!requireLink)
        return;

    /* tensor connections */
    XLink::MakeLink(&a, NULL, &b, MATH_SIGN);
}
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论