Commit 314f4370 by liyinqiao

Merge branch 'liyinqiao' into xiaotong-working

# Conflicts:
#	source/tensor/XDevice.cpp
#	source/tensor/XMem.cpp
#	source/tensor/XTensor.cpp
parents 7c17670d 58181c8d
...@@ -207,4 +207,4 @@ void SumDimTest() ...@@ -207,4 +207,4 @@ void SumDimTest()
z.Dump(stderr, "z:"); z.Dump(stderr, "z:");
delete[] data; delete[] data;
} }
\ No newline at end of file
...@@ -20,7 +20,9 @@ ...@@ -20,7 +20,9 @@
*/ */
#include "XBackwardLoss.h" #include "XBackwardLoss.h"
#include "XNoder.h"
#include "../tensor/XName.h" #include "../tensor/XName.h"
#include "../tensor/function/FHeader.h"
#include "../tensor/core/getandset/SetData.h" #include "../tensor/core/getandset/SetData.h"
#include "../tensor/function/HardTanH.h" #include "../tensor/function/HardTanH.h"
#include "../tensor/function/Identity.h" #include "../tensor/function/Identity.h"
...@@ -31,6 +33,60 @@ ...@@ -31,6 +33,60 @@
namespace nts{ namespace nts{
/*
compute dE/dx of a loss node
The gradient is written to the grad tensor of the first input (the output
of the network, income.tails[0]):
- if the node has only one input (no gold standard is linked), the
  gradient is filled with 1
- if the node is a cross-entropy loss, the gradient is computed by
  _CrossEntropyBackward with the gold standard (income.tails[1]) and the
  optional padding (income.tails[2])
>> node - the loss node for backward computation
>> isEfficient - indicates whether the computation is in
                 an efficient manner (not used in this function)
*/
void XLossGrad::MakeGrad(XTensor * node, bool isEfficient)
{
    XLink &income = node->income;
    int operID = income.typeID;

    CheckNTErrors(income.tailNum >= 1, "Wrong number of tensors for loss computation!");

    XTensor * output = income.tails[0];
    XTensor * gold = NULL;
    XTensor * weight = NULL;    /* no weight tensor is linked to the node, so it stays NULL */
    XTensor * padding = NULL;

    XNoder::MakeGrad(output);
    XTensor * dedy = output->grad;

    /* no gold standard: seed the gradient of the output with 1 */
    if (income.tailNum == 1) {
        if(dedy->dataType == X_FLOAT)
            _SetDataFixedFloat(dedy, 1.0F);
        else if(dedy->dataType == X_DOUBLE)
            _SetDataFixedDouble(dedy, 1.0);
        else if(dedy->dataType == X_INT)
            _SetDataFixedInt(dedy, 1);
        else
            ShowNTErrors("TODO");

        /* mark the node as processed on this path too, as the other
           gradient functions do before they return */
        node->visitMark = NODE_FINISHED;
        return;
    }

    gold = income.tails[1];

    if(operID == LOSS_CROSSENTROPY) {
        if (income.tailNum == 3)
            padding = income.tails[2];

        int leadingDim = income.GetParamInt(0);
        CheckNTErrors(leadingDim >= 0 && leadingDim < output->order, "wrong leading dimension in cross entropy!");

        _CrossEntropyBackward(dedy, output, gold, weight, padding, leadingDim);
    }
    else{
        ShowNTErrors("Wrong loss function type!");
    }

    node->visitMark = NODE_FINISHED;
}
/*
check whether a node is generated by a loss computation
>> node - the node to check
<< return - true if the type id of the incoming link of the node
            has a bit in common with LOSS_BASE, false otherwise
*/
bool XLossGrad::IsLossOP(XTensor * node)
{
    const int typeID = node->income.typeID;

    if ((typeID & LOSS_BASE) == 0)
        return false;

    return true;
}
/* /*
compute dE/dx for a given function y = f(x) compute dE/dx for a given function y = f(x)
>> gold - gold standard to measure error (or loss) >> gold - gold standard to measure error (or loss)
......
...@@ -23,6 +23,7 @@ ...@@ -23,6 +23,7 @@
#include "../tensor/XTensor.h" #include "../tensor/XTensor.h"
#include "../tensor/function/FHeader.h" #include "../tensor/function/FHeader.h"
#include "../tensor/loss/LHeader.h"
#ifndef __XBACKWARDLOSS_H__ #ifndef __XBACKWARDLOSS_H__
#define __XBACKWARDLOSS_H__ #define __XBACKWARDLOSS_H__
...@@ -34,6 +35,14 @@ namespace nts{ ...@@ -34,6 +35,14 @@ namespace nts{
class XLossGrad class XLossGrad
{ {
public: public:
/* compute dE/dx of a loss node, where x is the first input of the node
   (income.tails[0]); the result is stored in the grad tensor of x.
   >> node - the loss node for backward computation
   >> isEfficient - efficiency flag shared by the MakeGrad interfaces
                    (the loss implementation does not read it) */
static
void MakeGrad(XTensor * node, bool isEfficient);
/* indicates whether the node is for a Loss computation, i.e., whether
   the type id of its incoming link has a bit in common with LOSS_BASE */
static
bool IsLossOP(XTensor * node);
/* compute dE/dx for a given function y = f(x) */ /* compute dE/dx for a given function y = f(x) */
void Compute(XTensor * gold, XTensor * y, XTensor * x, void Compute(XTensor * gold, XTensor * y, XTensor * x,
XTensor * dedy, XTensor * dedx, XTensor * padding, XTensor * dedy, XTensor * dedx, XTensor * padding,
......
...@@ -81,6 +81,12 @@ void XMathGrad::MakeGrad(XTensor * node, bool isEfficient) ...@@ -81,6 +81,12 @@ void XMathGrad::MakeGrad(XTensor * node, bool isEfficient)
GradPower(node, isEfficient); GradPower(node, isEfficient);
else if(operID == MATH_SCALEANDSHIFT) else if(operID == MATH_SCALEANDSHIFT)
GradScaleAndShift(node, isEfficient); GradScaleAndShift(node, isEfficient);
else if(operID == MATH_SCALE)
GradScale(node, isEfficient);
else if(operID == MATH_DESCALE)
GradDescale(node, isEfficient);
else if(operID == MATH_SHIFT)
GradShift(node, isEfficient);
else if(operID == MATH_SUB) else if(operID == MATH_SUB)
GradSub(node, isEfficient); GradSub(node, isEfficient);
else if(operID == MATH_SUBDIM) else if(operID == MATH_SUBDIM)
...@@ -719,12 +725,18 @@ void XMathGrad::GradMultiply(XTensor * node, bool isEfficient) ...@@ -719,12 +725,18 @@ void XMathGrad::GradMultiply(XTensor * node, bool isEfficient)
XTensor * a = income.tails[0]; XTensor * a = income.tails[0];
XTensor * b = income.tails[1]; XTensor * b = income.tails[1];
XNoder::MakeGrad(a);
XNoder::MakeGrad(b);
CheckNTErrors(XTensor::IsSameShaped(a, b), "Wrong sized input tensors!"); CheckNTErrors(XTensor::IsSameShaped(a, b), "Wrong sized input tensors!");
_Multiply(node->grad, b, a->grad, 1.0F);
_Multiply(node->grad, a, b->grad, 1.0F); if (!isEfficient || a->isGrad) {
XNoder::MakeGrad(a);
_Multiply(node->grad, b, a->grad, 1.0F);
}
if (!isEfficient || b->isGrad) {
XNoder::MakeGrad(b);
_Multiply(node->grad, a, b->grad, 1.0F);
}
node->visitMark = NODE_FINISHED; node->visitMark = NODE_FINISHED;
} }
...@@ -888,88 +900,8 @@ gradient for normalize ...@@ -888,88 +900,8 @@ gradient for normalize
*/ */
void XMathGrad::GradNormalize(XTensor * node, bool isEfficient) void XMathGrad::GradNormalize(XTensor * node, bool isEfficient)
{ {
ShowNTErrors("This is really a bad piece of code!!!"); ShowNTErrors("TODO!");
XLink &income = node->income;
CheckNTErrors(income.tailNum == 5, "Wrong input tensor number for NORMALIZE!");
XTensor * input = income.tails[0];
XTensor * mean = income.tails[1];
XTensor * var = income.tails[2];
XTensor * a = income.tails[3];
XTensor * b = income.tails[4];
XTensor * c = NewTensor(var);
XTensor * d = NewTensor(a);
XTensor * e = NewTensor(a);
XTensor * f = NewTensor(a);
XTensor * g = NewTensor(a);
XTensor * h = NewTensor(a);
XTensor * i = NewTensor(a);
XTensor * j = NewTensor(a);
XTensor * k = NewTensor(var);
XTensor * p = NewTensor(var);
XTensor * q = NewTensor(var);
XTensor * r = NewTensor(a);
XTensor * x = NewTensor(mean);
XTensor * y = NewTensor(mean);
XTensor * z = NewTensor(mean);
DTYPE epsilon = income.GetParam(1);
int dim = income.GetParamInt(0);
int n = a->GetDim(dim);
XNoder::MakeGrad(input);
XNoder::MakeGrad(mean);
XNoder::MakeGrad(var);
XNoder::MakeGrad(a);
XNoder::MakeGrad(b);
/* dEdinput */
_ScaleAndShift(var, c, 1.0F, epsilon);
_Unsqueeze(c, d, dim, n);
_Power(d, e, -0.5F);
_Multiply(a, e, f);
_Multiply(node->grad, f, input->grad, 1.0F);
/* dEdmean */
_ScaleAndShift(f, g, -1.0F);
_ReduceSum(g, x, dim);
_ReduceSum(node->grad, y, dim);
_Multiply(y, x, mean->grad, 1.0F);
/* dEdvar */
_Unsqueeze(mean, h, dim, n);
_Sub(input, h, i);
_Multiply(a, i, j);
_Power(var, k, -1.5F);
_ScaleAndShift(k, p, -0.5F);
_ReduceSum(j, z, dim);
_Multiply(z, p, q);
_Multiply(y, q, var->grad, 1.0F);
/* dEda */
_Multiply(i, e, r);
_Multiply(node->grad, r, a->grad, 1.0F);
/* dEdb */
_Sum(b->grad, node->grad, b->grad);
node->visitMark = NODE_FINISHED;
delete c;
delete d;
delete e;
delete f;
delete g;
delete h;
delete i;
delete j;
delete k;
delete p;
delete q;
delete r;
delete x;
delete y;
delete z;
} }
/* /*
...@@ -1030,6 +962,82 @@ void XMathGrad::GradScaleAndShift(XTensor * node, bool isEfficient) ...@@ -1030,6 +962,82 @@ void XMathGrad::GradScaleAndShift(XTensor * node, bool isEfficient)
} }
/*
gradient for Scale
for
c = a * scale
we have
dE/da = dE/dc * scale
The gradient is accumulated into a->grad. As in GradMultiply, the update
is skipped in the efficient mode when a does not keep a gradient.
>> node - the node (c) for backward computation
>> isEfficient - indicates whether the computation is in
                 an efficient manner
*/
void XMathGrad::GradScale(XTensor * node, bool isEfficient)
{
    XLink &income = node->income;
    CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for SCALE!");

    XTensor * a = income.tails[0];

    if (!isEfficient || a->isGrad) {
        /* the scaling factor is stored as the first parameter of the link */
        DTYPE scale = income.GetParam(0);

        XNoder::MakeGrad(a);

        /* dE/da += dE/dc * scale */
        _Sum(a->grad, node->grad, a->grad, scale);
    }

    node->visitMark = NODE_FINISHED;
}
/*
gradient for Descale
for
c = a / descale
we have
dE/da = dE/dc / descale
The gradient is accumulated into a->grad. As in GradMultiply, the update
is skipped in the efficient mode when a does not keep a gradient.
>> node - the node (c) for backward computation
>> isEfficient - indicates whether the computation is in
                 an efficient manner
*/
void XMathGrad::GradDescale(XTensor * node, bool isEfficient)
{
    XLink &income = node->income;
    CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for DESCALE!");

    XTensor * a = income.tails[0];

    if (!isEfficient || a->isGrad) {
        /* the divisor is stored as the first parameter of the link */
        DTYPE descale = income.GetParam(0);

        XNoder::MakeGrad(a);

        /* dE/da += dE/dc * (1/descale) */
        _Sum(a->grad, node->grad, a->grad, 1/descale);
    }

    node->visitMark = NODE_FINISHED;
}
/*
gradient for Shift
for
c = a + shift
we have
dE/da = dE/dc
The gradient is accumulated into a->grad. As in GradMultiply, the update
is skipped in the efficient mode when a does not keep a gradient.
>> node - the node (c) for backward computation
>> isEfficient - indicates whether the computation is in
                 an efficient manner
*/
void XMathGrad::GradShift(XTensor * node, bool isEfficient)
{
    XLink &income = node->income;
    CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for SHIFT!");

    XTensor * a = income.tails[0];

    if (!isEfficient || a->isGrad) {
        XNoder::MakeGrad(a);

        /* dE/da += dE/dc (the constant shift does not change the gradient) */
        _Sum(a->grad, node->grad, a->grad);
    }

    node->visitMark = NODE_FINISHED;
}
/*
gradient for minus gradient for minus
for for
c = a - b * \beta c = a - b * \beta
......
...@@ -130,6 +130,18 @@ private: ...@@ -130,6 +130,18 @@ private:
static static
void GradScaleAndShift(XTensor * node, bool isEfficient); void GradScaleAndShift(XTensor * node, bool isEfficient);
/* gradient for Scale: for c = a * scale, we have dE/da = dE/dc * scale
   >> node - the node (c) for backward computation */
static
void GradScale(XTensor * node, bool isEfficient);
/* gradient for Shift: for c = a + shift, we have dE/da = dE/dc
   >> node - the node (c) for backward computation */
static
void GradShift(XTensor * node, bool isEfficient);
/* gradient for Descale: for c = a / descale, we have dE/da = dE/dc / descale
   >> node - the node (c) for backward computation */
static
void GradDescale(XTensor * node, bool isEfficient);
/* gradient for Minus */ /* gradient for Minus */
static static
void GradSub(XTensor * node, bool isEfficient); void GradSub(XTensor * node, bool isEfficient);
......
...@@ -43,6 +43,8 @@ void XShapeGrad::MakeGrad(XTensor * node, bool isEfficent) ...@@ -43,6 +43,8 @@ void XShapeGrad::MakeGrad(XTensor * node, bool isEfficent)
GradCopyIndexed(node, isEfficent); GradCopyIndexed(node, isEfficent);
else if(operID == MOVEMENT_GATHER) else if(operID == MOVEMENT_GATHER)
GradGather(node, isEfficent); GradGather(node, isEfficent);
else if (operID == MOVEMENT_DROPOUTWITHINDEX)
GradDropoutWithIndex(node, isEfficent);
else if(operID == SHAPE_MERGE) else if(operID == SHAPE_MERGE)
GradMerge(node, isEfficent); GradMerge(node, isEfficent);
else if(operID == SHAPE_MERGE_LIST) else if(operID == SHAPE_MERGE_LIST)
...@@ -115,7 +117,7 @@ dE/da = spreadforgather(b) ...@@ -115,7 +117,7 @@ dE/da = spreadforgather(b)
void XShapeGrad::GradGather(XTensor * node, bool isEfficent) void XShapeGrad::GradGather(XTensor * node, bool isEfficent)
{ {
XLink &income = node->income; XLink &income = node->income;
CheckNTErrors(income.tailNum > 0, "Wrong input tensor number for CopyIndexed!"); CheckNTErrors(income.tailNum > 0, "Wrong input tensor number for Gather!");
XTensor * input = income.tails[0]; XTensor * input = income.tails[0];
XTensor * index = income.tails[1]; XTensor * index = income.tails[1];
...@@ -126,6 +128,43 @@ void XShapeGrad::GradGather(XTensor * node, bool isEfficent) ...@@ -126,6 +128,43 @@ void XShapeGrad::GradGather(XTensor * node, bool isEfficent)
node->visitMark = NODE_FINISHED; node->visitMark = NODE_FINISHED;
} }
/*
gradient computation for DropoutWithIndex function
The gradient of the output is copied to the gradient of the input, the
input gradient is viewed as a vector so that _DropoutWithIndex can be
applied with the mask index, and the result is scaled by the factor
stored with the link. The input gradient is then reshaped back.
>> node - the node (output of DropoutWithIndex) for backward computation
>> isEfficent - indicates whether the computation is in
                an efficient manner (not used in this function)
*/
void XShapeGrad::GradDropoutWithIndex(XTensor * node, bool isEfficent)
{
    XLink &income = node->income;

    /* both the input (tails[0]) and the mask index (tails[1]) are read below */
    CheckNTErrors(income.tailNum >= 2, "Wrong input tensor number for DropoutWithIndex!");

    XTensor * input = income.tails[0];
    XTensor * index = income.tails[1];
    DTYPE scale = income.GetParam(0);

    XNoder::MakeGrad(input);

    //_Identity(node->grad, input->grad);
    _CopyValues(node->grad, input->grad);

    /* keep the shape of the output gradient; it is used to restore the
       shape of the input gradient after the flat (1d) processing
       NOTE(review): this assumes input->grad and node->grad have the same shape */
    int order = node->grad->order;
    int * dimSize = new int[order];
    for (int i = 0; i < order; i++) {
        dimSize[i] = node->grad->dimSize[i];
    }

    /* view the input gradient as a vector of unitNum items */
    int flatDimSize[1];
    flatDimSize[0] = input->grad->unitNum;
    input->grad->Reshape(1, flatDimSize);

    _DropoutWithIndex(node->grad, index, input->grad);
    _ScaleAndShiftMe(input->grad, scale);

    input->grad->Reshape(order, dimSize);

    /* the shape buffer was leaked in the previous version */
    delete[] dimSize;

    node->visitMark = NODE_FINISHED;
}
/* /*
gradient for merge gradient for merge
for for
...@@ -232,8 +271,8 @@ void XShapeGrad::GradMergeList(XTensor * node, bool isEfficient) ...@@ -232,8 +271,8 @@ void XShapeGrad::GradMergeList(XTensor * node, bool isEfficient)
CheckNTErrors(income.tailNum > 0, "Wrong input tensor number for MERGE!"); CheckNTErrors(income.tailNum > 0, "Wrong input tensor number for MERGE!");
XTensor * last = NULL; XTensor * last = NULL;
XList smalls(income.tailNum); TensorList smalls(income.tailNum);
XList smallsGrad(income.tailNum); TensorList smallsGrad(income.tailNum);
bool mergeOnly = true; bool mergeOnly = true;
for(int i = 0; i < income.tailNum; i++){ for(int i = 0; i < income.tailNum; i++){
XTensor * tail = income.tails[i]; XTensor * tail = income.tails[i];
...@@ -401,7 +440,7 @@ void XShapeGrad::GradSplitListPost(XTensor * node, bool isEfficient) ...@@ -401,7 +440,7 @@ void XShapeGrad::GradSplitListPost(XTensor * node, bool isEfficient)
/* we compute the gradient for current node, rather than for /* we compute the gradient for current node, rather than for
child node, i.e., we use the outgoing edge here */ child node, i.e., we use the outgoing edge here */
XLink &outgo = node->outgo; XLink &outgo = node->outgo;
XList splits(outgo.tailNum); TensorList splits(outgo.tailNum);
int whereToSplit = -1; int whereToSplit = -1;
int splitNum = 0; int splitNum = 0;
......
...@@ -54,6 +54,10 @@ private: ...@@ -54,6 +54,10 @@ private:
static static
void GradGather(XTensor * node, bool isEfficent); void GradGather(XTensor * node, bool isEfficent);
/* gradient computation for dropout with index: b = dropoutwithindex(a, index)
   the gradient of a is additionally scaled by the factor stored as the
   first parameter of the link */
static
void GradDropoutWithIndex(XTensor * node, bool isEfficent);
/* gradient computation for merge: c = merge(a, b, ...) */ /* gradient computation for merge: c = merge(a, b, ...) */
static static
void GradMerge(XTensor * node, bool isEfficent); void GradMerge(XTensor * node, bool isEfficent);
......
...@@ -55,7 +55,7 @@ void XNetClearAll() ...@@ -55,7 +55,7 @@ void XNetClearAll()
XNet::XNet() XNet::XNet()
{ {
nodes.Clear(); nodes.Clear();
isGradEfficient = true; isGradEfficient = false;
} }
/* de-constructor */ /* de-constructor */
...@@ -79,13 +79,13 @@ backward propagation to obtain gradient ...@@ -79,13 +79,13 @@ backward propagation to obtain gradient
*/ */
void XNet::Backward(XTensor &root, LOSS_FUNCTION_NAME loss) void XNet::Backward(XTensor &root, LOSS_FUNCTION_NAME loss)
{ {
XList roots(1); TensorList roots(1);
roots.Add(&root); roots.Add(&root);
XList golds(1); TensorList golds(1);
golds.Add(NULL); golds.Add(NULL);
XList paddings(1); TensorList paddings(1);
paddings.Add(NULL); paddings.Add(NULL);
Backward(roots, golds, paddings, loss); Backward(roots, golds, paddings, loss);
...@@ -99,13 +99,13 @@ backward propagation to obtain gradient wrt. the loss/error function ...@@ -99,13 +99,13 @@ backward propagation to obtain gradient wrt. the loss/error function
*/ */
void XNet::Backward(XTensor &root, XTensor &gold, LOSS_FUNCTION_NAME loss) void XNet::Backward(XTensor &root, XTensor &gold, LOSS_FUNCTION_NAME loss)
{ {
XList roots(1); TensorList roots(1);
roots.Add(&root); roots.Add(&root);
XList golds(1); TensorList golds(1);
golds.Add(&gold); golds.Add(&gold);
XList paddings(1); TensorList paddings(1);
paddings.Add(NULL); paddings.Add(NULL);
Backward(roots, golds, paddings, loss); Backward(roots, golds, paddings, loss);
...@@ -120,13 +120,13 @@ backward propagation to obtain gradient wrt. the loss/error function ...@@ -120,13 +120,13 @@ backward propagation to obtain gradient wrt. the loss/error function
*/ */
void XNet::Backward(XTensor &root, XTensor &gold, XTensor &padding, LOSS_FUNCTION_NAME loss) void XNet::Backward(XTensor &root, XTensor &gold, XTensor &padding, LOSS_FUNCTION_NAME loss)
{ {
XList roots(1); TensorList roots(1);
roots.Add(&root); roots.Add(&root);
XList golds(1); TensorList golds(1);
golds.Add(&gold); golds.Add(&gold);
XList paddings(1); TensorList paddings(1);
paddings.Add(&padding); paddings.Add(&padding);
Backward(roots, golds, paddings, loss); Backward(roots, golds, paddings, loss);
...@@ -138,10 +138,10 @@ with a number of root nodes ...@@ -138,10 +138,10 @@ with a number of root nodes
>> roots - a list of root nodes (output) of the network >> roots - a list of root nodes (output) of the network
>> loss - name of loss function >> loss - name of loss function
*/ */
void XNet::Backward(XList &roots, LOSS_FUNCTION_NAME loss) void XNet::Backward(TensorList &roots, LOSS_FUNCTION_NAME loss)
{ {
XList golds(roots.count); TensorList golds(roots.count);
XList paddings(roots.count); TensorList paddings(roots.count);
for (int i = 0; i < roots.count; i++) { for (int i = 0; i < roots.count; i++) {
golds.Add(NULL); golds.Add(NULL);
paddings.Add(NULL); paddings.Add(NULL);
...@@ -157,9 +157,9 @@ with a number of root nodes ...@@ -157,9 +157,9 @@ with a number of root nodes
>> golds - a list of gold standard for the output >> golds - a list of gold standard for the output
>> loss - name of loss function >> loss - name of loss function
*/ */
void XNet::Backward(XList &roots, XList &golds, LOSS_FUNCTION_NAME loss) void XNet::Backward(TensorList &roots, TensorList &golds, LOSS_FUNCTION_NAME loss)
{ {
XList paddings(roots.count); TensorList paddings(roots.count);
for (int i = 0; i < roots.count; i++) for (int i = 0; i < roots.count; i++)
paddings.Add(NULL); paddings.Add(NULL);
...@@ -174,7 +174,7 @@ with a number of root nodes ...@@ -174,7 +174,7 @@ with a number of root nodes
>> paddings - specify a target value that is ignored >> paddings - specify a target value that is ignored
>> loss - name of loss function >> loss - name of loss function
*/ */
void XNet::Backward(XList &roots, XList &golds, XList &paddings, LOSS_FUNCTION_NAME loss) void XNet::Backward(TensorList &roots, TensorList &golds, TensorList &paddings, LOSS_FUNCTION_NAME loss)
{ {
Traverse(roots); Traverse(roots);
...@@ -190,18 +190,18 @@ void XNet::Backward(XList &roots, XList &golds, XList &paddings, LOSS_FUNCTION_N ...@@ -190,18 +190,18 @@ void XNet::Backward(XList &roots, XList &golds, XList &paddings, LOSS_FUNCTION_N
XLossGrad lossGrad; XLossGrad lossGrad;
/* we start with the gradient with respect to the loss for output layers */ /* we start with the gradient with respect to the loss for output layers */
for(int i = 0; i < roots.count; i++){ /*for(int i = 0; i < roots.count; i++){
XTensor * root = (XTensor*)roots.Get(i); XTensor * root = (XTensor*)roots.Get(i);
XTensor * gold = (XTensor*)golds.Get(i); XTensor * gold = (XTensor*)golds.Get(i);
XTensor * padding = (XTensor*)paddings.Get(i); XTensor * padding = (XTensor*)paddings.Get(i);
XLink &income = root->income; XLink &income = root->income;
int funcID = income.typeID; int funcID = income.typeID;
void * params = income.params; void * params = income.params;*/
/* we compute dE/dx if the output is generated by an activation function y = f(x). /* we compute dE/dx if the output is generated by an activation function y = f(x).
Note that we do not need to obtain dE/dy here because it is no use in the Note that we do not need to obtain dE/dy here because it is no use in the
following process of back-propagation */ following process of back-propagation */
if(gold != NULL && income.tailNum == 1 && (funcID & FUNCTION_BASE)){ /*if(gold != NULL && income.tailNum == 1 && (funcID & FUNCTION_BASE)){
if(funcID == FUNC_LOGSOFTMAX || funcID == FUNC_SOFTMAX) { if(funcID == FUNC_LOGSOFTMAX || funcID == FUNC_SOFTMAX) {
XTensor * x = income.tails[0]; XTensor * x = income.tails[0];
XNoder::MakeGrad(x); XNoder::MakeGrad(x);
...@@ -212,13 +212,13 @@ void XNet::Backward(XList &roots, XList &golds, XList &paddings, LOSS_FUNCTION_N ...@@ -212,13 +212,13 @@ void XNet::Backward(XList &roots, XList &golds, XList &paddings, LOSS_FUNCTION_N
XNoder::MakeGrad(root); XNoder::MakeGrad(root);
lossGrad.Compute(gold, root, root->grad, padding, loss); lossGrad.Compute(gold, root, root->grad, padding, loss);
} }
} }*/
/* we compute dE/dy (y is the output) if no predefined activation function is used */ /* we compute dE/dy (y is the output) if no predefined activation function is used */
else{ /*else{
XNoder::MakeGrad(root); XNoder::MakeGrad(root);
lossGrad.Compute(gold, root, root->grad, NULL, loss); lossGrad.Compute(gold, root, root->grad, NULL, loss);
} }
} }*/
/* back-propagation from output to input */ /* back-propagation from output to input */
for(int i = nodes.count - 1; i >= 0; i--){ for(int i = nodes.count - 1; i >= 0; i--){
...@@ -266,6 +266,8 @@ void XNet::BackwardNode(XTensor * node, bool isEfficent) ...@@ -266,6 +266,8 @@ void XNet::BackwardNode(XTensor * node, bool isEfficent)
XFuncGrad::MakeGrad(node, isEfficent); XFuncGrad::MakeGrad(node, isEfficent);
else if(XShapeGrad::IsShapeOP(node)) else if(XShapeGrad::IsShapeOP(node))
XShapeGrad::MakeGrad(node, isEfficent); XShapeGrad::MakeGrad(node, isEfficent);
else if(XLossGrad::IsLossOP(node))
XLossGrad::MakeGrad(node, isEfficent);
else{ else{
ShowNTErrors("Wrong node type!"); ShowNTErrors("Wrong node type!");
} }
...@@ -300,7 +302,7 @@ depth-first search (Tarjan's algorithm) ...@@ -300,7 +302,7 @@ depth-first search (Tarjan's algorithm)
*/ */
void XNet::Traverse(XTensor &root) void XNet::Traverse(XTensor &root)
{ {
XList roots(1); TensorList roots(1);
roots.Add(&root); roots.Add(&root);
Traverse(roots); Traverse(roots);
...@@ -311,7 +313,7 @@ traverse the net and find the topological order by ...@@ -311,7 +313,7 @@ traverse the net and find the topological order by
depth-first search (Tarjan's algorithm) depth-first search (Tarjan's algorithm)
>> roots - a list of roots (or output nodes) >> roots - a list of roots (or output nodes)
*/ */
void XNet::Traverse(XList &roots) void XNet::Traverse(TensorList &roots)
{ {
id = MakeNetID(); id = MakeNetID();
nodes.Clear(); nodes.Clear();
...@@ -336,7 +338,7 @@ depth-first search given a node (Tarjan's algorithm for topological ordering) ...@@ -336,7 +338,7 @@ depth-first search given a node (Tarjan's algorithm for topological ordering)
>> orders - topological order of the nodes >> orders - topological order of the nodes
>> code - code of the network >> code - code of the network
*/ */
void XNet::TarjanVisit(XTensor * node, XList &orders, const unsigned int code) void XNet::TarjanVisit(XTensor * node, TensorList &orders, const unsigned int code)
{ {
if(node == NULL) if(node == NULL)
return; return;
...@@ -444,7 +446,7 @@ show network topology ...@@ -444,7 +446,7 @@ show network topology
*/ */
void XNet::ShowNetwork(FILE * file, XTensor * node) void XNet::ShowNetwork(FILE * file, XTensor * node)
{ {
XList roots(1); TensorList roots(1);
roots.Add(node); roots.Add(node);
Traverse(roots); Traverse(roots);
...@@ -459,14 +461,14 @@ void XNet::ShowNetwork(FILE * file, XTensor * node) ...@@ -459,14 +461,14 @@ void XNet::ShowNetwork(FILE * file, XTensor * node)
} }
/* /*
search for a node in a top-down manner by its name search for a node in a top-down manner by its name
>> top - the top most node >> top - the top most node
<< return - the node we found << return - the node we found
*/ */
XTensor * XNet::SearchNode(XTensor * top, const char * name) //XTensor * XNet::SearchNode(XTensor * top, const char * name)
{ //{
return XLink::SearchNode(top, name); //return XLink::SearchNode(top, name);
} //}
} }
\ No newline at end of file
...@@ -23,6 +23,7 @@ ...@@ -23,6 +23,7 @@
#include "../tensor/XTensor.h" #include "../tensor/XTensor.h"
#include "../tensor/function/FHeader.h" #include "../tensor/function/FHeader.h"
#include "../tensor/loss/LHeader.h"
#ifndef __XNET_H__ #ifndef __XNET_H__
#define __XNET_H__ #define __XNET_H__
...@@ -36,16 +37,16 @@ struct XNet ...@@ -36,16 +37,16 @@ struct XNet
unsigned int id; unsigned int id;
/* tensor nodes of the network (in order) */ /* tensor nodes of the network (in order) */
XList nodes; TensorList nodes;
/* tensor nodes to keep gradient for output (e.g., SGD)*/ /* tensor nodes to keep gradient for output (e.g., SGD)*/
XList gradNodes; TensorList gradNodes;
/* output nodes of the network */ /* output nodes of the network */
XList outputs; TensorList outputs;
/* input nodes of the network */ /* input nodes of the network */
XList inputs; TensorList inputs;
/* indicates whether the network just keeps the gradient for parameter tensors */ /* indicates whether the network just keeps the gradient for parameter tensors */
bool isGradEfficient; bool isGradEfficient;
...@@ -70,15 +71,15 @@ struct XNet ...@@ -70,15 +71,15 @@ struct XNet
/* backward propagation to obtain gradient /* backward propagation to obtain gradient
with a number of root nodes */ with a number of root nodes */
void Backward(XList &roots, LOSS_FUNCTION_NAME loss = NOLOSS); void Backward(TensorList &roots, LOSS_FUNCTION_NAME loss = NOLOSS);
/* backward propagation to obtain gradient /* backward propagation to obtain gradient
with a number of root nodes */ with a number of root nodes */
void Backward(XList &roots, XList &golds, LOSS_FUNCTION_NAME loss = NOLOSS); void Backward(TensorList &roots, TensorList &golds, LOSS_FUNCTION_NAME loss = NOLOSS);
/* backward propagation to obtain gradient wrt. the loss/error function /* backward propagation to obtain gradient wrt. the loss/error function
with a number of root nodes */ with a number of root nodes */
void Backward(XList &roots, XList &golds, XList &paddings, LOSS_FUNCTION_NAME loss = NOLOSS); void Backward(TensorList &roots, TensorList &golds, TensorList &paddings, LOSS_FUNCTION_NAME loss = NOLOSS);
/* backward computation for a given node */ /* backward computation for a given node */
void BackwardNode(XTensor * node, bool isEfficent = false); void BackwardNode(XTensor * node, bool isEfficent = false);
...@@ -92,10 +93,10 @@ struct XNet ...@@ -92,10 +93,10 @@ struct XNet
/* traverse the net and find the topological order by /* traverse the net and find the topological order by
depth-first search (Tarjan's algorithm) */ depth-first search (Tarjan's algorithm) */
void Traverse(XList &roots); void Traverse(TensorList &roots);
/* depth-first search given a node (Tarjan's algorithm for topological ordering) */ /* depth-first search given a node (Tarjan's algorithm for topological ordering) */
void TarjanVisit(XTensor * node, XList &orders, const unsigned int code); void TarjanVisit(XTensor * node, TensorList &orders, const unsigned int code);
/* dump network information */ /* dump network information */
void Dump(FILE * file); void Dump(FILE * file);
...@@ -113,8 +114,8 @@ struct XNet ...@@ -113,8 +114,8 @@ struct XNet
void ShowNetwork(FILE * file, XTensor * node); void ShowNetwork(FILE * file, XTensor * node);
/* search a node in a top-down manner by its name */ /* search a node in a top-down manner by its name */
static //static
XTensor * SearchNode(XTensor * top, const char * name); //XTensor * SearchNode(XTensor * top, const char * name);
}; };
/* we make a unique id for every tensor */ /* we make a unique id for every tensor */
......
...@@ -247,13 +247,13 @@ void Check(FNNModel &model) ...@@ -247,13 +247,13 @@ void Check(FNNModel &model)
/* make a hard copy of the fnn model */ /* make a hard copy of the fnn model */
void Copy(FNNModel &tgt, FNNModel &src) void Copy(FNNModel &tgt, FNNModel &src)
{ {
InitTensor(&tgt.embeddingW, &src.embeddingW); InitTensorV2(&tgt.embeddingW, &src.embeddingW);
for(int i = 0; i < MAX_HIDDEN_NUM; i++){ for(int i = 0; i < MAX_HIDDEN_NUM; i++){
InitTensor(&tgt.hiddenW[i], &src.hiddenW[i]); InitTensorV2(&tgt.hiddenW[i], &src.hiddenW[i]);
InitTensor(&tgt.hiddenB[i], &src.hiddenB[i]); InitTensorV2(&tgt.hiddenB[i], &src.hiddenB[i]);
} }
InitTensor(&tgt.outputW, &src.outputW); InitTensorV2(&tgt.outputW, &src.outputW);
InitTensor(&tgt.outputB, &src.outputB); InitTensorV2(&tgt.outputB, &src.outputB);
tgt.n = src.n; tgt.n = src.n;
tgt.eSize = src.eSize; tgt.eSize = src.eSize;
...@@ -310,7 +310,7 @@ initialize a 1d tensor using the fnn model setting ...@@ -310,7 +310,7 @@ initialize a 1d tensor using the fnn model setting
*/ */
void InitModelTensor1D(XTensor &tensor, int num, FNNModel &model) void InitModelTensor1D(XTensor &tensor, int num, FNNModel &model)
{ {
InitTensor1D(&tensor, num, X_FLOAT, model.devID, model.mem); InitTensor1DV2(&tensor, num, X_FLOAT, model.devID);
} }
/* /*
...@@ -322,7 +322,7 @@ initialize a 2d tensor using the fnn model setting ...@@ -322,7 +322,7 @@ initialize a 2d tensor using the fnn model setting
*/ */
void InitModelTensor2D(XTensor &tensor, int rowNum, int colNum, FNNModel &model) void InitModelTensor2D(XTensor &tensor, int rowNum, int colNum, FNNModel &model)
{ {
InitTensor2D(&tensor, rowNum, colNum, X_FLOAT, model.devID, model.mem); InitTensor2DV2(&tensor, rowNum, colNum, X_FLOAT, model.devID);
} }
...@@ -449,6 +449,9 @@ void Train(const char * train, bool isShuffled, FNNModel &model) ...@@ -449,6 +449,9 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
/* the gold standard */ /* the gold standard */
XTensor gold; XTensor gold;
/* the loss tensor */
XTensor lossTensor;
/* make the input tensor for position i */ /* make the input tensor for position i */
for(int i = 0; i < model.n - 1; i++) for(int i = 0; i < model.n - 1; i++)
MakeWordBatch(inputs[i], ngrams, ngramNum, i, model.vSize, model.devID, model.mem); MakeWordBatch(inputs[i], ngrams, ngramNum, i, model.vSize, model.devID, model.mem);
...@@ -466,6 +469,8 @@ void Train(const char * train, bool isShuffled, FNNModel &model) ...@@ -466,6 +469,8 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
/* forward computation */ /* forward computation */
Forward(inputs, output, model, net); Forward(inputs, output, model, net);
/* backward computation to obtain gradients */ /* backward computation to obtain gradients */
Backward(inputs, output, gold, CROSSENTROPY, model, grad, net); Backward(inputs, output, gold, CROSSENTROPY, model, grad, net);
...@@ -483,9 +488,11 @@ void Train(const char * train, bool isShuffled, FNNModel &model) ...@@ -483,9 +488,11 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
/* this is implemented by multiply function */ /* this is implemented by multiply function */
//ForwardAutoDiff(inputs, output, model); //ForwardAutoDiff(inputs, output, model);
lossTensor = CrossEntropy(output, gold);
/* automatic differentiation */ /* automatic differentiation */
autoDiffer.Backward(output, gold, CROSSENTROPY); autoDiffer.Backward(lossTensor);
//autoDiffer.Backward(output, gold, CROSSENTROPY);
/* update model parameters */ /* update model parameters */
Update(model, grad, learningRate, true); Update(model, grad, learningRate, true);
...@@ -493,8 +500,10 @@ void Train(const char * train, bool isShuffled, FNNModel &model) ...@@ -493,8 +500,10 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
/* get probabilities */ /* get probabilities */
float prob = GetProb(output, gold); float prob = GetProb(output, gold);
loss += -prob; prob = ReduceSumAll(lossTensor);
loss += prob;
wordCount += ngramNum; wordCount += ngramNum;
wordCountTotal += ngramNum; wordCountTotal += ngramNum;
...@@ -537,8 +546,8 @@ update the model parameters using the delta rule ...@@ -537,8 +546,8 @@ update the model parameters using the delta rule
*/ */
void Update(FNNModel &model, FNNModel &grad, float epsilon, bool isNodeGrad) void Update(FNNModel &model, FNNModel &grad, float epsilon, bool isNodeGrad)
{ {
XList paraList(10); TensorList paraList(10);
XList gradList(10); TensorList gradList(10);
paraList.Add(&model.outputW); paraList.Add(&model.outputW);
paraList.Add(&model.outputB); paraList.Add(&model.outputB);
...@@ -595,14 +604,14 @@ get prediction probabilites of the gold words ...@@ -595,14 +604,14 @@ get prediction probabilites of the gold words
float GetProb(XTensor &output, XTensor &gold, XTensor * wordProbs) float GetProb(XTensor &output, XTensor &gold, XTensor * wordProbs)
{ {
XTensor probs; XTensor probs;
InitTensor(&probs, &output); InitTensorV2(&probs, &output);
/* probs[i,j] = output[i,j] * gold[i,j] */ /* probs[i,j] = output[i,j] * gold[i,j] */
_Multiply(&output, &gold, &probs); _Multiply(&output, &gold, &probs);
/* probability of each word */ /* probability of each word */
XTensor wprobs; XTensor wprobs;
InitTensor1D(&wprobs, output.GetDim(0), output.dataType, output.devID, output.mem); InitTensor1DV2(&wprobs, output.GetDim(0), output.dataType, output.devID);
_ReduceSum(&probs, &wprobs, 1); _ReduceSum(&probs, &wprobs, 1);
if(wordProbs != NULL) if(wordProbs != NULL)
_CopyValues(&wprobs, wordProbs); _CopyValues(&wprobs, wordProbs);
...@@ -616,7 +625,7 @@ float GetProb(XTensor &output, XTensor &gold, XTensor * wordProbs) ...@@ -616,7 +625,7 @@ float GetProb(XTensor &output, XTensor &gold, XTensor * wordProbs)
/* probability for the batch */ /* probability for the batch */
XTensor result; XTensor result;
InitTensor1D(&result, 1, X_FLOAT, output.devID, output.mem); InitTensor1DV2(&result, 1, X_FLOAT, output.devID);
_ReduceSum(&probs, &result, 1); _ReduceSum(&probs, &result, 1);
return result.Get1D(0); return result.Get1D(0);
...@@ -718,7 +727,7 @@ The indexed cell is set to 1, and 0 otherwise. ...@@ -718,7 +727,7 @@ The indexed cell is set to 1, and 0 otherwise.
void InitZeroOneTensor2D(XTensor &tensor, int rowNum, int colNum, int * rows, int * cols, void InitZeroOneTensor2D(XTensor &tensor, int rowNum, int colNum, int * rows, int * cols,
int itemNum, int devID, XMem * mem) int itemNum, int devID, XMem * mem)
{ {
InitTensor2D(&tensor, rowNum, colNum, X_FLOAT, devID, mem); InitTensor2DV2(&tensor, rowNum, colNum, X_FLOAT, devID);
tensor.SetZeroAll(); tensor.SetZeroAll();
...@@ -765,7 +774,7 @@ void Forward(XTensor inputs[], XTensor &output, FNNModel &model, FNNNet &net) ...@@ -765,7 +774,7 @@ void Forward(XTensor inputs[], XTensor &output, FNNModel &model, FNNNet &net)
int batchSize = -1; int batchSize = -1;
int n = model.n; int n = model.n;
int depth = model.hDepth; int depth = model.hDepth;
XList eList(n - 1); TensorList eList(n - 1);
/* previous n - 1 words */ /* previous n - 1 words */
for(int i = 0; i < n - 1; i++){ for(int i = 0; i < n - 1; i++){
...@@ -811,7 +820,7 @@ void Forward(XTensor inputs[], XTensor &output, FNNModel &model, FNNNet &net) ...@@ -811,7 +820,7 @@ void Forward(XTensor inputs[], XTensor &output, FNNModel &model, FNNNet &net)
/* make a 2d tensor for the bias term */ /* make a 2d tensor for the bias term */
XTensor b2D; XTensor b2D;
InitTensor(&b2D, &s); InitTensorV2(&b2D, &s);
_Unsqueeze(&b, &b2D, 0, batchSize); _Unsqueeze(&b, &b2D, 0, batchSize);
/* introduce bias term: /* introduce bias term:
...@@ -843,7 +852,7 @@ void Forward(XTensor inputs[], XTensor &output, FNNModel &model, FNNNet &net) ...@@ -843,7 +852,7 @@ void Forward(XTensor inputs[], XTensor &output, FNNModel &model, FNNNet &net)
_MatrixMul(&h_last, X_NOTRANS, &w, X_NOTRANS, &s); _MatrixMul(&h_last, X_NOTRANS, &w, X_NOTRANS, &s);
XTensor b2D; XTensor b2D;
InitTensor(&b2D, &s); InitTensorV2(&b2D, &s);
_Unsqueeze(&b, &b2D, 0, batchSize); _Unsqueeze(&b, &b2D, 0, batchSize);
_Sum(&s, &b2D, &s); _Sum(&s, &b2D, &s);
...@@ -908,8 +917,8 @@ void Backward(XTensor inputs[], XTensor &output, XTensor &gold, LOSS_FUNCTION_NA ...@@ -908,8 +917,8 @@ void Backward(XTensor inputs[], XTensor &output, XTensor &gold, LOSS_FUNCTION_NA
XTensor dedsHidden; XTensor dedsHidden;
XTensor dedxBottom; XTensor dedxBottom;
if (depth > 0) if (depth > 0)
InitTensor(&dedsHidden, &dedx); InitTensorV2(&dedsHidden, &dedx);
InitTensor(&dedxBottom, &net.embeddingCat); InitTensorV2(&dedxBottom, &net.embeddingCat);
/* back-propagation from top to bottom in the stack of hidden layers /* back-propagation from top to bottom in the stack of hidden layers
for each layer, h = f(s) for each layer, h = f(s)
...@@ -943,11 +952,11 @@ void Backward(XTensor inputs[], XTensor &output, XTensor &gold, LOSS_FUNCTION_NA ...@@ -943,11 +952,11 @@ void Backward(XTensor inputs[], XTensor &output, XTensor &gold, LOSS_FUNCTION_NA
_CopyValues(&dedx, &gradPassed); _CopyValues(&dedx, &gradPassed);
} }
XList eList(n - 1); TensorList eList(n - 1);
/* back-propagation for the embedding layer */ /* back-propagation for the embedding layer */
for (int i = 0; i < n - 1; i++) { for (int i = 0; i < n - 1; i++) {
XTensor * dedy = NewTensor2D(batchSize, model.eSize, X_FLOAT, model.devID, model.mem); XTensor * dedy = NewTensor2DV2(batchSize, model.eSize, X_FLOAT, model.devID);
eList.Add(dedy); eList.Add(dedy);
} }
...@@ -999,7 +1008,7 @@ void ForwardAutoDiff(NGram * ngrams, int batch, XTensor &output, FNNModel &model ...@@ -999,7 +1008,7 @@ void ForwardAutoDiff(NGram * ngrams, int batch, XTensor &output, FNNModel &model
} }
} }
InitTensor1D(&words, size, X_INT, model.devID, model.mem); InitTensor1DV2(&words, size, X_INT, model.devID);
words.SetData(index, size); words.SetData(index, size);
embeddingBig = Gather(model.embeddingW, words); embeddingBig = Gather(model.embeddingW, words);
...@@ -1017,7 +1026,8 @@ void ForwardAutoDiff(NGram * ngrams, int batch, XTensor &output, FNNModel &model ...@@ -1017,7 +1026,8 @@ void ForwardAutoDiff(NGram * ngrams, int batch, XTensor &output, FNNModel &model
hidden = HardTanH(MMul(hidden, model.hiddenW[i]) + model.hiddenB[i]); hidden = HardTanH(MMul(hidden, model.hiddenW[i]) + model.hiddenB[i]);
/* output layer */ /* output layer */
output = LogSoftmax(MMul(hidden, model.outputW) + model.outputB, 1); //output = LogSoftmax(MMul(hidden, model.outputW) + model.outputB, 1);
output = Softmax(MMul(hidden, model.outputW) + model.outputB, 1);
} }
/* /*
...@@ -1036,7 +1046,7 @@ void ForwardAutoDiff(XTensor inputs[], XTensor &output, FNNModel &model) ...@@ -1036,7 +1046,7 @@ void ForwardAutoDiff(XTensor inputs[], XTensor &output, FNNModel &model)
XTensor hidden; XTensor hidden;
XTensor b; XTensor b;
XList inputList(n - 1); TensorList inputList(n - 1);
for(int i = 0; i < n - 1; i++) for(int i = 0; i < n - 1; i++)
inputList.Add(inputs + i); inputList.Add(inputs + i);
...@@ -1177,7 +1187,7 @@ void Test(const char * test, const char * result, FNNModel &model) ...@@ -1177,7 +1187,7 @@ void Test(const char * test, const char * result, FNNModel &model)
/* prediction probabilities */ /* prediction probabilities */
XTensor probs; XTensor probs;
InitTensor1D(&probs, ngramNum); InitTensor1DV2(&probs, ngramNum);
/* get probabilities */ /* get probabilities */
float prob = GetProb(output, gold, &probs); float prob = GetProb(output, gold, &probs);
......
...@@ -127,7 +127,7 @@ XTensor T2TAttention::MakeBig(XTensor &kqv, XTensor &mask, bool isTraining) ...@@ -127,7 +127,7 @@ XTensor T2TAttention::MakeBig(XTensor &kqv, XTensor &mask, bool isTraining)
XTensor q2; XTensor q2;
XTensor v2; XTensor v2;
XTensor kqv2; XTensor kqv2;
XList split; TensorList split;
kqv2 = MMul(kqv, wbig); kqv2 = MMul(kqv, wbig);
......
...@@ -253,7 +253,7 @@ void T2TBatchLoader::ClearBuf() ...@@ -253,7 +253,7 @@ void T2TBatchLoader::ClearBuf()
nseqBuf = 0; nseqBuf = 0;
nextSeq = -1; nextSeq = -1;
} }
/* /*
set the random batch flag set the random batch flag
>> flag - as it is >> flag - as it is
...@@ -697,4 +697,4 @@ void T2TBatchLoader::Shuffle(const char * srcFile, const char * tgtFile) ...@@ -697,4 +697,4 @@ void T2TBatchLoader::Shuffle(const char * srcFile, const char * tgtFile)
} }
} }
\ No newline at end of file
...@@ -119,7 +119,7 @@ public: ...@@ -119,7 +119,7 @@ public:
/* clear data buffer */ /* clear data buffer */
void ClearBuf(); void ClearBuf();
/* set the random batch flag */ /* set the random batch flag */
void SetRandomBatch(bool flag = true); void SetRandomBatch(bool flag = true);
...@@ -159,4 +159,4 @@ public: ...@@ -159,4 +159,4 @@ public:
}; };
} }
#endif #endif
\ No newline at end of file
...@@ -103,4 +103,4 @@ public: ...@@ -103,4 +103,4 @@ public:
} }
#endif #endif
\ No newline at end of file
...@@ -85,7 +85,7 @@ void T2TModel::InitModel(int argc, char ** argv) ...@@ -85,7 +85,7 @@ void T2TModel::InitModel(int argc, char ** argv)
if(isMT) if(isMT)
decoder->InitModel(argc, argv, true, 0, devID, mem); decoder->InitModel(argc, argv, true, 0, devID, mem);
XList params(10); TensorList params(10);
GetParams(params); GetParams(params);
for(int i = 0; i < params.count; i++){ for(int i = 0; i < params.count; i++){
...@@ -371,9 +371,9 @@ void T2TModel::MakeMTMaskDec(XTensor &inputEnc, XTensor &inputDec, ...@@ -371,9 +371,9 @@ void T2TModel::MakeMTMaskDec(XTensor &inputEnc, XTensor &inputDec,
//maskDec.Dump(stderr, "mask: "); //maskDec.Dump(stderr, "mask: ");
_ScaleAndShiftMe(&maskDec, 1.0F, -1e9F); _ScaleAndShiftMe(&maskDec, 1.0F, -1e9F);
//maskDec.Dump(stderr, "mask: ");
//maskDec.Dump(stderr, "mask: ");
/* encoder-decoder mask that prevents the attention to padding dummy words */ /* encoder-decoder mask that prevents the attention to padding dummy words */
dims[inputDec.order + 1] = inputEnc.GetDim(inputEnc.order - 1); dims[inputDec.order + 1] = inputEnc.GetDim(inputEnc.order - 1);
InitTensor(&maskEncDec, inputDec.order + 2, dims, X_FLOAT, 1.0F, paddingEnc.devID, paddingEnc.mem); InitTensor(&maskEncDec, inputDec.order + 2, dims, X_FLOAT, 1.0F, paddingEnc.devID, paddingEnc.mem);
...@@ -383,16 +383,16 @@ void T2TModel::MakeMTMaskDec(XTensor &inputEnc, XTensor &inputDec, ...@@ -383,16 +383,16 @@ void T2TModel::MakeMTMaskDec(XTensor &inputEnc, XTensor &inputDec,
XTensor * maskEncDecTMPDec = NewTensorBuf(maskEncDecTMPEnc, paddingEnc.devID, paddingEnc.mem); XTensor * maskEncDecTMPDec = NewTensorBuf(maskEncDecTMPEnc, paddingEnc.devID, paddingEnc.mem);
_Unsqueeze(&paddingEnc, maskEncDecTMPEnc, paddingEnc.order - 1, paddingDec.GetDim(-1)); _Unsqueeze(&paddingEnc, maskEncDecTMPEnc, paddingEnc.order - 1, paddingDec.GetDim(-1));
//paddingEnc.Dump(stderr, "paddingenc:"); //paddingEnc.Dump(stderr, "paddingenc:");
//maskEncDecTMPEnc->Dump(stderr, "maskencdectmpenc:"); //maskEncDecTMPEnc->Dump(stderr, "maskencdectmpenc:");
_ScaleAndShiftMe(maskEncDecTMPEnc, 1e9F, -1e9F); _ScaleAndShiftMe(maskEncDecTMPEnc, 1e9F, -1e9F);
//maskEncDecTMPEnc->Dump(stderr, "maskencdectmpenc:"); //maskEncDecTMPEnc->Dump(stderr, "maskencdectmpenc:");
_Unsqueeze(maskEncDecTMPEnc, &maskEncDec, 0, dims[0]); _Unsqueeze(maskEncDecTMPEnc, &maskEncDec, 0, dims[0]);
//maskEncDecTMPEnc->Dump(stderr, "maskencdectmpenc:"); //maskEncDecTMPEnc->Dump(stderr, "maskencdectmpenc:");
DelTensorBuf(maskEncDecTMPDec); DelTensorBuf(maskEncDecTMPDec);
...@@ -403,7 +403,7 @@ void T2TModel::MakeMTMaskDec(XTensor &inputEnc, XTensor &inputDec, ...@@ -403,7 +403,7 @@ void T2TModel::MakeMTMaskDec(XTensor &inputEnc, XTensor &inputDec,
get parameter matrices get parameter matrices
>> list - the list that keeps the parameter matrices >> list - the list that keeps the parameter matrices
*/ */
void T2TModel::GetParams(XList &list) void T2TModel::GetParams(TensorList &list)
{ {
list.Clear(); list.Clear();
list.Add(&outputLayer->w); list.Add(&outputLayer->w);
...@@ -465,7 +465,7 @@ void T2TModel::Dump(const char * fn) ...@@ -465,7 +465,7 @@ void T2TModel::Dump(const char * fn)
FILE * file = fopen(fn, "wb"); FILE * file = fopen(fn, "wb");
CheckNTErrors(file, "Cannot open the model file"); CheckNTErrors(file, "Cannot open the model file");
XList params(100); TensorList params(100);
GetParams(params); GetParams(params);
...@@ -489,7 +489,7 @@ void T2TModel::Read(const char * fn) ...@@ -489,7 +489,7 @@ void T2TModel::Read(const char * fn)
FILE * file = fopen(fn, "rb"); FILE * file = fopen(fn, "rb");
CheckNTErrors(file, "Cannot open the model file"); CheckNTErrors(file, "Cannot open the model file");
XList params(100); TensorList params(100);
GetParams(params); GetParams(params);
......
...@@ -98,7 +98,7 @@ public: ...@@ -98,7 +98,7 @@ public:
XTensor &maskDec, XTensor &maskEncDec); XTensor &maskDec, XTensor &maskEncDec);
/* get parameter matrices */ /* get parameter matrices */
void GetParams(XList &list); void GetParams(TensorList &list);
/* dump the parameters */ /* dump the parameters */
void Dump(const char * fn); void Dump(const char * fn);
......
...@@ -93,9 +93,8 @@ void T2TOutput::Make(XTensor &input, XTensor &output) ...@@ -93,9 +93,8 @@ void T2TOutput::Make(XTensor &input, XTensor &output)
{ {
XTensor &x = input; XTensor &x = input;
output = LogSoftmax(MMul(x, w), -1); //output = LogSoftmax(MMul(x, w), -1);
//output = Softmax(MMul(x, w), -1); output = Softmax(MMul(x, w), -1);
output.SetName(OUTPUT_NAME); output.SetName(OUTPUT_NAME);
} }
......
...@@ -73,4 +73,4 @@ public: ...@@ -73,4 +73,4 @@ public:
} }
#endif #endif
\ No newline at end of file
...@@ -115,7 +115,7 @@ void T2TPredictor::Create(T2TModel * model, XTensor * top, const XTensor * input ...@@ -115,7 +115,7 @@ void T2TPredictor::Create(T2TModel * model, XTensor * top, const XTensor * input
state->stateNum = 0; state->stateNum = 0;
} }
/* /*
set start symbol set start symbol
>> symbol - the symbol (in integer) >> symbol - the symbol (in integer)
...@@ -146,7 +146,7 @@ predict the next state ...@@ -146,7 +146,7 @@ predict the next state
>> inputEnc - input of the encoder >> inputEnc - input of the encoder
>> paddingEnc - padding of the encoder >> paddingEnc - padding of the encoder
*/ */
void T2TPredictor::Predict(T2TStateBundle * next, XTensor * encoding, void T2TPredictor::Predict(T2TStateBundle * next, XTensor * encoding,
XTensor * inputEnc, XTensor * paddingEnc) XTensor * inputEnc, XTensor * paddingEnc)
{ {
int dims[MAX_TENSOR_DIM_NUM]; int dims[MAX_TENSOR_DIM_NUM];
...@@ -174,12 +174,13 @@ void T2TPredictor::Predict(T2TStateBundle * next, XTensor * encoding, ...@@ -174,12 +174,13 @@ void T2TPredictor::Predict(T2TStateBundle * next, XTensor * encoding,
_SetDataFixedInt(&first, startSymbol); _SetDataFixedInt(&first, startSymbol);
/* add a new word into the input sequence of the decoder side */ /* add a new word into the input sequence of the decoder side */
if(inputLast == NULL){ if (inputLast == NULL) {
inputDec = Identity(first); inputDec = Identity(first);
} }
else{ else{
inputDec = GeneratePaths(s); inputDec = GeneratePaths(s);
inputDec.SetDevice(inputEnc->devID, inputEnc->mem); inputDec.SetDevice(inputEnc->devID, inputEnc->mem);
inputDec = Concatenate(first, inputDec, inputDec.order - 1); inputDec = Concatenate(first, inputDec, inputDec.order - 1);
} }
......
...@@ -49,7 +49,7 @@ public: ...@@ -49,7 +49,7 @@ public:
/* indicates whether the state is the start */ /* indicates whether the state is the start */
bool isStart; bool isStart;
/* indicates whether the state is completed */ /* indicates whether the state is completed */
bool isCompleted; bool isCompleted;
...@@ -96,10 +96,10 @@ public: ...@@ -96,10 +96,10 @@ public:
/* layers on the encoder side. We actually use the encoder output instead /* layers on the encoder side. We actually use the encoder output instead
of all hidden layers. */ of all hidden layers. */
XList layersEnc; TensorList layersEnc;
/* layers on the decoder side */ /* layers on the decoder side */
XList layersDec; TensorList layersDec;
/* list of states */ /* list of states */
T2TState * states; T2TState * states;
...@@ -134,7 +134,7 @@ private: ...@@ -134,7 +134,7 @@ private:
/* current state */ /* current state */
T2TStateBundle * s; T2TStateBundle * s;
/* start symbol */ /* start symbol */
int startSymbol; int startSymbol;
...@@ -147,7 +147,7 @@ public: ...@@ -147,7 +147,7 @@ public:
/* create an initial state */ /* create an initial state */
void Create(T2TModel * model, XTensor * top, const XTensor * input, int beamSize, T2TStateBundle * state); void Create(T2TModel * model, XTensor * top, const XTensor * input, int beamSize, T2TStateBundle * state);
/* set the start symbol */ /* set the start symbol */
void SetStartSymbol(int symbol); void SetStartSymbol(int symbol);
......
...@@ -62,7 +62,7 @@ void T2TSearch::Init(int argc, char ** argv) ...@@ -62,7 +62,7 @@ void T2TSearch::Init(int argc, char ** argv)
LoadParamFloat(argc, argv, "lenalpha", &alpha, 0.2F); LoadParamFloat(argc, argv, "lenalpha", &alpha, 0.2F);
LoadParamInt(argc, argv, "endid", endSymbols, -1); LoadParamInt(argc, argv, "endid", endSymbols, -1);
LoadParamInt(argc, argv, "startid", &startSymbol, -1); LoadParamInt(argc, argv, "startid", &startSymbol, -1);
if(endSymbols[0] >= 0) if(endSymbols[0] >= 0)
endSymbolNum = 1; endSymbolNum = 1;
} }
...@@ -78,7 +78,7 @@ void T2TSearch::Search(T2TModel * model, XTensor * input, XTensor * padding, XTe ...@@ -78,7 +78,7 @@ void T2TSearch::Search(T2TModel * model, XTensor * input, XTensor * padding, XTe
{ {
T2TPredictor predictor; T2TPredictor predictor;
XTensor maskEnc; XTensor maskEnc;
XTensor encoding; XTensor encoding;
XTensor encodingBeam; XTensor encodingBeam;
XTensor inputBeam; XTensor inputBeam;
XTensor paddingBeam; XTensor paddingBeam;
...@@ -90,7 +90,7 @@ void T2TSearch::Search(T2TModel * model, XTensor * input, XTensor * padding, XTe ...@@ -90,7 +90,7 @@ void T2TSearch::Search(T2TModel * model, XTensor * input, XTensor * padding, XTe
/* encoder mask */ /* encoder mask */
model->MakeMTMaskEnc(*input, *padding, maskEnc); model->MakeMTMaskEnc(*input, *padding, maskEnc);
//input->Dump(stderr, "input:"); //input->Dump(stderr, "input:");
//maskEnc.Dump(stderr, "maskenc:"); //maskEnc.Dump(stderr, "maskenc:");
...@@ -197,12 +197,13 @@ void T2TSearch::Score(T2TStateBundle * prev, T2TStateBundle * beam) ...@@ -197,12 +197,13 @@ void T2TSearch::Score(T2TStateBundle * prev, T2TStateBundle * beam)
prob.Reshape(prob.unitNum/outputSize, outputSize); prob.Reshape(prob.unitNum/outputSize, outputSize);
score.Reshape(score.unitNum/outputSize, outputSize); score.Reshape(score.unitNum/outputSize, outputSize);
probPath.Reshape(score.unitNum/outputSize, outputSize); probPath.Reshape(score.unitNum / outputSize, outputSize);
probPathPrev.Reshape(probPathPrev.unitNum); probPathPrev.Reshape(probPathPrev.unitNum);
/* the log-scale probability of the entire sequence */ /* the log-scale probability of the entire sequence */
_SumDim(&prob, &probPathPrev, &probPath, 0); _SumDim(&prob, &probPathPrev, &probPath, 0);
InitTensor(&len, &lenPrev); InitTensor(&len, &lenPrev);
InitTensor(&lp, &lenPrev); InitTensor(&lp, &lenPrev);
...@@ -275,8 +276,8 @@ void T2TSearch::Generate(T2TStateBundle * beam) ...@@ -275,8 +276,8 @@ void T2TSearch::Generate(T2TStateBundle * beam)
dimsBeam[order - 3] /= beamSize; dimsBeam[order - 3] /= beamSize;
dimsBeam[order - 1] *= beamSize; dimsBeam[order - 1] *= beamSize;
dimsTopK[order - 3] = dimsBeam[order - 3]; dimsTopK[order - 3] = dimsBeam[order - 3];
dimsTopK[order - 1] = beamSize; dimsTopK[order - 1] = beamSize;
InitTensor(&scoreTopK, order, dimsTopK, score.dataType, InitTensor(&scoreTopK, order, dimsTopK, score.dataType,
1.0F, score.devID, score.mem); 1.0F, score.devID, score.mem);
...@@ -302,7 +303,7 @@ void T2TSearch::Generate(T2TStateBundle * beam) ...@@ -302,7 +303,7 @@ void T2TSearch::Generate(T2TStateBundle * beam)
/* Then, we do something similar to "preID". For the top-k predictions, we need /* Then, we do something similar to "preID". For the top-k predictions, we need
to know their indices in the vocabulary. We compute the offset of each prediction to know their indices in the vocabulary. We compute the offset of each prediction
in the vocabulary by dividing it with vocab-size and computing the remainder. */ in the vocabulary by dividing it with vocab-size and computing the remainder. */
Mod(index, sizeVocab); _ModMe(index, sizeVocab);
score.Reshape(order, dims); score.Reshape(order, dims);
...@@ -314,37 +315,39 @@ void T2TSearch::Generate(T2TStateBundle * beam) ...@@ -314,37 +315,39 @@ void T2TSearch::Generate(T2TStateBundle * beam)
XTensor indexCPU; XTensor indexCPU;
InitTensor(&indexCPU, index.order, index.dimSize, index.dataType, index.denseRatio, -1); InitTensor(&indexCPU, index.order, index.dimSize, index.dataType, index.denseRatio, -1);
CopyValues(index, indexCPU); CopyValues(index, indexCPU);
for(int i = 0; i < indexCPU.unitNum; i++)
for (int i = 0; i < indexCPU.unitNum; i++)
indexCPU.SetInt(i * stride + indexCPU.GetInt(i), i); indexCPU.SetInt(i * stride + indexCPU.GetInt(i), i);
CheckNTErrors(XTensor::IsSameShaped(&prob, &probPath), "Wrong tensor shape!"); CheckNTErrors(XTensor::IsSameShaped(&prob, &probPath), "Wrong tensor shape!");
/* sequence probability and prediction probability of top-k candidates */ /* sequence probability of top-k candidates */
XTensor probPathTopK; XTensor probPathTopK;
InitTensor(&probPathTopK, &scoreTopK); InitTensor(&probPathTopK, &scoreTopK);
XTensor probTopK; XTensor probTopK;
InitTensor(&probTopK, &scoreTopK); InitTensor(&probTopK, &scoreTopK);
for(int i = 0; i < probPath.order; i++){ for (int i = 0; i < probPath.order; i++) {
dims[i] = probPath.GetDim(i); dims[i] = probPath.GetDim(i);
dimsTopK[i] = probPathTopK.GetDim(i); dimsTopK[i] = probPathTopK.GetDim(i);
} }
order = probPath.order; order = probPath.order;
probPath.Reshape(1, probPath.unitNum); probPath.Reshape(1, probPath.unitNum);
probPathTopK.Reshape(1, probPathTopK.unitNum); probPathTopK.Reshape(1, probPathTopK.unitNum);
prob.Reshape(1, prob.unitNum); prob.Reshape(1, prob.unitNum);
probTopK.Reshape(1, probTopK.unitNum); probTopK.Reshape(1, probTopK.unitNum);
_Gather(&probPath, &probPathTopK, probPathTopK.order - 1, (int*)indexCPU.data, indexCPU.unitNum); _Gather(&probPath, &probPathTopK, probPathTopK.order - 1, (int*)indexCPU.data, indexCPU.unitNum);
_Gather(&prob, &probTopK, probTopK.order - 1, (int*)indexCPU.data, indexCPU.unitNum); _Gather(&prob, &probTopK, probTopK.order - 1, (int*)indexCPU.data, indexCPU.unitNum);
probPath.Reshape(order, dims); probPath.Reshape(order, dims);
probPathTopK.Reshape(order, dimsTopK); probPathTopK.Reshape(order, dimsTopK);
prob.Reshape(order, dims); prob.Reshape(order, dims);
probTopK.Reshape(order, dimsTopK); probTopK.Reshape(order, dimsTopK);
probPath = probPathTopK; probPath = probPathTopK;
prob = probTopK; prob = probTopK;
} }
...@@ -396,7 +399,7 @@ void T2TSearch::Expand(T2TStateBundle * prev, T2TStateBundle * beam) ...@@ -396,7 +399,7 @@ void T2TSearch::Expand(T2TStateBundle * prev, T2TStateBundle * beam)
modification of the states. An alternative is to do this on GPUs but modification of the states. An alternative is to do this on GPUs but
it needs much more coding work and the speed-up is not obvious. */ it needs much more coding work and the speed-up is not obvious. */
for(int i = 0; i < beam->stateNum; i += beamSize){ for(int i = 0; i < beam->stateNum; i += beamSize){
for(int j = 0; j < beamSize; j++){ for (int j = 0; j < beamSize; j++) {
int k = i + j; int k = i + j;
T2TState & state = states[k]; T2TState & state = states[k];
...@@ -405,7 +408,7 @@ void T2TSearch::Expand(T2TStateBundle * prev, T2TStateBundle * beam) ...@@ -405,7 +408,7 @@ void T2TSearch::Expand(T2TStateBundle * prev, T2TStateBundle * beam)
T2TState * last = prev->states + pid * beamSize + offset; T2TState * last = prev->states + pid * beamSize + offset;
CheckNTErrors(offset >= 0, "Wrong state index!"); CheckNTErrors(offset >= 0, "Wrong state index!");
/* pointer to the previous state */ /* pointer to the previous state */
if (prev->isStart) { if (prev->isStart) {
state.last = NULL; state.last = NULL;
...@@ -413,14 +416,14 @@ void T2TSearch::Expand(T2TStateBundle * prev, T2TStateBundle * beam) ...@@ -413,14 +416,14 @@ void T2TSearch::Expand(T2TStateBundle * prev, T2TStateBundle * beam)
state.nstep = 0; state.nstep = 0;
state.isCompleted = false; state.isCompleted = false;
} }
else{ else {
state.last = last; state.last = last;
state.pid = state.last->pid; state.pid = state.last->pid;
state.nstep = last->nstep + 1; state.nstep = last->nstep + 1;
state.isCompleted = last->isCompleted; state.isCompleted = last->isCompleted;
CheckNTErrors(offset < prev->stateNum, "Wrong state index!"); CheckNTErrors(offset < prev->stateNum, "Wrong state index!");
} }
/* scores */ /* scores */
state.modelScore = modelScore.Get(k); state.modelScore = modelScore.Get(k);
state.prob = prob.Get(k); state.prob = prob.Get(k);
...@@ -517,7 +520,7 @@ void T2TSearch::Dump(XTensor * output) ...@@ -517,7 +520,7 @@ void T2TSearch::Dump(XTensor * output)
/* we track the state from the end to the beginning */ /* we track the state from the end to the beginning */
while(state != NULL){ while(state != NULL){
if(!state->isCompleted) if (!state->isCompleted)
isCompleted = false; isCompleted = false;
if (isCompleted) if (isCompleted)
words[count++] = -1; words[count++] = -1;
...@@ -571,7 +574,7 @@ void T2TSearch::SetEnd(const int * tokens, const int tokenNum) ...@@ -571,7 +574,7 @@ void T2TSearch::SetEnd(const int * tokens, const int tokenNum)
endSymbolNum = tokenNum; endSymbolNum = tokenNum;
} }
/* /*
make a mask to prevent duplicated entries in beam expansion for the first position make a mask to prevent duplicated entries in beam expansion for the first position
>> beam - the beam that keeps the searching states >> beam - the beam that keeps the searching states
*/ */
...@@ -589,7 +592,7 @@ XTensor T2TSearch::MakeFirstMask(T2TStateBundle * beam) ...@@ -589,7 +592,7 @@ XTensor T2TSearch::MakeFirstMask(T2TStateBundle * beam)
mask.SetZeroAll(); mask.SetZeroAll();
for (int i = 0; i < mask.unitNum; i++) { for (int i = 0; i < mask.unitNum; i++) {
if(i % beamSize != 0) if (i % beamSize != 0)
mask.Set(-1e9, i); mask.Set(-1e9, i);
} }
......
...@@ -58,7 +58,7 @@ private: ...@@ -58,7 +58,7 @@ private:
/* number of the end symbols */ /* number of the end symbols */
int endSymbolNum; int endSymbolNum;
/* start symbol */ /* start symbol */
int startSymbol; int startSymbol;
......
...@@ -99,7 +99,7 @@ void T2TTester::Test(const char * fn, const char * ofn, T2TModel * model) ...@@ -99,7 +99,7 @@ void T2TTester::Test(const char * fn, const char * ofn, T2TModel * model)
/* an array that keeps the sequences */ /* an array that keeps the sequences */
int * seqs = new int[MILLION]; int * seqs = new int[MILLION];
batchLoader.SetRandomBatch(false); batchLoader.SetRandomBatch(false);
batchLoader.ClearBuf(); batchLoader.ClearBuf();
...@@ -116,7 +116,7 @@ void T2TTester::Test(const char * fn, const char * ofn, T2TModel * model) ...@@ -116,7 +116,7 @@ void T2TTester::Test(const char * fn, const char * ofn, T2TModel * model)
seacher.Search(model, &batchEnc, &paddingEnc, &output); seacher.Search(model, &batchEnc, &paddingEnc, &output);
Dump(ofile, &output); Dump(ofile, &output);
float prob = 0; float prob = 0;
loss += -prob; loss += -prob;
...@@ -144,7 +144,7 @@ void T2TTester::Test(const char * fn, const char * ofn, T2TModel * model) ...@@ -144,7 +144,7 @@ void T2TTester::Test(const char * fn, const char * ofn, T2TModel * model)
XPRINT3(0, stderr, "[INFO] test finished (took %.1fs, word=%d, and ppl=%.3f)\n", XPRINT3(0, stderr, "[INFO] test finished (took %.1fs, word=%d, and ppl=%.3f)\n",
elapsed,wordCountTotal, exp(loss/wordCount)); elapsed,wordCountTotal, exp(loss/wordCount));
} }
/* /*
dump the result into the file dump the result into the file
>> file - data file >> file - data file
...@@ -153,9 +153,9 @@ dump the result into the file ...@@ -153,9 +153,9 @@ dump the result into the file
void T2TTester::Dump(FILE * file, XTensor * output) void T2TTester::Dump(FILE * file, XTensor * output)
{ {
int seqLength = output->GetDim(-1); int seqLength = output->GetDim(-1);
for(int i = 0; i < output->unitNum; i += seqLength){ for (int i = 0; i < output->unitNum; i += seqLength) {
for(int j = 0; j < seqLength; j++){ for (int j = 0; j < seqLength; j++) {
int w = output->GetInt(i + j); int w = output->GetInt(i + j);
fprintf(file, "%d ", w); fprintf(file, "%d ", w);
if (w < 0) if (w < 0)
......
...@@ -57,11 +57,11 @@ public: ...@@ -57,11 +57,11 @@ public:
/* test the model */ /* test the model */
void Test(const char * fn, const char * ofn, T2TModel * model); void Test(const char * fn, const char * ofn, T2TModel * model);
/* dump the result into the file */ /* dump the result into the file */
void Dump(FILE * file, XTensor * output); void Dump(FILE * file, XTensor * output);
}; };
} }
#endif #endif
\ No newline at end of file
...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
#include "T2TUtility.h" #include "T2TUtility.h"
#include "../../tensor/XUtility.h" #include "../../tensor/XUtility.h"
#include "../../tensor/core/CHeader.h" #include "../../tensor/core/CHeader.h"
#include "../../tensor/loss/LHeader.h"
#include "../../network/XNoder.h" #include "../../network/XNoder.h"
#ifndef WIN32 #ifndef WIN32
...@@ -209,13 +210,16 @@ void T2TTrainer::Train(const char * fn, const char * validFN, const char * model ...@@ -209,13 +210,16 @@ void T2TTrainer::Train(const char * fn, const char * validFN, const char * model
labelOnehot = IndexToOnehot(label, vSizeTgt, labelSmoothingP); labelOnehot = IndexToOnehot(label, vSizeTgt, labelSmoothingP);
/* make paddings for the output */ /* make paddings for the output */
if (output.GetDim(0) > 0) //if (output.GetDim(0) > 0)
PadOutput(&output, &labelOnehot, &paddingDec); //PadOutput(&output, &labelOnehot, &paddingDec);
/* get probabilities */ /* get probabilities */
float prob = GetProb(&output, &labelOnehot, NULL); //float prob = GetProb(&output, &labelOnehot, NULL);
XTensor lossTensor;
lossTensor = CrossEntropy(output, labelOnehot, paddingDec);
float prob = ReduceSumAll(lossTensor);
DTYPE lossLocal = -prob / wc; DTYPE lossLocal = prob / wc;
bool doUpdate = (!IsNAN(lossLocal) && !IsINF(lossLocal) && lossLocal < 1e3F); bool doUpdate = (!IsNAN(lossLocal) && !IsINF(lossLocal) && lossLocal < 1e3F);
//XTensor &g = labelSmoothingP > 0 ? goldSmoothed : gold; //XTensor &g = labelSmoothingP > 0 ? goldSmoothed : gold;
...@@ -223,14 +227,15 @@ void T2TTrainer::Train(const char * fn, const char * validFN, const char * model ...@@ -223,14 +227,15 @@ void T2TTrainer::Train(const char * fn, const char * validFN, const char * model
if (doUpdate) { if (doUpdate) {
/* rescale the output for normalized loss */ /* rescale the output for normalized loss */
RescaleOutput(&output, &labelOnehot, &paddingDec); //RescaleOutput(&output, &labelOnehot, &paddingDec);
/* back-propagation */ /* back-propagation */
net.Backward(output, labelOnehot, paddingDec, CROSSENTROPY); net.Backward(lossTensor);
//net.Backward(output, labelOnehot, paddingDec, CROSSENTROPY);
//net.Backward(output, label, labelSmoothingP, CROSSENTROPY); //net.Backward(output, label, labelSmoothingP, CROSSENTROPY);
gradStep += 1; gradStep += 1;
loss += -prob; loss += prob;
wordCount += wc; wordCount += wc;
wordCountTotal += wc; wordCountTotal += wc;
...@@ -260,7 +265,7 @@ void T2TTrainer::Train(const char * fn, const char * validFN, const char * model ...@@ -260,7 +265,7 @@ void T2TTrainer::Train(const char * fn, const char * validFN, const char * model
if (step % 100 == 0) { if (step % 100 == 0) {
double elapsed = GetClockSec() - startT; double elapsed = GetClockSec() - startT;
XPRINT8(0, stderr, "[INFO] elapsed=%.1fs, step=%d, epoch=%d, tword=%d, sword=%d, loss=%.3f, ppl=%.3f, sppl=%.3f", XPRINT8(0, stderr, "[INFO] elapsed=%.1fs, step=%d, epoch=%d, tword=%d, sword=%d, loss=%.3f, ppl=%.3f, sppl=%.3f",
elapsed, step, epoch, wordCountTotal, wordCountBatch, loss/wordCount, exp(loss/wordCount), exp(-prob/wc)); elapsed, step, epoch, wordCountTotal, wordCountBatch, loss/wordCount, exp(loss/wordCount), exp(prob/wc));
if (!doUpdate) if (!doUpdate)
XPRINT(0, stderr, " (no update)"); XPRINT(0, stderr, " (no update)");
XPRINT(0, stderr, "\n"); XPRINT(0, stderr, "\n");
...@@ -491,7 +496,7 @@ where ...@@ -491,7 +496,7 @@ where
*/ */
void T2TTrainer::Update(T2TModel * model, const float lr) void T2TTrainer::Update(T2TModel * model, const float lr)
{ {
XList ws(100); TensorList ws(100);
model->GetParams(ws); model->GetParams(ws);
...@@ -552,7 +557,7 @@ void T2TTrainer::PrepareModel(T2TModel * model) ...@@ -552,7 +557,7 @@ void T2TTrainer::PrepareModel(T2TModel * model)
moments.Clear(); moments.Clear();
moments2nd.Clear(); moments2nd.Clear();
XList ws(100); TensorList ws(100);
model->GetParams(ws); model->GetParams(ws);
......
...@@ -82,10 +82,10 @@ public: ...@@ -82,10 +82,10 @@ public:
float adamBeta2T; float adamBeta2T;
/* list of the moment of the parameter matrices */ /* list of the moment of the parameter matrices */
XList moments; TensorList moments;
/* list of the 2nd order moment of the parameter matrices */ /* list of the 2nd order moment of the parameter matrices */
XList moments2nd; TensorList moments2nd;
/* indicates whether the data file is shuffled for training */ /* indicates whether the data file is shuffled for training */
bool isShuffled; bool isShuffled;
......
...@@ -30,6 +30,7 @@ ...@@ -30,6 +30,7 @@
#include "XDevice.h" #include "XDevice.h"
#include "./test/Test.h" #include "./test/Test.h"
#include "./core/CHeader.h" #include "./core/CHeader.h"
#include "./loss/CrossEntropy.h"
//#define CRTDBG_MAP_ALLOC //#define CRTDBG_MAP_ALLOC
//#include <stdlib.h> //#include <stdlib.h>
......
...@@ -201,7 +201,7 @@ void XDevice::SetGPUDevice(int devID) ...@@ -201,7 +201,7 @@ void XDevice::SetGPUDevice(int devID)
cudaError_t error = cudaSetDevice(devID); cudaError_t error = cudaSetDevice(devID);
if (error != cudaSuccess){ if (error != cudaSuccess){
fprintf(stderr, "Error! Calling cudaSetDevice(%d) fails(%d:%s)\n", fprintf(stderr, "Error! Calling cudaSetDevice(%d) fails(%d:%s)\n",
devID, error, cudaGetErrorString(error)); devID, error, cudaGetErrorString(error));
exit(1); exit(1);
} }
...@@ -225,7 +225,7 @@ int XDevice::GetGPUDevice() ...@@ -225,7 +225,7 @@ int XDevice::GetGPUDevice()
cudaError_t error = cudaGetDevice(&devID); cudaError_t error = cudaGetDevice(&devID);
if (error != cudaSuccess){ if (error != cudaSuccess){
fprintf(stderr, "Error! Calling cudaGetDevice(%d) fails(%d:%s)\n", fprintf(stderr, "Error! Calling cudaGetDevice(%d) fails(%d:%s)\n",
devID, error, cudaGetErrorString(error)); devID, error, cudaGetErrorString(error));
exit(1); exit(1);
} }
...@@ -476,7 +476,7 @@ split a string ...@@ -476,7 +476,7 @@ split a string
>> items - splitting result >> items - splitting result
<< return - how many items are there << return - how many items are there
*/ */
int SplitALine(char * inputString, const char * seperator, XList * items) int SplitALine(char * inputString, const char * seperator, StrList* items)
{ {
items->Clear(); items->Clear();
...@@ -530,7 +530,7 @@ get device ids for the given device information ...@@ -530,7 +530,7 @@ get device ids for the given device information
*/ */
int XDevManager::GetDeviceIDs(char * devInfo, int * devIDs) int XDevManager::GetDeviceIDs(char * devInfo, int * devIDs)
{ {
XList * terms = new XList(1); StrList* terms = new StrList(1);
SplitALine(devInfo, " ", terms); SplitALine(devInfo, " ", terms);
for(int i = 0; i < terms->count; i++){ for(int i = 0; i < terms->count; i++){
......
...@@ -49,7 +49,7 @@ namespace nts { ...@@ -49,7 +49,7 @@ namespace nts {
#ifdef DOUBELPRICSION #ifdef DOUBELPRICSION
#define DTYPE double #define DTYPE double
#define DTYPE_MIN (DTYPE)1.79E+308 #define DTYPE_MIN (DTYPE)-1.79E+308
#else #else
#define DTYPE float #define DTYPE float
#define DTYPE_MIN (DTYPE)-3.40E+38 #define DTYPE_MIN (DTYPE)-3.40E+38
......
...@@ -300,9 +300,9 @@ void XLink::MakeLink(const XTensor * t1, const XTensor * t2, XTensor * h, int id ...@@ -300,9 +300,9 @@ void XLink::MakeLink(const XTensor * t1, const XTensor * t2, XTensor * h, int id
if(h == NULL) if(h == NULL)
return; return;
XList list(2); TensorList list(2);
list.Add(t1); list.Add((XTensor*)t1);
list.Add(t2); list.Add((XTensor*)t2);
MakeLink(&list, h, id); MakeLink(&list, h, id);
} }
...@@ -320,10 +320,10 @@ void XLink::MakeLink(const XTensor * t1, const XTensor * t2, const XTensor * t3, ...@@ -320,10 +320,10 @@ void XLink::MakeLink(const XTensor * t1, const XTensor * t2, const XTensor * t3,
if (h == NULL) if (h == NULL)
return; return;
XList list(3); TensorList list(3);
list.Add(t1); list.Add((XTensor*)t1);
list.Add(t2); list.Add((XTensor*)t2);
list.Add(t3); list.Add((XTensor*)t3);
MakeLink(&list, h, id); MakeLink(&list, h, id);
} }
...@@ -334,7 +334,7 @@ create a hyper edge with a list of tensors and a output tensor ...@@ -334,7 +334,7 @@ create a hyper edge with a list of tensors and a output tensor
>> h - head tensor >> h - head tensor
>> id - id of the edge type >> id - id of the edge type
*/ */
void XLink::MakeLink(const XList * list, XTensor * h, int id) void XLink::MakeLink(const TensorList * list, XTensor * h, int id)
{ {
/* forward */ /* forward */
XLink &income = h->income; XLink &income = h->income;
...@@ -368,7 +368,7 @@ create a hyper edge with a input tensors and a list of output tensors ...@@ -368,7 +368,7 @@ create a hyper edge with a input tensors and a list of output tensors
>> list - a list of output tensors >> list - a list of output tensors
>> id - id of the edge type >> id - id of the edge type
*/ */
void XLink::MakeLink(XTensor * t, XList * list, int id) void XLink::MakeLink(XTensor * t, TensorList * list, int id)
{ {
/* forward */ /* forward */
for(int i = 0; i < list->count; i++){ for(int i = 0; i < list->count; i++){
...@@ -528,10 +528,90 @@ void XLink::Replace(const XTensor * oldOne, XTensor * newOne) ...@@ -528,10 +528,90 @@ void XLink::Replace(const XTensor * oldOne, XTensor * newOne)
CheckNTErrors(hit, "No proper node found in parent.income edge!"); CheckNTErrors(hit, "No proper node found in parent.income edge!");
} }
} }
strcpy(newOne->name, oldOne->name);
} }
/*
copy a node with another, i.e., we add the links to the new node.
The links of the reference node are left as they are: the target is appended
to the neighbours' edge lists next to the reference instead of replacing it.
>> reference - the node to be copied
>> target - the new node
*/
void XLink::Copy(const XTensor * reference, XTensor * target)
{
/* nothing to do if either node is missing */
if (reference == NULL || target == NULL)
return;
XLink &newIncome = target->income;
XLink &newOutgo = target->outgo;
/* drop whatever links the target had before taking over those of the reference */
XLink::ClearOutgoing(target);
XLink::ClearIncoming(target);
/* incoming nodes (copied only if the reference has an incoming edge type, i.e., typeID != 0) */
if (reference->income.typeID != 0) {
/* make the tail array of the target large enough for all tails of the reference */
if (newIncome.tailNum < reference->income.tailNum) {
delete[] newIncome.tails;
newIncome.tails = new XTensor*[reference->income.tailNum];
}
/* copy the edge type, the head and the tail pointers */
newIncome.SetType(reference->income.typeID);
newIncome.head = target;
newIncome.tailNum = reference->income.tailNum;
memcpy(newIncome.tails, reference->income.tails, sizeof(XTensor*) * newIncome.tailNum);
/* copy the parameters of the edge into a newly allocated array of paramNum * paramSize bytes */
/* NOTE(review): presumably ClearIncoming has already released the old params of the target - confirm */
int paraArraySize = reference->income.paramNum * reference->income.paramSize;
newIncome.params = new char[paraArraySize];
memcpy(newIncome.params, reference->income.params, paraArraySize);
newIncome.paramNum = reference->income.paramNum;
/* update the link to each child node */
for (int i = 0; i < newIncome.tailNum; i++) {
XTensor * child = newIncome.tails[i];
XLink &childOutgo = child->outgo;
bool hit = false;
/* look for the reference among the outgoing tails of the child; on the first
   match the target is appended as an additional tail and the search stops */
for (int j = 0; j < childOutgo.tailNum; j++) {
if (childOutgo.tails[j] == reference) {
//childOutgo.tails[j] = target;
childOutgo.AddTail(target);
hit = true;
break;
}
}
/* a child that has outgoing tails is required to list the reference among them */
if (childOutgo.tailNum > 0) {
CheckNTErrors(hit, "No proper node found in child.outgo edge!");
}
}
}
/* make the outgoing tail array of the target large enough
   (this part runs no matter whether the reference has an incoming edge) */
if (newOutgo.tailNum < reference->outgo.tailNum) {
delete[] newOutgo.tails;
newOutgo.tails = new XTensor*[reference->outgo.tailNum];
}
/* outgoing nodes */
newOutgo.head = target;
newOutgo.tailNum = reference->outgo.tailNum;
memcpy(newOutgo.tails, reference->outgo.tails, sizeof(XTensor*) * newOutgo.tailNum);
/* update the link to each parent node */
for (int i = 0; i < newOutgo.tailNum; i++) {
XTensor * parent = newOutgo.tails[i];
XLink &parentIncome = parent->income;
bool hit = false;
/* unlike the child loop above there is no break here: the target is appended
   once for every occurrence of the reference in the incoming tails of the parent */
/* NOTE(review): AddTail presumably changes parentIncome.tailNum while this loop
   is still scanning the same list - confirm that this is intended */
for (int j = 0; j < parentIncome.tailNum; j++) {
if (parentIncome.tails[j] == reference) {
//parentIncome.tails[j] = target;
parentIncome.AddTail(target);
hit = true;
}
}
/* a parent that has incoming tails is required to list the reference among them */
if (parentIncome.tailNum > 0) {
CheckNTErrors(hit, "No proper node found in parent.income edge!");
}
}
}
/* /*
copy incoming edges of a given node copy incoming edges of a given node
>> reference - the node we copy from >> reference - the node we copy from
...@@ -544,7 +624,7 @@ void XLink::CopyIncoming(const XTensor * reference, XTensor * target) ...@@ -544,7 +624,7 @@ void XLink::CopyIncoming(const XTensor * reference, XTensor * target)
ClearIncoming(target); ClearIncoming(target);
int tailNum = reference->income.tailNum; int tailNum = reference->income.tailNum;
XList tails(tailNum); TensorList tails(tailNum);
for(int i = 0; i < tailNum; i++){ for(int i = 0; i < tailNum; i++){
XTensor * tail = (XTensor*)reference->income.tails[i]; XTensor * tail = (XTensor*)reference->income.tails[i];
tails.Add(tail); tails.Add(tail);
......
...@@ -33,7 +33,7 @@ namespace nts{ // namespace nts(NiuTrans.Tensor) ...@@ -33,7 +33,7 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
/* cross reference */ /* cross reference */
struct XTensor; struct XTensor;
#define MAX_OP_NAME_LENGTH 16 #define MAX_OP_NAME_LENGTH 64
#define PARAM_UNTI_SIZE 64 #define PARAM_UNTI_SIZE 64
/* /*
...@@ -144,11 +144,11 @@ struct XLink ...@@ -144,11 +144,11 @@ struct XLink
/* create a hyper edge with a list of input tensors and a output tensor */ /* create a hyper edge with a list of input tensors and a output tensor */
static static
void MakeLink(const XList * list, XTensor * h, int id); void MakeLink(const TensorList * list, XTensor * h, int id);
/* create a hyper edge with a input tensors and a list of output tensors */ /* create a hyper edge with a input tensors and a list of output tensors */
static static
void MakeLink(XTensor * h, XList * list, int id); void MakeLink(XTensor * h, TensorList * list, int id);
/* add a parameter */ /* add a parameter */
static static
...@@ -174,6 +174,10 @@ struct XLink ...@@ -174,6 +174,10 @@ struct XLink
static static
void Replace(const XTensor * oldOne, XTensor * newOne); void Replace(const XTensor * oldOne, XTensor * newOne);
/* copy a node with another, i.e., we add the links to the new node */
static
void Copy(const XTensor * reference, XTensor * target);
/* copy links of a given node */ /* copy links of a given node */
static static
void CopyIncoming(const XTensor * reference, XTensor * target); void CopyIncoming(const XTensor * reference, XTensor * target);
......
/* NiuTrans.Tensor - an open-source tensor library /* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University. * Copyright (C) 2019, Natural Language Processing Lab, Northestern University.
* All rights reserved. * All rights reserved.
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
...@@ -15,32 +15,31 @@ ...@@ -15,32 +15,31 @@
* limitations under the License. * limitations under the License.
*/ */
/* /*
* *
* Implementation of list that keeps data items * Implementation of template list that keeps data items
* *
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-04-17 * $Created by: HU Chi (huchinlp@foxmail.com)
* The first coding job this year! *
* */
*/
#ifndef __XLIST_H__
#define __XLIST_H__
#include "XMem.h" #include "XMem.h"
#include "XGlobal.h" #include "XGlobal.h"
/* the nts (NiuTrans.Tensor) namespace */ #ifndef __TensorList_H__
namespace nts{ #define __TensorList_H__
typedef int (* ListCompare)(const void * item1, const void * item2);
/* the XList class */ /* the nts (NiuTrans.Tensor) namespace */
class XList namespace nts {
{
/* the TensorListBase class */
template <typename T>
struct TensorListBase {
public: public:
/* data items */ /* data items */
void ** items; T *items;
/* number of items */ /* number of items */
int count; int count;
...@@ -49,56 +48,88 @@ public: ...@@ -49,56 +48,88 @@ public:
int maxNum; int maxNum;
/* the memory pool for data array allocation */ /* the memory pool for data array allocation */
XMem * mem; XMem* mem;
/* indicates whether data items are integers */
bool isIntList;
public: public:
/* constructor */ /* constructor */
XList(); TensorListBase();
/* constructor */ /* constructor */
XList(int myMaxNum, bool isIntListOrNot = false); TensorListBase(int myMaxNum);
/* constructor */ /* constructor */
XList(int myMaxNum, XMem * myMem, bool isIntListOrNot = false); TensorListBase(int myMaxNum, XMem* myMem);
/* de-constructor */ /* de-constructor */
~XList(); ~TensorListBase();
/* utilities */ /* add an item into the list */
void Create(int myMaxNum, XMem * myMem); void Add(T&& item);
void Add(const void * item);
void Add(void ** inputItems, int inputItemCount); /* add an item into the list */
void AddList(XList * l); void Add(const T& item);
void AddInt(int i);
void Insert(int pos, void * item); /* add a number of items into the list */
void * GetItem(int i) const; void Add(T* inputItems, int inputItemCount);
int GetItemInt(int i);
void SetItem(int i, void * item); /* append a list to the current list */
void SetItemInt(int i, int item); void AddList(TensorListBase* l);
int FindFirst(void * item); /* insert an item to the given position of the list */
void Insert(int pos, const T& item);
/* insert an item to the given position of the list */
void Insert(int pos, T&& item);
/* get the item at position i */
T& GetItem(int i) const;
/* set the item at position i */
void SetItem(int i, const T& item);
/* set the item at position i */
void SetItem(int i, T&& item);
/* find the position of the first matched item */
int FindFirst(const T& item);
/* clear the data array */
void Clear(); void Clear();
void ClearStringList();
void Sort(int itemSize, ListCompare comp); /* sort the list */
void Sort(int itemSize);
/* reverse the list */
void Reverse(); void Reverse();
/* remove the item at position i */
void Remove(int i); void Remove(int i);
XList * Copy(XMem * myMem);
/* copy the list */
TensorListBase* Copy(XMem* myMem);
/* shuffle the list */
void Shuffle(int nround = 10, int beg = -1, int len = 0); void Shuffle(int nround = 10, int beg = -1, int len = 0);
/* short */ /* short */
_XINLINE_ void * Get(int i) {return GetItem(i);}; T& operator[] (int i) {
_XINLINE_ int GetInt(int i) {return GetItemInt(i);}; return GetItem(i);
_XINLINE_ void Set(int i, void * item) {SetItem(i, item);}; };
_XINLINE_ void SetInt(int i, int item) {SetItemInt(i, item);}; T& Get(int i) { return GetItem(i); };
void Set(int i, T item) { SetItem(i, item); };
}; };
extern XList NULLList; struct XTensor;
typedef TensorListBase<int> IntList;
typedef TensorListBase<char> CharList;
typedef TensorListBase<char*> StrList;
typedef TensorListBase<long> LongList;
typedef TensorListBase<float> FloatList;
typedef TensorListBase<short> ShortList;
typedef TensorListBase<void*> XList;
typedef TensorListBase<XTensor*> TensorList;
} } /* end of the nts (NiuTrans.Tensor) namespace */
/* end of the nts (NiuTrans.Tensor) namespace */
#endif #endif // __TensorList_H__
...@@ -34,6 +34,11 @@ namespace nts{ ...@@ -34,6 +34,11 @@ namespace nts{
int testxmemid = 0; int testxmemid = 0;
void * recordp = NULL; void * recordp = NULL;
/*
for managing the memories
*/
XMemManager GMems;
XMem * GMem; XMem * GMem;
/* constructor */ /* constructor */
...@@ -216,8 +221,8 @@ void XMem::Free(int myDevID, void * mem) ...@@ -216,8 +221,8 @@ void XMem::Free(int myDevID, void * mem)
} }
} }
/* /*
get the signature get the signature
<< return - the signature << return - the signature
*/ */
MTYPE XMem::GetSignature() MTYPE XMem::GetSignature()
...@@ -275,7 +280,7 @@ void XMem::SetDeviceFast(int myDevID) ...@@ -275,7 +280,7 @@ void XMem::SetDeviceFast(int myDevID)
} }
/* /*
run in the static mode run in the static mode
>> myIsStatic - specify if the memory allocation is static >> myIsStatic - specify if the memory allocation is static
*/ */
void XMem::SetStaticMode(bool myIsStatic) void XMem::SetStaticMode(bool myIsStatic)
...@@ -1488,4 +1493,158 @@ cublasHandle_t * XMem::GetCublasHandle() ...@@ -1488,4 +1493,158 @@ cublasHandle_t * XMem::GetCublasHandle()
#endif #endif
/* constructor: the memory pools are set up right away via Initialize() */
XMemManager::XMemManager()
{
    this->Initialize();
}
/* de-constructor (the body is empty; Free() is not called from here) */
XMemManager::~XMemManager()
{
}
/*
get the size of the physical memory that is currently available on the host
<< return - number of available bytes (0 if the system query fails)
*/
MTYPE XMemManager::GetAvailableMemory()
{
    /* keep the byte count in MTYPE: "unsigned long" has only 32 bits on
       64-bit Windows and would cut off everything beyond 4GB */
    MTYPE freeMem = 0;

#ifndef WIN32
    long pages = sysconf(_SC_AVPHYS_PAGES);
    long pageSize = sysconf(_SC_PAGE_SIZE);

    /* sysconf returns -1 on failure; the factors are widened before the
       multiplication so that the product cannot overflow a 32-bit long */
    if (pages > 0 && pageSize > 0)
        freeMem = (MTYPE)pages * (MTYPE)pageSize;
#else
    MEMORYSTATUSEX memoryStatus;

    /* dwLength has to be set before the structure is passed to the query */
    memoryStatus.dwLength = sizeof(memoryStatus);
    if (GlobalMemoryStatusEx(&memoryStatus)){
        freeMem = (MTYPE)memoryStatus.ullAvailPhys;
    }
#endif

    return freeMem;
}
/*
get the size of the free memory on a GPU
>> devID - id of the GPU to query
<< return - number of free bytes reported by cudaMemGetInfo
            (always 0 if the code is compiled without USE_CUDA)
*/
MTYPE XMemManager::GetAvailableGPUMemory(int devID)
{
    size_t freeBytes = 0;
    size_t totalBytes = 0;

#ifdef USE_CUDA
    /* the current device is switched to devID here and is not switched back */
    cudaSetDevice(devID);

    cudaError_t status = cudaMemGetInfo(&freeBytes, &totalBytes);
    if (status != cudaSuccess){
        XPRINT(0, stderr, "cannot get GPU memory information.");
        exit(1);
    }
#endif

    return (MTYPE)freeBytes;
}
/*
choose a buffer size for a memory pool according to the amount of free memory
>> freeMem - size of the free memory
>> myBufSize - where the chosen buffer size is written. The thresholds
               MILLION * {128, 256, 512, 1024, 2048} map to the sizes
               MILLION * {32, 64, 128, 256, 512}; the result is 0 if
               freeMem is below the smallest threshold
*/
void XMemManager::GetBufferSize(MTYPE freeMem, MTYPE * myBufSize)
{
    /* test the thresholds from the largest to the smallest so that
       the first match is the final answer */
    if (freeMem >= MILLION * 2048)
        *myBufSize = MILLION * 512;
    else if (freeMem >= MILLION * 1024)
        *myBufSize = MILLION * 256;
    else if (freeMem >= MILLION * 512)
        *myBufSize = MILLION * 128;
    else if (freeMem >= MILLION * 256)
        *myBufSize = MILLION * 64;
    else if (freeMem >= MILLION * 128)
        *myBufSize = MILLION * 32;
    else
        *myBufSize = 0;
}
/* initialize it and set the global memory information */
void XMemManager::Initialize()
{
srand((unsigned int)time(NULL));
Free();
/* CPUs (we actually do not care about how many CPUs are using) */
nCPUMem = 1;
MTYPE freeMem = GetAvailableMemory();
MTYPE myBufSize = 0;
GetBufferSize(freeMem, &myBufSize);
CPUMems[0].Initialize(-1, UNI_FREE, MIN_BLOCK_SIZE_FOR_MEMPOOL, MIN_BLOCK_NUM_FOR_MEMPOOL, myBufSize);
/* GPUs */
nGPUMem = 0;
#ifdef USE_CUDA
if (cudaGetDeviceCount(&nGPUMem) != cudaSuccess) {
XPRINT(0, stderr, "cannot get GPU information.");
exit(1);
}
for (int i = 0; i < nGPUMem; i++) {
MTYPE freeMem = GetAvailableGPUMemory(i);
MTYPE myBufSize = 0;
GetBufferSize(freeMem, &myBufSize);
GPUMems[i].Initialize(i, UNI_FREE, MIN_BLOCK_SIZE_FOR_MEMPOOL, MIN_BLOCK_NUM_FOR_MEMPOOL, myBufSize);
}
#endif
}
/* free it */
void XMemManager::Free()
{
for (int i = 0; i < MAX_CPU_NUM; i++)
CPUMems[i].Free();
for (int i = 0; i < MAX_GPU_NUM; i++)
GPUMems[i].Free();
}
/*
get global memory pool
>> devID - device id: a negative value selects the CPU pool,
           a value in [0, nGPUMem) selects the pool of that GPU
<< return - the memory pool, or NULL (after printing a message) if
            devID does not refer to a known GPU
*/
XMem * XMemManager::GetMem(const int devID)
{
    /* every negative id maps to the first CPU pool */
    if (devID < 0)
        return CPUMems;

    if (devID < nGPUMem)
        return GPUMems + devID;

    XPRINT1(0, stderr, "Cannot get the memory (%d). Please check your device id!", devID);
    return NULL;
}
/* get global memory size */
int XMemManager::GetMemSize(const int devID, MTYPE * myBlockSize, int * myBlockNum, MTYPE * myBufSize)
{
XMem * mem = GetMem(devID);
int result = 0;
if (mem != NULL){
*myBlockSize = mem->maxBlockSize;
*myBlockNum = mem->blockNum;
*myBufSize = mem->bufSize;
result = 1;
}
return result;
}
/* show memory information */
void XMemManager::ShowMemInfo()
{
XPRINT(1, stderr, "Memory Information:\n");
MTYPE myBlockSize, myBufSize;
int myBlockNum;
for(int i = 0; i < nCPUMem; i++){
GetMemSize(-1, &myBlockSize, &myBlockNum, &myBufSize);
XPRINT3(1, stderr, " - id:-1 CPU, blockSize:%d, blockNum:%d, bufSize:%d\n", myBlockSize, myBlockNum, myBufSize);
}
for(int i = 0; i < nGPUMem; i++){
GetMemSize(i, &myBlockSize, &myBlockNum, &myBufSize);
XPRINT4(1, stderr, " - id:%2d GPU, blockSize:%d, blockNum:%d, bufSize:%d\n", i, myBlockSize, myBlockNum, myBufSize);
}
}
} /* end of the nts (NiuTrans.Tensor) namespace */ } /* end of the nts (NiuTrans.Tensor) namespace */
...@@ -39,6 +39,12 @@ ...@@ -39,6 +39,12 @@
#include <curand.h> #include <curand.h>
#endif #endif
#ifndef WIN32
#include <unistd.h>
#else
#include <windows.h>
#endif
/* the nts (NiuTrans.Tensor) namespace */ /* the nts (NiuTrans.Tensor) namespace */
namespace nts{ namespace nts{
...@@ -53,6 +59,8 @@ typedef long long INT_64; ...@@ -53,6 +59,8 @@ typedef long long INT_64;
#define BUF_PITCH 256 #define BUF_PITCH 256
#define MIN_BLOCK_SIZE_FOR_MEMPOOL 128 * 1024 * 1024 #define MIN_BLOCK_SIZE_FOR_MEMPOOL 128 * 1024 * 1024
#define MIN_BLOCK_NUM_FOR_MEMPOOL 1024 #define MIN_BLOCK_NUM_FOR_MEMPOOL 1024
#define MAX_CPU_NUM 16
#define MAX_GPU_NUM 16
/* /*
mode of running a memory pool mode of running a memory pool
...@@ -413,6 +421,61 @@ public: ...@@ -413,6 +421,61 @@ public:
}; };
/*
manager of the global memory pools:
it keeps an array of pools for CPUs and an array of pools for GPUs
*/
class XMemManager
{
public:
    /* memory pools on the CPU side */
    XMem CPUMems[MAX_CPU_NUM];

    /* how many of the CPU pools are in use */
    int nCPUMem;

    /* memory pools on the GPU side (indexed by device id) */
    XMem GPUMems[MAX_GPU_NUM];

    /* how many of the GPU pools are in use */
    int nGPUMem;

    /* constructor */
    XMemManager();

    /* de-constructor */
    ~XMemManager();

    /* size of the memory that is available on the host */
    MTYPE GetAvailableMemory();

    /* size of the memory that is available on a GPU */
    MTYPE GetAvailableGPUMemory(int devID);

    /* choose the buffer size of a pool for a given amount of free memory */
    void GetBufferSize(MTYPE freeMem, MTYPE * myBufSize);

    /* set up the pools, i.e., the global memory information */
    void Initialize();

    /* free all pools */
    void Free();

    /* get the global memory pool of a device */
    XMem * GetMem(const int devID);

    /* get block size, block number and buffer size of the pool of a device */
    int GetMemSize(const int devID, MTYPE * myBlockSize, int * myBlockNum, MTYPE * myBufSize);

    /* print the information of all pools in use */
    void ShowMemInfo();
};
/* managing the memories */
extern XMemManager GMems;
extern XMem * GMem; extern XMem * GMem;
extern int testxmemid; extern int testxmemid;
......
...@@ -77,6 +77,12 @@ const char * GetOPName(int type) ...@@ -77,6 +77,12 @@ const char * GetOPName(int type)
return "M_POWER"; return "M_POWER";
else if (type == MATH_SCALEANDSHIFT) else if (type == MATH_SCALEANDSHIFT)
return "M_SCALEANDSHIFT"; return "M_SCALEANDSHIFT";
else if (type == MATH_SCALE)
return "M_SCALE";
else if (type == MATH_DESCALE)
return "M_DESCALE";
else if (type == MATH_SHIFT)
return "M_SHIFT";
else if (type == MATH_MULANDSHIFT) else if (type == MATH_MULANDSHIFT)
return "M_OPERATION"; return "M_OPERATION";
else if (type == MATH_SIGN) else if (type == MATH_SIGN)
...@@ -111,6 +117,8 @@ const char * GetOPName(int type) ...@@ -111,6 +117,8 @@ const char * GetOPName(int type)
return "M_COPYVALUES"; return "M_COPYVALUES";
else if (type == MOVEMENT_GATHER) else if (type == MOVEMENT_GATHER)
return "M_GATHER"; return "M_GATHER";
else if (type == MOVEMENT_DROPOUTWITHINDEX)
return "M_DROPOUTWITHINDEX";
else if (type == SHAPE_CONCATENATE) else if (type == SHAPE_CONCATENATE)
return "S_CONCATENATE"; return "S_CONCATENATE";
else if (type == SHAPE_MERGE) else if (type == SHAPE_MERGE)
...@@ -152,6 +160,10 @@ const char * GetOPName(int type) ...@@ -152,6 +160,10 @@ const char * GetOPName(int type)
else if (type == FUNC_SOFTMAX) else if (type == FUNC_SOFTMAX)
return "F_SOFTMAX"; return "F_SOFTMAX";
} }
else if ((type & LOSS_BASE) != 0) {
if (type == LOSS_CROSSENTROPY)
return "L_CROSSENTROPY";
}
return "NULL"; return "NULL";
} }
......
...@@ -58,7 +58,11 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -58,7 +58,11 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#define MATH_POWER MATH_NORMALIZE + 1 #define MATH_POWER MATH_NORMALIZE + 1
#define MATH_SCALEANDSHIFT MATH_POWER + 1 #define MATH_SCALEANDSHIFT MATH_POWER + 1
#define MATH_MULANDSHIFT MATH_SCALEANDSHIFT + 1 #define MATH_MULANDSHIFT MATH_SCALEANDSHIFT + 1
#define MATH_SIGN MATH_MULANDSHIFT + 1 #define MATH_SCALE MATH_MULANDSHIFT + 1
#define MATH_DESCALE MATH_SCALE + 1
#define MATH_SHIFT MATH_DESCALE + 1
#define MATH_MOD MATH_SHIFT + 1
#define MATH_SIGN MATH_MOD + 1
#define MATH_SUB MATH_SIGN + 1 #define MATH_SUB MATH_SIGN + 1
#define MATH_SUBDIM MATH_SUB + 1 #define MATH_SUBDIM MATH_SUB + 1
#define MATH_SUM MATH_SUBDIM + 1 #define MATH_SUM MATH_SUBDIM + 1
...@@ -81,8 +85,9 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -81,8 +85,9 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#define MOVEMENT_COPYINDEXED MOVEMENT + 1 #define MOVEMENT_COPYINDEXED MOVEMENT + 1
#define MOVEMENT_COPYVALUES MOVEMENT_COPYINDEXED + 1 #define MOVEMENT_COPYVALUES MOVEMENT_COPYINDEXED + 1
#define MOVEMENT_GATHER MOVEMENT_COPYVALUES + 1 #define MOVEMENT_GATHER MOVEMENT_COPYVALUES + 1
#define MOVEMENT_DROPOUTWITHINDEX MOVEMENT_GATHER + 1
#define SHAPE MOVEMENT_GATHER + 1 #define SHAPE MOVEMENT_DROPOUTWITHINDEX + 1
#define SHAPE_CONCATENATE SHAPE + 1 #define SHAPE_CONCATENATE SHAPE + 1
#define SHAPE_MERGE SHAPE_CONCATENATE + 1 #define SHAPE_MERGE SHAPE_CONCATENATE + 1
#define SHAPE_MERGE_LIST SHAPE_MERGE + 1 #define SHAPE_MERGE_LIST SHAPE_MERGE + 1
...@@ -108,6 +113,9 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -108,6 +113,9 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#define FUNC_SIGMOID FUNC_RECTIFY + 1 #define FUNC_SIGMOID FUNC_RECTIFY + 1
#define FUNC_SOFTMAX FUNC_SIGMOID + 1 #define FUNC_SOFTMAX FUNC_SIGMOID + 1
#define LOSS_BASE FUNCTION_BASE * 2
#define LOSS_CROSSENTROPY LOSS_BASE + 1
/* get operator name */ /* get operator name */
const char * GetOPName(int type); const char * GetOPName(int type);
......
...@@ -146,7 +146,7 @@ run a set of jobs in parallel ...@@ -146,7 +146,7 @@ run a set of jobs in parallel
>> jobArgs - the list of arguments for each job >> jobArgs - the list of arguments for each job
>> sleepTime - time to sleep (in ms) for each round >> sleepTime - time to sleep (in ms) for each round
*/ */
void XPRunner::Run(XList * jobFunctions, XList * jobArgs, float sleepTime) void XPRunner::Run(TensorList * jobFunctions, TensorList * jobArgs, float sleepTime)
{ {
if(threadNum <= 0){ if(threadNum <= 0){
XPRINT(1, stderr, "Error! No threads were created!\n"); XPRINT(1, stderr, "Error! No threads were created!\n");
...@@ -195,7 +195,7 @@ void XPRunner::Run(XList * jobFunctions, XList * jobArgs, float sleepTime) ...@@ -195,7 +195,7 @@ void XPRunner::Run(XList * jobFunctions, XList * jobArgs, float sleepTime)
TFunction function = (TFunction)jobFunctions->GetItem(jobArgs->count - c); TFunction function = (TFunction)jobFunctions->GetItem(jobArgs->count - c);
/* the arguments that are passed to the function */ /* the arguments that are passed to the function */
volatile XList * args = (XList*)jobArgs->GetItem(jobArgs->count - c); volatile TensorList * args = (TensorList*)jobArgs->GetItem(jobArgs->count - c);
/* thread */ /* thread */
XThread * thread = threads + availableThreads[i]; XThread * thread = threads + availableThreads[i];
......
...@@ -106,7 +106,7 @@ public: ...@@ -106,7 +106,7 @@ public:
void KillThreads(); void KillThreads();
/* run a set of jobs in parallel */ /* run a set of jobs in parallel */
void Run(XList * jobFunctions, XList * jobArgs, float sleepTime = 0); void Run(TensorList * jobFunctions, TensorList * jobArgs, float sleepTime = 0);
/* get the number of parallel jobs to run */ /* get the number of parallel jobs to run */
int GetJobNum(int size); int GetJobNum(int size);
......
...@@ -42,7 +42,7 @@ job item used in queues ...@@ -42,7 +42,7 @@ job item used in queues
JobQueueNode::JobQueueNode() JobQueueNode::JobQueueNode()
{ {
job = NULL; job = NULL;
args = new XList(1); args = new TensorList(1);
} }
/* de-constructor */ /* de-constructor */
...@@ -67,7 +67,7 @@ XQueue::XQueue(int mySize) ...@@ -67,7 +67,7 @@ XQueue::XQueue(int mySize)
head = 0; head = 0;
tail = 0; tail = 0;
isJobQueue = false; isJobQueue = false;
jobDequeuerArgs = new XList(1); jobDequeuerArgs = new TensorList(1);
jobDequeuerBreak = false; jobDequeuerBreak = false;
runningJobCount = 0; runningJobCount = 0;
jobStream = NULL; jobStream = NULL;
...@@ -188,8 +188,10 @@ void XQueue::RunJobConsumer(int jobDevID) ...@@ -188,8 +188,10 @@ void XQueue::RunJobConsumer(int jobDevID)
isJobQueue = true; isJobQueue = true;
jobDequeuerArgs->Clear(); jobDequeuerArgs->Clear();
jobDequeuerArgs->Add(this);
jobDequeuerArgs->Add(jobDevID >= 0 ? devids + jobDevID : &cpuid); // warning: this may cause unknown error
jobDequeuerArgs->Add((XTensor*)this);
jobDequeuerArgs->Add(jobDevID >= 0 ? (XTensor*)(devids + jobDevID) : (XTensor*)&cpuid);
jobDequeuer.function = (TFunction)DequeueJobs; jobDequeuer.function = (TFunction)DequeueJobs;
jobDequeuer.argv = jobDequeuerArgs; jobDequeuer.argv = jobDequeuerArgs;
...@@ -211,7 +213,7 @@ void XQueue::StopJobConsumer() ...@@ -211,7 +213,7 @@ void XQueue::StopJobConsumer()
} }
/* add a job item to process */ /* add a job item to process */
void XQueue::EnqueueJob(void * job, XList * jobArgs) void XQueue::EnqueueJob(void * job, TensorList * jobArgs)
{ {
MUTEX_LOCK(jobQueueMutex); MUTEX_LOCK(jobQueueMutex);
runningJobCount++; runningJobCount++;
...@@ -225,7 +227,7 @@ void XQueue::EnqueueJob(void * job, XList * jobArgs) ...@@ -225,7 +227,7 @@ void XQueue::EnqueueJob(void * job, XList * jobArgs)
} }
/* job item consumer */ /* job item consumer */
void XQueue::DequeueJobs(XList * args) void XQueue::DequeueJobs(TensorList * args)
{ {
CheckNTErrors((args->count == 2), "Illegal arguments!"); CheckNTErrors((args->count == 2), "Illegal arguments!");
......
...@@ -52,7 +52,7 @@ public: ...@@ -52,7 +52,7 @@ public:
void * job; void * job;
/* arguments of the job */ /* arguments of the job */
XList * args; TensorList * args;
public: public:
/* constructor */ /* constructor */
...@@ -102,7 +102,7 @@ private: ...@@ -102,7 +102,7 @@ private:
XThread jobDequeuer; XThread jobDequeuer;
/* argument list of jobDequeuer */ /* argument list of jobDequeuer */
XList * jobDequeuerArgs; TensorList * jobDequeuerArgs;
/* indicates whether jobDequeuer stops */ /* indicates whether jobDequeuer stops */
bool jobDequeuerBreak; bool jobDequeuerBreak;
...@@ -141,11 +141,11 @@ public: ...@@ -141,11 +141,11 @@ public:
void StopJobConsumer(); void StopJobConsumer();
/* add a job item to process */ /* add a job item to process */
void EnqueueJob(void * job, XList * jobArgs); void EnqueueJob(void * job, TensorList * jobArgs);
/* job item consumer */ /* job item consumer */
static static
void DequeueJobs(XList * args); void DequeueJobs(TensorList * args);
/* get the break flag */ /* get the break flag */
bool GetJobBreak(); bool GetJobBreak();
......
...@@ -255,6 +255,10 @@ public: ...@@ -255,6 +255,10 @@ public:
static static
bool IsSameShaped(const XTensor * a, const XTensor * b, const XTensor * c); bool IsSameShaped(const XTensor * a, const XTensor * b, const XTensor * c);
/* judge whether b is the reduced shape of a ?? */
static
bool IsReduceShaped(const XTensor * a, const XTensor * b, int dim);
/* set the size of each dimension */ /* set the size of each dimension */
void SetDim(int * myDimSize); void SetDim(int * myDimSize);
...@@ -446,29 +450,57 @@ extern int MakeTensorID(); ...@@ -446,29 +450,57 @@ extern int MakeTensorID();
void InitTensor(XTensor * tensor, void InitTensor(XTensor * tensor,
const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const float myDenseRatio = 1.0F, const int myDevID = -1, XMem * myMem = NULL); const float myDenseRatio = 1.0F, const int myDevID = -1, XMem * myMem = NULL);
/* initialize a dense XTensor V2 */
void InitTensorV2(XTensor * tensor,
const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1);
/* initialize a dense vector */ /* initialize a dense vector */
void InitTensor1D(XTensor * tensor, const int num, void InitTensor1D(XTensor * tensor, const int num,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, XMem * myMem = NULL); const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, XMem * myMem = NULL);
/* initialize a dense vector V2 */
void InitTensor1DV2(XTensor * tensor, const int num,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1);
/* initialize a dense matrix */ /* initialize a dense matrix */
void InitTensor2D(XTensor * tensor, const int rowNum, const int colNum, void InitTensor2D(XTensor * tensor, const int rowNum, const int colNum,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, XMem * myMem = NULL); const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, XMem * myMem = NULL);
/* initialize a dense matrix V2 */
void InitTensor2DV2(XTensor * tensor, const int rowNum, const int colNum,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1);
/* initialize a dense 3d tensor */ /* initialize a dense 3d tensor */
void InitTensor3D(XTensor * tensor, const int d0, const int d1, const int d2, void InitTensor3D(XTensor * tensor, const int d0, const int d1, const int d2,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, XMem * myMem = NULL); const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, XMem * myMem = NULL);
/* initialize a dense 3d tensor V2 */
void InitTensor3DV2(XTensor * tensor, const int d0, const int d1, const int d2,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1);
/* initialize a dense 4d tensor */ /* initialize a dense 4d tensor */
void InitTensor4D(XTensor * tensor, const int d0, const int d1, const int d2, const int d3, void InitTensor4D(XTensor * tensor, const int d0, const int d1, const int d2, const int d3,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, XMem * myMem = NULL); const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, XMem * myMem = NULL);
/* initialize a dense 4d tensor V2 */
void InitTensor4DV2(XTensor * tensor, const int d0, const int d1, const int d2, const int d3,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1);
/* initialize a dense 5d tensor */ /* initialize a dense 5d tensor */
void InitTensor5D(XTensor * tensor, const int d0, const int d1, const int d2, const int d3, const int d4, void InitTensor5D(XTensor * tensor, const int d0, const int d1, const int d2, const int d3, const int d4,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, XMem * myMem = NULL); const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, XMem * myMem = NULL);
/* initialize a dense 5d tensor V2 */
void InitTensor5DV2(XTensor * tensor, const int d0, const int d1, const int d2, const int d3, const int d4,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1);
/* initialize a tensor with a reference tensor */ /* initialize a tensor with a reference tensor */
void InitTensor(XTensor * tensor, const XTensor * reference); void InitTensor(XTensor * tensor, const XTensor * reference);
/* initialize a tensor with a reference tensor */
void InitTensorV2(XTensor * tensor, const XTensor * reference);
/* initialize a tensor on the CPU with a reference tensor */ /* initialize a tensor on the CPU with a reference tensor */
void InitTensorOnCPU(XTensor * tensor, const XTensor * reference); void InitTensorOnCPU(XTensor * tensor, const XTensor * reference);
...@@ -480,38 +512,72 @@ XTensor * NewTensor(); ...@@ -480,38 +512,72 @@ XTensor * NewTensor();
XTensor * NewTensor(const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType = X_FLOAT, XTensor * NewTensor(const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const float myDenseRatio = 1.0F, const int myDevID = -1, XMem * myMem = NULL); const float myDenseRatio = 1.0F, const int myDevID = -1, XMem * myMem = NULL);
/* generate a dense XTensor V2 */
XTensor * NewTensorV2(const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1);
/* generate a XTensor which allocates data on the buffer */ /* generate a XTensor which allocates data on the buffer */
XTensor * NewTensorBuf(const int myOrder, const int * myDimSize, XTensor * NewTensorBuf(const int myOrder, const int * myDimSize,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const float myDenseRatio = 1.0F, const TENSOR_DATA_TYPE myDataType = X_FLOAT, const float myDenseRatio = 1.0F,
const int myDevID = -1, XMem * myMem = NULL); const int myDevID = -1, XMem * myMem = NULL);
/* generate a dense XTensor which allocates data on the buffer V2 */
XTensor * NewTensorBufV2(const int myOrder, const int * myDimSize,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1);
/* generate a XTensor which allocates data on the buffer */ /* generate a XTensor which allocates data on the buffer */
XTensor * NewTensorBuf(const XTensor * reference, int devID, XMem * myMem); XTensor * NewTensorBuf(const XTensor * reference, int devID, XMem * myMem);
/* generate a XTensor which allocates data on the buffer V2 */
XTensor * NewTensorBufV2(const XTensor * reference, int devID);
/* generate a dense vector */ /* generate a dense vector */
XTensor * NewTensor1D(const int num, const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, XTensor * NewTensor1D(const int num, const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1,
XMem * myMem = NULL); XMem * myMem = NULL);
/* generate a dense vector V2 */
XTensor * NewTensor1DV2(const int num, const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1);
/* generate a dense matrix */ /* generate a dense matrix */
XTensor * NewTensor2D(const int rowNum, const int colNum, XTensor * NewTensor2D(const int rowNum, const int colNum,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1, XMem * myMem = NULL); const int myDevID = -1, XMem * myMem = NULL);
/* generate a dense matrix V2 */
XTensor * NewTensor2DV2(const int rowNum, const int colNum,
const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1);
/* generate a dense 3d tensor */ /* generate a dense 3d tensor */
XTensor * NewTensor3D(const int d0, const int d1, const int d2, XTensor * NewTensor3D(const int d0, const int d1, const int d2,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1, XMem * myMem = NULL); const int myDevID = -1, XMem * myMem = NULL);
/* generate a dense 3d tensor V2 */
XTensor * NewTensor3DV2(const int d0, const int d1, const int d2,
const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1);
/* generate a dense 4d tensor */ /* generate a dense 4d tensor */
XTensor * NewTensor4D(const int d0, const int d1, const int d2, const int d3, XTensor * NewTensor4D(const int d0, const int d1, const int d2, const int d3,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1, XMem * myMem = NULL); const int myDevID = -1, XMem * myMem = NULL);
/* generate a dense 4d tensor V2 */
XTensor * NewTensor4DV2(const int d0, const int d1, const int d2, const int d3,
const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1);
/* generate a dense 5d tensor */ /* generate a dense 5d tensor */
XTensor * NewTensor5D(const int d0, const int d1, const int d2, const int d3, const int d4, XTensor * NewTensor5D(const int d0, const int d1, const int d2, const int d3, const int d4,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1, XMem * myMem = NULL); const int myDevID = -1, XMem * myMem = NULL);
/* generate a dense 5d tensor V2 */
XTensor * NewTensor5DV2(const int d0, const int d1, const int d2, const int d3, const int d4,
const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1);
/* generate a copy of XTensor (with a reference to a given tensor) */ /* generate a copy of XTensor (with a reference to a given tensor) */
XTensor * NewTensor(const XTensor * a, bool isFilledData = true); XTensor * NewTensor(const XTensor * a, bool isFilledData = true);
......
...@@ -85,7 +85,7 @@ namespace nts{ ...@@ -85,7 +85,7 @@ namespace nts{
#endif #endif
typedef void (*TFunction) (volatile XList*); typedef void (*TFunction) (volatile TensorList*);
/* /*
This is a class that wraps the standard implementation of threading This is a class that wraps the standard implementation of threading
...@@ -133,7 +133,7 @@ public: ...@@ -133,7 +133,7 @@ public:
/* arguments (for the function to run) */ /* arguments (for the function to run) */
volatile volatile
XList * argv; TensorList * argv;
/* a flag to break */ /* a flag to break */
volatile volatile
......
...@@ -97,4 +97,5 @@ ...@@ -97,4 +97,5 @@
#include "utilities/XMatrixSegment.h" #include "utilities/XMatrixSegment.h"
#include "utilities/FlushToMem.h" #include "utilities/FlushToMem.h"
#include "../function/DropoutWithIndex.h"
#endif // __CHEADER_H__ #endif // __CHEADER_H__
...@@ -218,4 +218,55 @@ XTensor Div(const XTensor &a, const XTensor &b, DTYPE alpha, int leadingDim) ...@@ -218,4 +218,55 @@ XTensor Div(const XTensor &a, const XTensor &b, DTYPE alpha, int leadingDim)
return c; return c;
} }
/*
element-wise division of two tensors (the result is kept in a given tensor)
c(i) = a(i)/b(i) + \alpha * c(i)
where i is the index of the item
>> a - tensor a
>> b - tensor b
>> c - result tensor; it is (re)initialized from a when it is not
       initialized yet or is not of the same shape as a
>> alpha - the coefficient
>> leadingDim - the dimension along which we perform broadcasting
>> requireLink - if add operation to network
*/
void Div(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int leadingDim, bool requireLink)
{
    /* NOTE(review): if c is (re)initialized here, the "\alpha * c(i)" term
       works on whatever InitTensor leaves in c - callers that pass a
       non-zero alpha presumably provide an initialized c; confirm. */
    if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
        InitTensor(&c, &a);
    }

    int n = GetDivDimIndex(a, b);

    if (n == -1) {
        CheckNTErrors(a.dimSize[leadingDim] == b.dimSize[leadingDim], "TODO!");

        /* call _Div function. alpha is forwarded (it used to be a hard-coded 0),
           so that this branch agrees with the formula above, with the _DivDim
           branch below and with the parameter recorded in the link */
        _Div(&a, &b, &c, alpha, leadingDim);

        if (requireLink) {
            /* tensor connections */
            XLink::MakeLink(&a, &b, &c, MATH_DIV);
            XLink::AddParamToHead(&c, alpha);
            XLink::AddParamToHeadInt(&c, leadingDim);
        }
    }
    else if (n >= 0 && n < a.order) {
        /* call _DivDim function */
        _DivDim(&a, &b, &c, n, alpha);

        if (requireLink) {
            /* tensor connections (note the parameter order: n first, then alpha) */
            XLink::MakeLink(&a, &b, &c, MATH_DIVDIM);
            XLink::AddParamToHeadInt(&c, n);
            XLink::AddParamToHead(&c, alpha);
        }
    }
    else {
        ShowNTErrors("Something is wrong!");
    }
}
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
...@@ -49,6 +49,13 @@ where i is the index of the element ...@@ -49,6 +49,13 @@ where i is the index of the element
*/ */
XTensor Div(const XTensor &a, const XTensor &b, DTYPE alpha = 0.0, int leadingDim = 0); XTensor Div(const XTensor &a, const XTensor &b, DTYPE alpha = 0.0, int leadingDim = 0);
/*
element-wise division of two tensors:
c(i) = a(i)/b(i) + \alpha * c(i)
where i is the index of the element
*/
void Div(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha = 0.0, int leadingDim = 0, bool requireLink = false);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
#endif // __DIV_H__ #endif // __DIV_H__
\ No newline at end of file
...@@ -170,5 +170,36 @@ XTensor DivDim(const XTensor &a, const XTensor &b, int n, DTYPE alpha) ...@@ -170,5 +170,36 @@ XTensor DivDim(const XTensor &a, const XTensor &b, int n, DTYPE alpha)
return c; return c;
} }
/*
tensor division (the result is kept in a given tensor)
c = a / b + \alpha * c
where the size of b is equal to the n-th dimension of a,
i.e., a is divided with b by broadcasting
>> a - a tensor
>> b - another tensor whose size is equal to that of dimension n of a
>> c - where we put the result; it is (re)initialized from a when it is
       not initialized yet or is not of the same shape as a
>> n - the dimension index
>> alpha - the scaling factor
>> requireLink - if add operation to network
*/
void DivDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE alpha, bool requireLink)
{
    /* c can be used as it is only if it is initialized and shaped like a */
    const bool isUsable = c.isInit && XTensor::IsSameShaped(&a, &c);
    if (!isUsable)
        InitTensor(&c, &a);

    /* call _DivDim function */
    _DivDim(&a, &b, &c, n, alpha);

    /* nothing more to do if the operation is not added to the network */
    if (!requireLink)
        return;

    /* tensor connections */
    XLink::MakeLink(&a, &b, &c, MATH_DIVDIM);
    XLink::AddParamToHeadInt(&c, n);
    XLink::AddParamToHead(&c, alpha);
}
} }
...@@ -52,6 +52,14 @@ i.e., a is divided with b by broadcasting ...@@ -52,6 +52,14 @@ i.e., a is divided with b by broadcasting
we make a new tensor c to keep the result and return it we make a new tensor c to keep the result and return it
*/ */
XTensor DivDim(const XTensor &a, const XTensor &b, int n, DTYPE alpha = (DTYPE)0.0); XTensor DivDim(const XTensor &a, const XTensor &b, int n, DTYPE alpha = (DTYPE)0.0);
/*
tensor division of two tensors:
c(i) = a/b + \alpha * c
where the size of b is equal to the n-th dimension of a,
i.e., a is divided with b by broadcasting
*/
void DivDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE alpha = (DTYPE)0.0, bool requireLink = false);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
/* NiuTrans.Tensor - an open-source tensor library
 * Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2019-04-24
* I'll attend several conferences and workshops in the following weeks -
* busy days :(
*/
#include "../../XTensor.h"
#include "../../XName.h"
#include "../../XUtility.h"
#include "Mask.h"
#include "Mask.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
mask entries of a given tensor:
c(i) = a(i) if mask(i) is non-zero
c(i) = alpha if mask(i) = 0
where i is the index of the element
*/
void _Mask(const XTensor * a, const XTensor * mask, XTensor * c, DTYPE alpha)
{
CheckNTErrors(a && mask && c, "Empty tensor input!");
CheckNTErrors(a->unitNum == mask->unitNum && a->unitNum == c->unitNum,
"Unmatched tensors in addition!");
CheckNTErrors(mask->dataType == X_INT, "The mask tensor must be in X_INT!")
//CheckNTErrors(a->dataType == mask->dataType && a->dataType == c->dataType,
// "Unmatched tensors in addition!");
if (a->devID >= 0 || mask->devID >= 0 || c->devID >= 0) {
#ifdef USE_CUDA
if (a == c) {
int P2PAccesible = 0;
#ifdef CUDA_UVA
cudaDeviceCanAccessPeer(&P2PAccesible, a->devID, b->devID);
#endif
if ((a->devID < 0 && mask->devID >= 0) ||
(a->devID >= 0 && mask->devID < 0) ||
(a->devID >= 0 && mask->devID >= 0 && a->devID != mask->devID && !P2PAccesible))
{
ShowNTErrors("Cannot run this method on multiple devices simultaneously!");
}
else
_CudaMask(a, mask, c, alpha);
}
else
_CudaMask(a, mask, c, alpha);
#endif
}
else {
if (!a->isSparse && !mask->isSparse) {
CheckNTErrors(!c->isSparse, "Illegal use of sparse tensor in addition!");
if (a->dataType == DEFAULT_DTYPE &&
mask->dataType == X_INT &&
c->dataType == DEFAULT_DTYPE)
{
DTYPE * ap = (DTYPE*)a->data;
int * maskp = (int*)mask->data;
DTYPE * cp = (DTYPE*)c->data;
/* unrolling */
int num = a->unitNum;
if (num % 2 == 0) {
for (int i = 0; i < num; i += 2) {
if (maskp[i] == 0) {
cp[i] = alpha;
}
else {
cp[i] = ap[i];
}
if (maskp[i + 1] == 0) {
cp[i + 1] = alpha;
}
else {
cp[i + 1] = ap[i + 1];
}
}
}
else {
for (int i = 0; i < num; i++) {
if (maskp[i] == 0) {
cp[i] = alpha;
}
else {
cp[i] = ap[i];
}
}
}
}
else {
// TODO!!
ShowNTErrors("TODO!");
}
}
else {
// TODO!!
ShowNTErrors("TODO!");
}
}
}
/*
mask entries of a given tensor (on site):
a(i) = a(i) if mask(i) is non-zero
a(i) = alpha if mask(i) = 0
where i is the index of the element
>> a - the tensor to be masked; it also keeps the result
>> mask - the mask tensor
>> alpha - the value assigned to the entries whose mask is 0
*/
void _MaskMe(XTensor * a, const XTensor * mask, DTYPE alpha)
{
/* a serves as both the input and the output of _Mask */
_Mask(a, mask, a, alpha);
}
/*
mask entries of a given tensor (return an XTensor structure):
c(i) = a(i) if mask(i) is non-zero
c(i) = alpha if mask(i) = 0
where i is the index of the element and c is the returned tensor
>> a - the input tensor
>> mask - the mask tensor
>> alpha - the value assigned to the entries whose mask is 0
<< return - the tensor c that keeps the masked result
*/
XTensor Mask(const XTensor &a, const XTensor &mask, DTYPE alpha)
{
/* c is created with a as the reference and flagged as a temporary tensor */
XTensor c(&a);
c.SetTMPFlag();
/* call _Mask function */
_Mask(&a, &mask, &c, alpha);
/* tensor connections (disabled: no link is made for this operation yet) */
//XLink::MakeLink(&a, &mask, &c, MATH_SUM);
//XLink::AddParamToHead(&c, alpha);
// TODO!!
/* NOTE(review): this error is raised unconditionally, after the result has
   been computed - the function looks unfinished; confirm what ShowNTErrors
   does before relying on the returned tensor */
ShowNTErrors("TODO!");
return c;
}
}
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
 * Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2019-04-24
* I'll attend several conferences and workshops in the following weeks -
* busy days :(
*/
#include "../../XDevice.h"
#include "../../XUtility.h"
#include "Sub.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/*
mask entries of a given tensor (CUDA Kernel)
c(i) = a(i) if mask(i) is non-zero
c(i) = alpha if mask(i) = 0
>> a - the input data array
>> mask - the mask data array
>> c - where we put masked a
>> size - the size of a/mask/c
>> alpha - the value assigned to the entries whose mask is 0
*/
__global__
void KernelMASK(DTYPE * a, int * mask, DTYPE * c, int size, DTYPE alpha)
{
    const int idx = blockDim.x * blockIdx.x + threadIdx.x;

    /* threads that fall beyond the end of the arrays have nothing to do */
    if (idx >= size)
        return;

    c[idx] = (mask[idx] != 0) ? a[idx] : alpha;
}
/*
mask entries of a given tensor (cuda version)
c(i) = a(i) if mask(i) is non-zero
c(i) = alpha if mask(i) = 0
>> a - a tensor
>> mask - mask tensor (must be of type X_INT)
>> c - where we put masked a
>> alpha - the value assigned to the entries whose mask is 0
*/
void _CudaMask(const XTensor * a, const XTensor * mask, XTensor * c, DTYPE alpha)
{
    CheckNTErrors(a && mask && c, "Empty tensor input!");
    CheckNTErrors((a->unitNum == mask->unitNum && a->unitNum == c->unitNum),
                  "Unmatched tensors in addition!");
    CheckNTErrors(mask->dataType == X_INT, "The mask tensor must be in X_INT!");
    CheckNTErrors((a->devID == mask->devID && a->devID == c->devID),
                  "The tensors must be on the same!");

    /* switch to the device of a; the previous device is restored at the end */
    const int previousDevID = XDevice::GetGPUDevice();
    XDevice::SetGPUDevice(a->devID);

    if (a->isSparse || mask->isSparse) {
        // TODO!!
        ShowNTErrors("TODO!");
    }
    else {
        CheckNTErrors(!c->isSparse, "Illegal use of sparse matrix in addition!");

        const bool isSupportedType = a->dataType == DEFAULT_DTYPE &&
                                     mask->dataType == X_INT &&
                                     c->dataType == DEFAULT_DTYPE;

        if (!isSupportedType) {
            // TODO!!
            ShowNTErrors("TODO!");
        }
        else {
            /* ask for a 1D thread layout that covers all the units of a */
            int gridSize[3], blockSize[3];
            GDevs.GetCudaThread(a->devID, a->unitNum, gridSize, blockSize);

            dim3 blocks(gridSize[0]);
            dim3 threads(blockSize[0]);

            KernelMASK<<<blocks, threads>>>((DTYPE*)a->data, (int *)mask->data, (DTYPE*)c->data, a->unitNum, alpha);
        }
    }

    XDevice::SetGPUDevice(previousDevID);
}
#endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
 * Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2019-04-24
* I'll attend several conferences and workshops in the following weeks -
* busy days :(
*/
#ifndef __MASK_CUH__
#define __MASK_CUH__
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/* mask entries of a given tensor (cuda version) */
void _CudaMask(const XTensor * a, const XTensor * mask, XTensor * c = NULL, DTYPE alpha = (DTYPE)1.0);
#endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor)
#endif // __MASK_CUH__
\ No newline at end of file
...@@ -54,4 +54,4 @@ XTensor Mask(const XTensor &a, const XTensor &mask, DTYPE alpha = 0.0); ...@@ -54,4 +54,4 @@ XTensor Mask(const XTensor &a, const XTensor &mask, DTYPE alpha = 0.0);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
#endif // __MASK_H__ #endif // __MASK_H__
\ No newline at end of file
...@@ -108,9 +108,9 @@ void _MatrixMul(const XTensor * a, MATRIX_TRANS_TYPE transposedA, ...@@ -108,9 +108,9 @@ void _MatrixMul(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
cBlockNum *= b->dimSizeRDI[i]; cBlockNum *= b->dimSizeRDI[i];
} }
XList * aList = new XList(10); TensorList * aList = new TensorList(10);
XList * bList = new XList(10); TensorList * bList = new TensorList(10);
XList * cList = new XList(10); TensorList * cList = new TensorList(10);
int aDimSize[2] = { -a->dimSizeRDI[1], a->dimSizeRDI[0] }; int aDimSize[2] = { -a->dimSizeRDI[1], a->dimSizeRDI[0] };
int bDimSize[2] = { -b->dimSizeRDI[1], b->dimSizeRDI[0] }; int bDimSize[2] = { -b->dimSizeRDI[1], b->dimSizeRDI[0] };
int cDimSize[2] = { -c->dimSizeRDI[1], c->dimSizeRDI[0] }; int cDimSize[2] = { -c->dimSizeRDI[1], c->dimSizeRDI[0] };
...@@ -202,6 +202,42 @@ void _MatrixMul(const XTensor * a, MATRIX_TRANS_TYPE transposedA, ...@@ -202,6 +202,42 @@ void _MatrixMul(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
delete cList; delete cList;
} }
bool CheckMMulShape(const XTensor * a, MATRIX_TRANS_TYPE transposedA, const XTensor * b, MATRIX_TRANS_TYPE transposedB, XTensor * c)
{
if (!(a && b && c))
return false;
if(!(a->dataType == b->dataType && a->dataType == c->dataType))
return false;
if (!(a->order >= 2 && b->order >= 2 && c->order >= 2))
return false;
int an = transposedA == X_TRANS ? a->dimSizeRDI[0] : a->dimSizeRDI[1];
int am = transposedA == X_TRANS ? a->dimSizeRDI[1] : a->dimSizeRDI[0];
int bn = transposedB == X_TRANS ? b->dimSizeRDI[0] : b->dimSizeRDI[1];
int bm = transposedB == X_TRANS ? b->dimSizeRDI[1] : b->dimSizeRDI[0];
CheckNTErrors(am == bn, "Unmatched tensors in multiplication!");
int order = a->order + b->order - 2;
int sub = 0;
int * dimSize = new int[order];
for (int i = 2; i < a->order; i++)
dimSize[sub++] = a->dimSizeRDI[a->order + 1 - i];
for (int i = 2; i < b->order; i++)
dimSize[sub++] = b->dimSizeRDI[b->order + 1 - i];
dimSize[sub++] = an;
dimSize[sub++] = bm;
for (int i = 0; i < order; i++) {
if (dimSize[i] != c->dimSize[i])
return false;
}
return true;
}
/* /*
matrix multiplication (return an XTensor structure) c = trans(a) * trans(b) * alpha matrix multiplication (return an XTensor structure) c = trans(a) * trans(b) * alpha
make a new tensor to keep the result and return it make a new tensor to keep the result and return it
...@@ -266,6 +302,53 @@ XTensor MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA, ...@@ -266,6 +302,53 @@ XTensor MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA,
return c; return c;
} }
/*
matrix multiplication c = trans(a) * trans(b) * alpha
(the result is kept in a given tensor)
>> a - tensor a
>> transposedA - indicates whether the matrices in a are transposed
>> b - tensor b
>> transposedB - indicates whether the matrices in b are transposed
>> c - where we keep the result; it is (re)initialized here when it is not
       initialized yet or is rejected by CheckMMulShape
>> alpha - a coefficient
>> parallelRunner - parallel processing module (passed on to _MatrixMul)
>> requireLink - if add operation to network
*/
void MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA,
               const XTensor &b, MATRIX_TRANS_TYPE transposedB, XTensor &c,
               DTYPE alpha, XPRunner * parallelRunner, bool requireLink)
{
    CheckNTErrors(a.dataType == b.dataType, "Input tensors should have the same data type!");
    CheckNTErrors(a.order >= 2 && b.order >= 2, "Input tensors must have a order >= 2!");

    const bool isUsable = c.isInit && CheckMMulShape(&a, transposedA, &b, transposedB, &c);

    if (!isUsable) {
        const bool flipA = (transposedA == X_TRANS);
        const bool flipB = (transposedB == X_TRANS);

        int aRows = flipA ? a.dimSizeRDI[0] : a.dimSizeRDI[1];
        int aCols = flipA ? a.dimSizeRDI[1] : a.dimSizeRDI[0];
        int bRows = flipB ? b.dimSizeRDI[0] : b.dimSizeRDI[1];
        int bCols = flipB ? b.dimSizeRDI[1] : b.dimSizeRDI[0];

        CheckNTErrors(aCols == bRows, "Unmatched tensors in multiplication!");

        /* shape of the result: leading dimensions of a, leading dimensions
           of b, and then the (aRows x bCols) matrix */
        int resOrder = a.order + b.order - 2;
        int * resDims = new int[resOrder];
        int pos = 0;

        for (int k = a.order - 1; k >= 2; k--)
            resDims[pos++] = a.dimSizeRDI[k];
        for (int k = b.order - 1; k >= 2; k--)
            resDims[pos++] = b.dimSizeRDI[k];
        resDims[pos++] = aRows;
        resDims[pos++] = bCols;

        /* the result is dense unless both inputs are sparse */
        float dr = (a.isSparse && b.isSparse) ? MAX(a.denseRatio, b.denseRatio) : 1.0F;

        InitTensor(&c, resOrder, resDims, a.dataType, dr, a.devID, a.mem);

        /* destroy variables */
        delete[] resDims;
    }

    /* call _MatrixMul function */
    _MatrixMul(&a, transposedA, &b, transposedB, &c, alpha, 0, parallelRunner);

    if (!requireLink)
        return;

    /* tensor connections */
    XLink::MakeLink(&a, &b, &c, MATH_MATRIXMUL);
    XLink::AddParamToHeadTrans(&c, transposedA);
    XLink::AddParamToHeadTrans(&c, transposedB);
    XLink::AddParamToHead(&c, alpha);
}
/* /*
matrix multiplication with no transposition c = a * b * alpha matrix multiplication with no transposition c = a * b * alpha
>> a - tensor a >> a - tensor a
...@@ -316,6 +399,52 @@ XTensor MatrixMul(const XTensor &a, const XTensor &b, ...@@ -316,6 +399,52 @@ XTensor MatrixMul(const XTensor &a, const XTensor &b,
return c; return c;
} }
/*
matrix multiplication with no transposition c = a * b * alpha
(the result is kept in a given tensor)
>> a - tensor a
>> b - tensor b
>> c - where we keep the result; it is (re)initialized here when it is not
       initialized yet or is rejected by CheckMMulShape
>> alpha - a coefficient
>> parallelRunner - parallel processing module (passed on to _MatrixMul)
>> requireLink - if add operation to network
*/
void MatrixMul(const XTensor &a, const XTensor &b, XTensor &c,
               DTYPE alpha, XPRunner * parallelRunner, bool requireLink)
{
    CheckNTErrors(a.dataType == b.dataType, "Input tensors should have the same data type!");
    CheckNTErrors(a.order >= 2 && b.order >= 2, "Input tensors must have a order >= 2!");

    const bool isUsable = c.isInit && CheckMMulShape(&a, X_NOTRANS, &b, X_NOTRANS, &c);

    if (!isUsable) {
        int aRows = a.dimSizeRDI[1];
        int aCols = a.dimSizeRDI[0];
        int bRows = b.dimSizeRDI[1];
        int bCols = b.dimSizeRDI[0];

        CheckNTErrors(aCols == bRows, "Unmatched tensors in multiplication!");

        /* shape of the result: leading dimensions of a, leading dimensions
           of b, and then the (aRows x bCols) matrix */
        int resOrder = a.order + b.order - 2;
        int * resDims = new int[resOrder];
        int pos = 0;

        for (int k = a.order - 1; k >= 2; k--)
            resDims[pos++] = a.dimSizeRDI[k];
        for (int k = b.order - 1; k >= 2; k--)
            resDims[pos++] = b.dimSizeRDI[k];
        resDims[pos++] = aRows;
        resDims[pos++] = bCols;

        /* the result is dense unless both inputs are sparse */
        float dr = (a.isSparse && b.isSparse) ? MAX(a.denseRatio, b.denseRatio) : 1.0F;

        InitTensor(&c, resOrder, resDims, a.dataType, dr, a.devID, a.mem);

        /* destroy variables */
        delete[] resDims;
    }

    /* call _MatrixMul function */
    _MatrixMul(&a, X_NOTRANS, &b, X_NOTRANS, &c, alpha, 0, parallelRunner);

    if (!requireLink)
        return;

    /* tensor connections */
    XLink::MakeLink(&a, &b, &c, MATH_MATRIXMUL);
    XLink::AddParamToHeadTrans(&c, X_NOTRANS);
    XLink::AddParamToHeadTrans(&c, X_NOTRANS);
    XLink::AddParamToHead(&c, alpha);
}
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -59,10 +59,16 @@ Obviously C = A * B performs normal matrix multiplication if A = y * z and B = x ...@@ -59,10 +59,16 @@ Obviously C = A * B performs normal matrix multiplication if A = y * z and B = x
XTensor MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA, const XTensor &b, MATRIX_TRANS_TYPE transposedB, XTensor MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA, const XTensor &b, MATRIX_TRANS_TYPE transposedB,
DTYPE alpha = (DTYPE)1.0, XPRunner * parallelRunner = NULL); DTYPE alpha = (DTYPE)1.0, XPRunner * parallelRunner = NULL);
void MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA, const XTensor &b, MATRIX_TRANS_TYPE transposedB,
XTensor &c, DTYPE alpha = (DTYPE)1.0, XPRunner * parallelRunner = NULL, bool requireLink = false);
/* matrix multiplication with no transposition c = a * b * alpha*/ /* matrix multiplication with no transposition c = a * b * alpha*/
XTensor MatrixMul(const XTensor &a, const XTensor &b, XTensor MatrixMul(const XTensor &a, const XTensor &b,
DTYPE alpha = (DTYPE)1.0, XPRunner * parallelRunner = NULL); DTYPE alpha = (DTYPE)1.0, XPRunner * parallelRunner = NULL);
void MatrixMul(const XTensor &a, const XTensor &b, XTensor &c,
DTYPE alpha = (DTYPE)1.0, XPRunner * parallelRunner = NULL, bool requireLink = false);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -38,17 +38,23 @@ argument5: matrix a ...@@ -38,17 +38,23 @@ argument5: matrix a
argument6: matrix b argument6: matrix b
argument7: matrix c (c=a*b*\alpha + c*beta) argument7: matrix c (c=a*b*\alpha + c*beta)
*/ */
void _MatrixMul2DMultiTheading(XList * args) void _MatrixMul2DMultiTheading(TensorList * args)
{ {
int x1 = *(int*)args->GetItem(0); CheckNTErrors(args->count == 2, "invalid argument number!");
int y1 = *(int*)args->GetItem(1); IntList * indexArgs = (IntList*)args->GetItem(0);
int x2 = *(int*)args->GetItem(2); TensorList * matrixArgs = (TensorList*)args->GetItem(1);
int y2 = *(int*)args->GetItem(3); CheckNTErrors(indexArgs->count == 4, "invalid argument number!");
XTensor * a = (XTensor*)args->GetItem(4); CheckNTErrors(matrixArgs->count == 5, "invalid argument number!");
XTensor * b = (XTensor*)args->GetItem(5);
XTensor * c = (XTensor*)args->GetItem(6); XTensor * a = matrixArgs->GetItem(0);
DTYPE alpha = *(DTYPE*)args->GetItem(7); XTensor * b = matrixArgs->GetItem(1);
DTYPE beta = *(DTYPE*)args->GetItem(8); XTensor * c = matrixArgs->GetItem(2);
DTYPE alpha = *(DTYPE*)(matrixArgs->GetItem(3));
DTYPE beta = *(DTYPE*)(matrixArgs->GetItem(4));
int x1 = indexArgs->GetItem(0);
int y1 = indexArgs->GetItem(1);
int x2 = indexArgs->GetItem(2);
int y2 = indexArgs->GetItem(3);
#ifdef FAST_MATRIX #ifdef FAST_MATRIX
int am = a->dimSize[1]; int am = a->dimSize[1];
......
...@@ -30,7 +30,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -30,7 +30,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
matrix multiplication for a block (x1,y1) - (x2,y2) matrix multiplication for a block (x1,y1) - (x2,y2)
where (x1,y1) is the upper-left corner and (x2,y2) is the bottom-right corner where (x1,y1) is the upper-left corner and (x2,y2) is the bottom-right corner
*/ */
void _MatrixMul2DMultiTheading(XList * args); void _MatrixMul2DMultiTheading(TensorList * args);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -227,9 +227,9 @@ c_i = trans(a_i) * trans(b_i) * \alpha + c_i * \beta for each i in [0,count-1] ...@@ -227,9 +227,9 @@ c_i = trans(a_i) * trans(b_i) * \alpha + c_i * \beta for each i in [0,count-1]
>> alpha - scalar >> alpha - scalar
>> beta - scalar >> beta - scalar
*/ */
void _MatrixMulBatchedCPU(const XList * a, MATRIX_TRANS_TYPE transposedA, void _MatrixMulBatchedCPU(const TensorList * a, MATRIX_TRANS_TYPE transposedA,
const XList * b, MATRIX_TRANS_TYPE transposedB, const TensorList * b, MATRIX_TRANS_TYPE transposedB,
XList * c, DTYPE alpha, DTYPE beta) TensorList * c, DTYPE alpha, DTYPE beta)
{ {
CheckNTErrors(a && b && c, "Empty input lists!"); CheckNTErrors(a && b && c, "Empty input lists!");
CheckNTErrors(a->count == b->count && a->count == c->count, "Input lists must be of the same size!"); CheckNTErrors(a->count == b->count && a->count == c->count, "Input lists must be of the same size!");
......
...@@ -58,8 +58,8 @@ void _MatrixMulBatchedCPU(const XTensor * a, MATRIX_TRANS_TYPE transposedA, cons ...@@ -58,8 +58,8 @@ void _MatrixMulBatchedCPU(const XTensor * a, MATRIX_TRANS_TYPE transposedA, cons
matrix multiplication of the two tensors c = trans(a) * trans(b) * alpha + c * beta (for list inputs) matrix multiplication of the two tensors c = trans(a) * trans(b) * alpha + c * beta (for list inputs)
optimized for GPU optimized for GPU
*/ */
void _MatrixMulBatchedCPU(const XList * a, MATRIX_TRANS_TYPE transposedA, const XList * b, MATRIX_TRANS_TYPE transposedB, void _MatrixMulBatchedCPU(const TensorList * a, MATRIX_TRANS_TYPE transposedA, const TensorList * b, MATRIX_TRANS_TYPE transposedB,
XList * c, DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0); TensorList * c, DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0);
/* /*
matrix multiplication of the two tensors (return an XTensor structure) c = trans(a) * trans(b) * alpha matrix multiplication of the two tensors (return an XTensor structure) c = trans(a) * trans(b) * alpha
......
...@@ -117,7 +117,6 @@ XTensor MulAndShift(const XTensor &x, const XTensor &w, const XTensor &b, ...@@ -117,7 +117,6 @@ XTensor MulAndShift(const XTensor &x, const XTensor &w, const XTensor &b,
ShowNTErrors("Something is wrong!"); ShowNTErrors("Something is wrong!");
} }
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&x, &w, &b, &c, MATH_MULANDSHIFT); XLink::MakeLink(&x, &w, &b, &c, MATH_MULANDSHIFT);
XLink::AddParamToHeadInt(&c, n); XLink::AddParamToHeadInt(&c, n);
......
...@@ -219,4 +219,55 @@ XTensor Multiply(const XTensor &a, const XTensor &b, DTYPE alpha, int leadingDim ...@@ -219,4 +219,55 @@ XTensor Multiply(const XTensor &a, const XTensor &b, DTYPE alpha, int leadingDim
return c; return c;
} }
/*
element-wise product of two tensors (the result is kept in a given tensor)
c(i) = a(i)*b(i) + \alpha * c(i)
where i is the index of the item
>> a - tensor a
>> b - tensor b
>> c - result tensor; it is (re)initialized from a when it is not
       initialized yet or is not of the same shape as a
>> alpha - the coefficient
>> leadingDim - the dimension along which we perform broadcasting
>> requireLink - if add operation to network
*/
void Multiply(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int leadingDim, bool requireLink)
{
    /* NOTE(review): if c is (re)initialized here, the "\alpha * c(i)" term
       works on whatever InitTensor leaves in c - callers that pass a
       non-zero alpha presumably provide an initialized c; confirm. */
    if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
        InitTensor(&c, &a);
    }

    int n = GetMultiplyDimIndex(a, b);

    if (n == -1) {
        CheckNTErrors(a.dimSize[leadingDim] == b.dimSize[leadingDim], "TODO!");

        /* call _Multiply function. alpha is forwarded (it used to be a hard-coded 0),
           so that this branch agrees with the formula above, with the _MultiplyDim
           branch below and with the parameter recorded in the link */
        _Multiply(&a, &b, &c, alpha, leadingDim);

        if (requireLink) {
            /* tensor connections */
            XLink::MakeLink(&a, &b, &c, MATH_MULTIPLY);
            XLink::AddParamToHead(&c, alpha);
            XLink::AddParamToHeadInt(&c, leadingDim);
        }
    }
    else if (n >= 0 && n < a.order) {
        /* call _MultiplyDim function */
        _MultiplyDim(&a, &b, &c, n, alpha);

        if (requireLink) {
            /* tensor connections (note the parameter order: n first, then alpha) */
            XLink::MakeLink(&a, &b, &c, MATH_MULTIPLYDIM);
            XLink::AddParamToHeadInt(&c, n);
            XLink::AddParamToHead(&c, alpha);
        }
    }
    else {
        ShowNTErrors("Something is wrong!");
    }
}
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
...@@ -49,6 +49,13 @@ where i is the index of the element ...@@ -49,6 +49,13 @@ where i is the index of the element
*/ */
XTensor Multiply(const XTensor &a, const XTensor &b, DTYPE alpha = 0.0, int leadingDim = 0); XTensor Multiply(const XTensor &a, const XTensor &b, DTYPE alpha = 0.0, int leadingDim = 0);
/*
element-wise product of two tensors (the result is written into c):
c(i) = a(i)*b(i) + \alpha * c(i)
where i is the index of the element
requireLink indicates whether the operation is added to the network
*/
void Multiply(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha = 0.0, int leadingDim = 0, bool requireLink = false);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
#endif // __MULTIPLY_H__ #endif // __MULTIPLY_H__
\ No newline at end of file
...@@ -169,6 +169,36 @@ XTensor MultiplyDim(const XTensor &a, const XTensor &b, int n) ...@@ -169,6 +169,36 @@ XTensor MultiplyDim(const XTensor &a, const XTensor &b, int n)
return c; return c;
} }
/*
tensor multiplication, written into a caller-supplied tensor
c = a * b
where the size of b is equal to the n-th dimension of a,
i.e., a is multiplied with b by broadcasting
(this overload has no \alpha argument; 0 is passed to _MultiplyDim for it)
>> a - a tensor
>> b - another tensor whose size is equal to that of dimension n of a
>> c - the result tensor; it is (re)initialized from a when it has not been
       initialized yet or is not same-shaped as a
>> n - the dimension index
>> requireLink - whether the operation is recorded in the network
*/
void MultiplyDim(const XTensor &a, const XTensor &b, XTensor &c, int n, bool requireLink)
{
    /* make sure c can hold a result shaped like a */
    const bool reusable = c.isInit && XTensor::IsSameShaped(&a, &c);
    if (!reusable) {
        InitTensor(&c, &a);
    }

    /* do the computation with _MultiplyDim */
    _MultiplyDim(&a, &b, &c, n, 0);

    if (!requireLink)
        return;

    /* record the operation and its parameters */
    XLink::MakeLink(&a, &b, &c, MATH_MULTIPLYDIM);
    XLink::AddParamToHeadInt(&c, n);
    XLink::AddParamToHead(&c, 0);
}
/* /*
tensor broadcast multiplication tensor broadcast multiplication
c = a * b + c * \beta c = a * b + c * \beta
...@@ -309,4 +339,30 @@ XTensor MultiplyBroadcast(const XTensor &a, const XTensor &b) ...@@ -309,4 +339,30 @@ XTensor MultiplyBroadcast(const XTensor &a, const XTensor &b)
return c; return c;
} }
/*
tensor broadcast multiplication, written into a caller-supplied tensor
c = a * b
where some of dimensions of b can be of size 1
(this overload has no \beta argument; 0 is passed to _MultiplyBroadcast for it)
>> a - a tensor
>> b - another tensor that would be broadcasted
>> c - the resulting tensor; it is (re)initialized from a when it has not been
       initialized yet or is not same-shaped as a
>> requireLink - whether the operation is recorded in the network
*/
void MultiplyBroadcast(const XTensor &a, const XTensor &b, XTensor &c, bool requireLink)
{
    /* make sure c can hold a result shaped like a */
    const bool reusable = c.isInit && XTensor::IsSameShaped(&a, &c);
    if (!reusable) {
        InitTensor(&c, &a);
    }

    /* do the computation with _MultiplyBroadcast */
    _MultiplyBroadcast(&a, &b, &c, 0);

    if (!requireLink)
        return;

    /* record the operation and its parameter */
    XLink::MakeLink(&a, &b, &c, MATH_MULTIPLYBROADCAST);
    XLink::AddParamToHead(&c, 0);
}
} }
...@@ -38,6 +38,10 @@ void _MultiplyDimMe(XTensor * a, const XTensor * b, int n, DTYPE alpha = 0.0); ...@@ -38,6 +38,10 @@ void _MultiplyDimMe(XTensor * a, const XTensor * b, int n, DTYPE alpha = 0.0);
i.e., a is multiplied with b by broadcasting. We make a new tensor c to keep the result and return it */ i.e., a is multiplied with b by broadcasting. We make a new tensor c to keep the result and return it */
XTensor MultiplyDim(const XTensor &a, const XTensor &b, int n); XTensor MultiplyDim(const XTensor &a, const XTensor &b, int n);
/* tensor multiplication c = a * b where the size of b is equal to the n-th dimension of a,
i.e., a is multiplied with b by broadcasting. The result is written into c.
This overload takes no \alpha; requireLink indicates whether the operation is added to the network */
void MultiplyDim(const XTensor &a, const XTensor &b, XTensor &c, int n, bool requireLink = false);
/* tensor multiplication summation c = a * b + c * \beta where some of dimensions of b can be of size 1 */ /* tensor multiplication summation c = a * b + c * \beta where some of dimensions of b can be of size 1 */
void _MultiplyBroadcast(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta = (DTYPE)1.0); void _MultiplyBroadcast(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta = (DTYPE)1.0);
...@@ -45,6 +49,9 @@ void _MultiplyBroadcast(const XTensor * a, const XTensor * b, XTensor * c, DTYPE ...@@ -45,6 +49,9 @@ void _MultiplyBroadcast(const XTensor * a, const XTensor * b, XTensor * c, DTYPE
we return the resulting tensor here */ we return the resulting tensor here */
XTensor MultiplyBroadcast(const XTensor &a, const XTensor &b); XTensor MultiplyBroadcast(const XTensor &a, const XTensor &b);
/* tensor broadcast multiplication c = a * b where some of dimensions of b can be of size 1.
The result is written into c. This overload takes no \beta;
requireLink indicates whether the operation is added to the network */
void MultiplyBroadcast(const XTensor &a, const XTensor &b, XTensor &c, bool requireLink = false);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
#endif // __MULTIPLYDIM_H__ #endif // __MULTIPLYDIM_H__
...@@ -79,4 +79,25 @@ XTensor Negate(const XTensor & a) ...@@ -79,4 +79,25 @@ XTensor Negate(const XTensor & a)
return b; return b;
} }
/*
set every entry to its minus value, writing into a caller-supplied tensor
>> a - input tensor we are processing
>> b - output tensor; it is (re)initialized from a when it has not been
       initialized yet or is not same-shaped as a
>> requireLink - whether the operation is recorded in the network
*/
void Negate(const XTensor & a, XTensor & b, bool requireLink)
{
    /* make sure b can hold a result shaped like a */
    const bool reusable = b.isInit && XTensor::IsSameShaped(&a, &b);
    if (!reusable) {
        InitTensor(&b, &a);
    }

    /* do the computation with _Negate */
    _Negate(&a, &b);

    if (!requireLink)
        return;

    /* record the operation */
    XLink::MakeLink(&a, NULL, &b, MATH_NEGATE);
}
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
...@@ -41,6 +41,9 @@ make a new tensor to keep the result and return it ...@@ -41,6 +41,9 @@ make a new tensor to keep the result and return it
*/ */
XTensor Negate(const XTensor & a); XTensor Negate(const XTensor & a);
/* set every entry to its minus value and keep the result in b;
requireLink indicates whether the operation is added to the network */
void Negate(const XTensor & a, XTensor & b, bool requireLink = false);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
#endif // __NEGATE_H__ #endif // __NEGATE_H__
...@@ -84,4 +84,25 @@ XTensor Sign(const XTensor & a) ...@@ -84,4 +84,25 @@ XTensor Sign(const XTensor & a)
return b; return b;
} }
/*
set every entry to its sign value, writing into a caller-supplied tensor
>> a - input tensor we are processing
>> b - output tensor; it is (re)initialized from a when it has not been
       initialized yet or is not same-shaped as a
>> requireLink - whether the operation is recorded in the network
*/
void Sign(const XTensor & a, XTensor & b, bool requireLink)
{
    /* make sure b can hold a result shaped like a */
    const bool reusable = b.isInit && XTensor::IsSameShaped(&a, &b);
    if (!reusable) {
        InitTensor(&b, &a);
    }

    /* do the computation with _Sign */
    _Sign(&a, &b);

    if (!requireLink)
        return;

    /* record the operation */
    XLink::MakeLink(&a, NULL, &b, MATH_SIGN);
}
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
...@@ -41,6 +41,9 @@ make a new tensor to keep the result and return it ...@@ -41,6 +41,9 @@ make a new tensor to keep the result and return it
*/ */
XTensor Sign(const XTensor & a); XTensor Sign(const XTensor & a);
/* set every entry to its sign value and keep the result in b;
requireLink indicates whether the operation is added to the network */
void Sign(const XTensor & a, XTensor & b, bool requireLink = false);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
#endif // __SIGN_H__ #endif // __SIGN_H__
...@@ -196,4 +196,47 @@ XTensor Sub(const XTensor &a, const XTensor &b, DTYPE beta) ...@@ -196,4 +196,47 @@ XTensor Sub(const XTensor &a, const XTensor &b, DTYPE beta)
return c; return c;
} }
/*
tensor subtraction c = a - b * \beta, written into a caller-supplied tensor
>> a - a tensor
>> b - another tensor
>> c - where we put a-b*\beta; it is (re)initialized from a when it has not
       been initialized yet or is not same-shaped as a
>> beta - the scaling factor
>> requireLink - whether the operation is recorded in the network
*/
void Sub(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta, bool requireLink)
{
    /* make sure c can hold a result shaped like a */
    const bool reusable = c.isInit && XTensor::IsSameShaped(&a, &c);
    if (!reusable) {
        InitTensor(&c, &a);
    }

    const int dimIndex = GetSubDimIndex(a, b);

    if (dimIndex == -1) {
        /* handled by _Sub */
        _Sub(&a, &b, &c, beta);

        if (!requireLink)
            return;

        /* record the operation and its parameter */
        XLink::MakeLink(&a, &b, &c, MATH_SUB);
        XLink::AddParamToHead(&c, beta);
        return;
    }

    if (dimIndex < 0 || dimIndex >= a.order) {
        ShowNTErrors("Something is wrong!");
        return;
    }

    /* b is applied along dimension dimIndex of a, handled by _SubDim */
    _SubDim(&a, &b, &c, dimIndex, beta);

    if (!requireLink)
        return;

    /* record the operation and its parameters */
    XLink::MakeLink(&a, &b, &c, MATH_SUBDIM);
    XLink::AddParamToHeadInt(&c, dimIndex);
    XLink::AddParamToHead(&c, beta);
}
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
...@@ -42,6 +42,9 @@ make a new tensor c to keep the result and return it ...@@ -42,6 +42,9 @@ make a new tensor c to keep the result and return it
*/ */
XTensor Sub(const XTensor &a, const XTensor &b, DTYPE beta = (DTYPE)1.0); XTensor Sub(const XTensor &a, const XTensor &b, DTYPE beta = (DTYPE)1.0);
/* tensor subtraction c = a - b * \beta (the result is written into c);
requireLink indicates whether the operation is added to the network */
void Sub(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta = (DTYPE)1.0, bool requireLink = false);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
#endif // __SUB_H__ #endif // __SUB_H__
...@@ -171,4 +171,35 @@ XTensor SubDim(const XTensor &a, const XTensor &b, int n, DTYPE beta) ...@@ -171,4 +171,35 @@ XTensor SubDim(const XTensor &a, const XTensor &b, int n, DTYPE beta)
return c; return c;
} }
/*
tensor subtraction, written into a caller-supplied tensor
c = a - b * \beta
where the size of b is equal to the n-th dimension of a,
i.e., a is subtracted with b by broadcasting
>> a - a tensor
>> b - another tensor whose size is equal to that of dimension n of a
>> c - where we put a-b*\beta; it is (re)initialized from a when it has not
       been initialized yet or is not same-shaped as a
>> n - the dimension index
>> beta - the scaling factor
>> requireLink - whether the operation is recorded in the network
*/
void SubDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE beta, bool requireLink)
{
    /* make sure c can hold a result shaped like a */
    const bool reusable = c.isInit && XTensor::IsSameShaped(&a, &c);
    if (!reusable) {
        InitTensor(&c, &a);
    }

    /* do the computation with _SubDim */
    _SubDim(&a, &b, &c, n, beta);

    if (!requireLink)
        return;

    /* record the operation and its parameters */
    XLink::MakeLink(&a, &b, &c, MATH_SUBDIM);
    XLink::AddParamToHeadInt(&c, n);
    XLink::AddParamToHead(&c, beta);
}
} }
...@@ -38,6 +38,10 @@ void _SubDim(XTensor * a, const XTensor * b, int n, DTYPE beta = (DTYPE)1.0); ...@@ -38,6 +38,10 @@ void _SubDim(XTensor * a, const XTensor * b, int n, DTYPE beta = (DTYPE)1.0);
i.e., a is subtracted with b by broadcasting. We make a new tensor c to keep the result and return it */ i.e., a is subtracted with b by broadcasting. We make a new tensor c to keep the result and return it */
XTensor SubDim(const XTensor &a, const XTensor &b, int n, DTYPE beta = (DTYPE)1.0); XTensor SubDim(const XTensor &a, const XTensor &b, int n, DTYPE beta = (DTYPE)1.0);
/* tensor subtraction c = a - b * \beta where the size of b is equal to the n-th dimension of a,
i.e., a is subtracted with b by broadcasting. The result is written into c;
requireLink indicates whether the operation is added to the network */
void SubDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE beta = (DTYPE)1.0, bool requireLink = false);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
#endif // __SUBDIM_H__ #endif // __SUBDIM_H__
...@@ -201,4 +201,46 @@ XTensor Sum(const XTensor &a, const XTensor &b, DTYPE beta) ...@@ -201,4 +201,46 @@ XTensor Sum(const XTensor &a, const XTensor &b, DTYPE beta)
return c; return c;
} }
/*
tensor summation c = a + b * \beta, written into a caller-supplied tensor
>> a - a tensor
>> b - another tensor
>> c - where we put a+b*\beta; it is (re)initialized from a when it has not
       been initialized yet or is not same-shaped as a
>> beta - the scaling factor
>> requireLink - whether the operation is recorded in the network
*/
void Sum(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta, bool requireLink)
{
    /* make sure c can hold a result shaped like a */
    const bool reusable = c.isInit && XTensor::IsSameShaped(&a, &c);
    if (!reusable) {
        InitTensor(&c, &a);
    }

    const int dimIndex = GetSumDimIndex(a, b);

    if (dimIndex == -1) {
        /* handled by _Sum */
        _Sum(&a, &b, &c, beta);

        if (!requireLink)
            return;

        /* record the operation and its parameter */
        XLink::MakeLink(&a, &b, &c, MATH_SUM);
        XLink::AddParamToHead(&c, beta);
        return;
    }

    if (dimIndex < 0 || dimIndex >= a.order) {
        ShowNTErrors("Something is wrong!");
        return;
    }

    /* b is applied along dimension dimIndex of a, handled by _SumDim */
    _SumDim(&a, &b, &c, dimIndex, beta);

    if (!requireLink)
        return;

    /* record the operation and its parameters */
    XLink::MakeLink(&a, &b, &c, MATH_SUMDIM);
    XLink::AddParamToHeadInt(&c, dimIndex);
    XLink::AddParamToHead(&c, beta);
}
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
...@@ -41,6 +41,9 @@ make a new tensor c to keep the result and return it ...@@ -41,6 +41,9 @@ make a new tensor c to keep the result and return it
*/ */
XTensor Sum(const XTensor &a, const XTensor &b, DTYPE beta = (DTYPE)1.0); XTensor Sum(const XTensor &a, const XTensor &b, DTYPE beta = (DTYPE)1.0);
/* tensor summation c = a + b * \beta (the result is written into c);
requireLink indicates whether the operation is added to the network */
void Sum(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta = (DTYPE)1.0, bool requireLink = false);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
#endif // __SUM_H__ #endif // __SUM_H__
...@@ -188,6 +188,37 @@ XTensor SumDim(const XTensor &a, const XTensor &b, int n, DTYPE beta) ...@@ -188,6 +188,37 @@ XTensor SumDim(const XTensor &a, const XTensor &b, int n, DTYPE beta)
return c; return c;
} }
/*
tensor summation, written into a caller-supplied tensor
c = a + b * \beta
where the size of b is equal to the n-th dimension of a,
i.e., a is summed with b by broadcasting
>> a - a tensor
>> b - another tensor whose size is equal to that of dimension n of a
>> c - where we put a+b*\beta; it is (re)initialized from a when it has not
       been initialized yet or is not same-shaped as a
>> n - the dimension index
>> beta - the scaling factor
>> requireLink - whether the operation is recorded in the network
*/
void SumDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE beta, bool requireLink)
{
    /* make sure c can hold a result shaped like a */
    const bool reusable = c.isInit && XTensor::IsSameShaped(&a, &c);
    if (!reusable) {
        InitTensor(&c, &a);
    }

    /* do the computation with _SumDim */
    _SumDim(&a, &b, &c, n, beta);

    if (!requireLink)
        return;

    /* record the operation and its parameters */
    XLink::MakeLink(&a, &b, &c, MATH_SUMDIM);
    XLink::AddParamToHeadInt(&c, n);
    XLink::AddParamToHead(&c, beta);
}
/* /*
tensor broadcast summation c = a + b * \beta where some of dimensions of b can be of size 1 tensor broadcast summation c = a + b * \beta where some of dimensions of b can be of size 1
c = a + b * \beta c = a + b * \beta
...@@ -328,5 +359,31 @@ XTensor SumBroadcast(const XTensor &a, const XTensor &b, DTYPE beta) ...@@ -328,5 +359,31 @@ XTensor SumBroadcast(const XTensor &a, const XTensor &b, DTYPE beta)
return c; return c;
} }
/*
tensor broadcast summation, written into a caller-supplied tensor
c = a + b * \beta
where some of dimensions of b can be of size 1
>> a - a tensor
>> b - another tensor that would be broadcasted
>> c - the resulting tensor; it is (re)initialized from a when it has not been
       initialized yet or is not same-shaped as a
>> beta - the scaling factor
>> requireLink - whether the operation is recorded in the network
*/
void SumBroadcast(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta, bool requireLink)
{
    /* make sure c can hold a result shaped like a */
    const bool reusable = c.isInit && XTensor::IsSameShaped(&a, &c);
    if (!reusable) {
        InitTensor(&c, &a);
    }

    /* do the computation with _SumBroadcast */
    _SumBroadcast(&a, &b, &c, beta);

    if (!requireLink)
        return;

    /* record the operation and its parameter */
    XLink::MakeLink(&a, &b, &c, MATH_SUMBROADCAST);
    XLink::AddParamToHead(&c, beta);
}
} }
...@@ -42,12 +42,19 @@ void _SumDim(XTensor * a, const XTensor * b, int n, DTYPE beta = (DTYPE)1.0); ...@@ -42,12 +42,19 @@ void _SumDim(XTensor * a, const XTensor * b, int n, DTYPE beta = (DTYPE)1.0);
i.e., a is summed with b by broadcasting. We make a new tensor c to keep the result and return it */ i.e., a is summed with b by broadcasting. We make a new tensor c to keep the result and return it */
XTensor SumDim(const XTensor &a, const XTensor &b, int n, DTYPE beta = (DTYPE)1.0); XTensor SumDim(const XTensor &a, const XTensor &b, int n, DTYPE beta = (DTYPE)1.0);
/* tensor summation c = a + b * \beta where the size of b is equal to the n-th dimension of a,
i.e., a is summed with b by broadcasting. The result is written into c;
requireLink indicates whether the operation is added to the network */
void SumDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE beta = (DTYPE)1.0, bool requireLink = false);
/* tensor broadcast summation c = a + b * \beta where some of dimensions of b can be of size 1 */ /* tensor broadcast summation c = a + b * \beta where some of dimensions of b can be of size 1 */
void _SumBroadcast(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta = (DTYPE)1.0); void _SumBroadcast(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta = (DTYPE)1.0);
/* tensor broadcast summation c = a + b * \beta where some of dimensions of b can be of size 1. /* tensor broadcast summation c = a + b * \beta where some of dimensions of b can be of size 1.
we return the resulting tensor here */ we return the resulting tensor here */
XTensor SumBroadcast(const XTensor &a, const XTensor &b, DTYPE beta = (DTYPE)1.0); XTensor SumBroadcast(const XTensor &a, const XTensor &b, DTYPE beta = (DTYPE)1.0);
/* tensor broadcast summation c = a + b * \beta where some of dimensions of b can be of size 1.
The result is written into c; requireLink indicates whether the operation is added to the network */
void SumBroadcast(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta = (DTYPE)1.0, bool requireLink = false);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -201,9 +201,9 @@ void _CudaBLASMatrixMULBatchedStrided(cublasHandle_t * handle, ...@@ -201,9 +201,9 @@ void _CudaBLASMatrixMULBatchedStrided(cublasHandle_t * handle,
matrix multiplication via cuda version BLAS matrix multiplication via cuda version BLAS
*/ */
void _CudaBLASMatrixMULList(cublasHandle_t * handle, void _CudaBLASMatrixMULList(cublasHandle_t * handle,
const XList * a, MATRIX_TRANS_TYPE transposedA, const TensorList * a, MATRIX_TRANS_TYPE transposedA,
const XList * b, MATRIX_TRANS_TYPE transposedB, const TensorList * b, MATRIX_TRANS_TYPE transposedB,
XList * c, TensorList * c,
int count, DTYPE alpha, DTYPE beta) int count, DTYPE alpha, DTYPE beta)
{ {
CheckNTErrors((a && b && c), "Empty input lists!"); CheckNTErrors((a && b && c), "Empty input lists!");
......
...@@ -56,8 +56,8 @@ void _CudaBLASMatrixMULBatchedStrided(cublasHandle_t * handle, ...@@ -56,8 +56,8 @@ void _CudaBLASMatrixMULBatchedStrided(cublasHandle_t * handle,
DTYPE alpha = (DTYPE)1.0, DTYPE beta = 1.0); DTYPE alpha = (DTYPE)1.0, DTYPE beta = 1.0);
/* matrix multiplication in batch mode via cuda version BLAS */ /* matrix multiplication in batch mode via cuda version BLAS */
void _CudaBLASMatrixMULList(cublasHandle_t * handle, const XList * a, MATRIX_TRANS_TYPE transposedA, void _CudaBLASMatrixMULList(cublasHandle_t * handle, const TensorList * a, MATRIX_TRANS_TYPE transposedA,
const XList * b, MATRIX_TRANS_TYPE transposedB, XList * c, const TensorList * b, MATRIX_TRANS_TYPE transposedB, TensorList * c,
int count, DTYPE alpha = (DTYPE)1.0, DTYPE beta = 1.0); int count, DTYPE alpha = (DTYPE)1.0, DTYPE beta = 1.0);
#endif #endif
......
...@@ -111,9 +111,10 @@ void _IndexToOnehot(XTensor * index, XTensor * onehot, int size, float labelSmoo ...@@ -111,9 +111,10 @@ void _IndexToOnehot(XTensor * index, XTensor * onehot, int size, float labelSmoo
onehot->SetZeroAll(); onehot->SetZeroAll();
#ifdef USE_CUDA
float confidence = 1 - labelSmoothingP; float confidence = 1 - labelSmoothingP;
float lowconfidence = labelSmoothingP / size; float lowconfidence = labelSmoothingP / size;
#ifdef USE_CUDA
if(onehot->devID >= 0 && index->devID >= 0) { if(onehot->devID >= 0 && index->devID >= 0) {
_CudaIndexToOnehot(index, onehot, size, confidence, lowconfidence); _CudaIndexToOnehot(index, onehot, size, confidence, lowconfidence);
return; return;
...@@ -129,8 +130,7 @@ void _IndexToOnehot(XTensor * index, XTensor * onehot, int size, float labelSmoo ...@@ -129,8 +130,7 @@ void _IndexToOnehot(XTensor * index, XTensor * onehot, int size, float labelSmoo
for (int i = 0; i < blockNum; i++) { for (int i = 0; i < blockNum; i++) {
int id = indexData[i]; int id = indexData[i];
DTYPE * od = onehotData + i * stride; DTYPE * od = onehotData + i * stride;
od[id] = 2; od[id] = 1;
//onehotData[i * stride + id] = 1;
} }
} }
...@@ -164,4 +164,4 @@ XTensor IndexToOnehot(XTensor & index, int size, float labelSmoothingP) ...@@ -164,4 +164,4 @@ XTensor IndexToOnehot(XTensor & index, int size, float labelSmoothingP)
return onehot; return onehot;
} }
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
...@@ -155,4 +155,4 @@ void _CudaIndexToOnehot(XTensor * index, XTensor * onehot, int size, float confi ...@@ -155,4 +155,4 @@ void _CudaIndexToOnehot(XTensor * index, XTensor * onehot, int size, float confi
#endif // USE_CUDA #endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
...@@ -81,4 +81,4 @@ void _SetDataWithOffsetAndValue(XTensor * tensor, MTYPE * offsets, void * values ...@@ -81,4 +81,4 @@ void _SetDataWithOffsetAndValue(XTensor * tensor, MTYPE * offsets, void * values
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
#endif // __SETDATA_H__ #endif // __SETDATA_H__
\ No newline at end of file
...@@ -36,18 +36,36 @@ int cudascale(int x, int scale) ...@@ -36,18 +36,36 @@ int cudascale(int x, int scale)
} }
__device__ __device__
/* float overload: returns x multiplied by scale */
float cudascale(float x, float scale)
{
    const float scaled = x * scale;
    return scaled;
}
__device__
int cudadescale(int x, int descale) int cudadescale(int x, int descale)
{ {
return x / descale; return x / descale;
} }
__device__ __device__
/* float overload: returns x divided by descale */
float cudadescale(float x, float descale)
{
    const float quotient = x / descale;
    return quotient;
}
__device__
int cudashift(int x, int shift) int cudashift(int x, int shift)
{ {
return x + shift; return x + shift;
} }
__device__ __device__
/* float overload: returns x plus shift
   (the second parameter was named "descale", copied from cudadescale) */
float cudashift(float x, float shift)
{
    return x + shift;
}
__device__
int cudamod(int x, int mod) int cudamod(int x, int mod)
{ {
return x % mod; return x % mod;
...@@ -92,9 +110,51 @@ void _Cuda##funcName(const XTensor * a, XTensor * b, int num) \ ...@@ -92,9 +110,51 @@ void _Cuda##funcName(const XTensor * a, XTensor * b, int num) \
BacktoCudaDev(a->devID, devIDBackup); \ BacktoCudaDev(a->devID, devIDBackup); \
} \ } \
/*
generate, for a given funcName, a CUDA kernel Kernel<funcName> and a host
function _Cuda<funcName>:
- the kernel computes b[i] = (float)origFunc(a[i], num) for every index i < size,
  where i = blockDim.x * blockIdx.x + threadIdx.x
- the host function checks that a and b are same-shaped and that a is not sparse,
  asks GDevs for a grid/block size for a->unitNum items, and launches the kernel
  when a->dataType is X_FLOAT (any other type ends in ShowNTErrors("TODO!"))
>> funcName - suffix used to build the names of the generated functions
>> origFunc - the function applied as origFunc(a[i], num)
NOTE(review): the first check calls XTensor::IsSameShaped although its message
talks about the "type", and only a->dataType is tested before b->data is cast
to float* - confirm that b is always X_FLOAT at the call sites.
*/
#define SIMPLE_BINARY_FUNCTION_FLOAT_GPU(funcName, origFunc) \
__global__ \
void Kernel##funcName(float * a, float * b, int size, float num) \
{ \
int i = blockDim.x * blockIdx.x + threadIdx.x; \
\
if (i < size) \
b[i] = (float)origFunc(a[i], num); \
} \
\
\
void _Cuda##funcName(const XTensor * a, XTensor * b, float num) \
{ \
CheckNTErrors((XTensor::IsSameShaped(a, b)), \
"Input tensors should have the same type!"); \
CheckNTErrors((a->isSparse == false), "TODO!"); \
\
int gridSize[3]; \
int blockSize[3]; \
\
GDevs.GetCudaThread(a->devID, a->unitNum, gridSize, blockSize); \
\
dim3 blocks(gridSize[0]); \
dim3 threads(blockSize[0]); \
\
int devIDBackup; \
ProtectCudaDev(a->devID, devIDBackup); \
\
if (a->dataType == X_FLOAT) { \
Kernel##funcName<<<blocks, threads>>> \
((float*)a->data, (float*)b->data, a->unitNum, num);\
} \
else { \
ShowNTErrors("TODO!"); \
} \
\
BacktoCudaDev(a->devID, devIDBackup); \
}
SIMPLE_BINARY_FUNCTION_GPU(Scale, cudascale) SIMPLE_BINARY_FUNCTION_GPU(Scale, cudascale)
SIMPLE_BINARY_FUNCTION_FLOAT_GPU(ScaleFloat, cudascale)
SIMPLE_BINARY_FUNCTION_GPU(Descale, cudadescale) SIMPLE_BINARY_FUNCTION_GPU(Descale, cudadescale)
SIMPLE_BINARY_FUNCTION_FLOAT_GPU(DescaleFloat, cudadescale)
SIMPLE_BINARY_FUNCTION_GPU(Shift, cudashift) SIMPLE_BINARY_FUNCTION_GPU(Shift, cudashift)
SIMPLE_BINARY_FUNCTION_FLOAT_GPU(ShiftFloat, cudashift)
SIMPLE_BINARY_FUNCTION_GPU(Mod, cudamod) SIMPLE_BINARY_FUNCTION_GPU(Mod, cudamod)
#endif // USE_CUDA #endif // USE_CUDA
......
...@@ -32,20 +32,29 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -32,20 +32,29 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* scale each entry (CUDA Kernel) */ /* scale each entry (CUDA Kernel) */
__global__ __global__
void KernelScale(int * a, int * b, int size, int scale); void KernelScale(int * a, int * b, int size, int scale);
/* scale each float entry by a float factor (CUDA Kernel);
   name and signature follow the kernel generated by
   SIMPLE_BINARY_FUNCTION_FLOAT_GPU(ScaleFloat, cudascale) */
__global__
void KernelScaleFloat(float * a, float * b, int size, float scale);
/* scale each entry */ /* scale each entry */
void _CudaScale(const XTensor * a, XTensor * b, int scale); void _CudaScale(const XTensor * a, XTensor * b, int scale);
void _CudaScaleFloat(const XTensor * a, XTensor * b, float scale);
/* descale each entry (CUDA Kernel) */ /* descale each entry (CUDA Kernel) */
__global__ __global__
void KernelDescale(int * a, int * b, int size, int scale); void KernelDescale(int * a, int * b, int size, int scale);
/* descale each float entry by a float factor (CUDA Kernel);
   name and signature follow the kernel generated by
   SIMPLE_BINARY_FUNCTION_FLOAT_GPU(DescaleFloat, cudadescale) */
__global__
void KernelDescaleFloat(float * a, float * b, int size, float scale);
/* descale each entry */ /* descale each entry */
void _CudaDescale(const XTensor * a, XTensor * b, int scale); void _CudaDescale(const XTensor * a, XTensor * b, int scale);
void _CudaDescaleFloat(const XTensor * a, XTensor * b, float scale);
/* shift each entry (CUDA Kernel) */ /* shift each entry (CUDA Kernel) */
__global__ __global__
void KernelShift(int * a, int * b, int size, int shift); void KernelShift(int * a, int * b, int size, int shift);
/* shift each float entry by a float offset (CUDA Kernel);
   name and signature follow the kernel generated by
   SIMPLE_BINARY_FUNCTION_FLOAT_GPU(ShiftFloat, cudashift) */
__global__
void KernelShiftFloat(float * a, float * b, int size, float shift);
/* shift each entry */ /* shift each entry */
void _CudaShift(const XTensor * a, XTensor * b, int shift); void _CudaShift(const XTensor * a, XTensor * b, int shift);
void _CudaShiftFloat(const XTensor * a, XTensor * b, float shift);
/* mod each entry (CUDA Kernel) */ /* mod each entry (CUDA Kernel) */
__global__ __global__
......
...@@ -37,51 +37,76 @@ void _Scale(const XTensor * a, XTensor * b, float scale); ...@@ -37,51 +37,76 @@ void _Scale(const XTensor * a, XTensor * b, float scale);
scale up tensor entires (on site) scale up tensor entires (on site)
b = a * scale b = a * scale
*/ */
void Scale(XTensor & a, int scale); void _ScaleMe(XTensor & a, int scale);
void Scale(XTensor & a, float scale); void _ScaleMe(XTensor & a, float scale);
/* /*
scale up tensor entires scale up tensor entires
b = a * scale b = a * scale
*/ */
void Scale(const XTensor & a, XTensor &b, int scale); void Scale(const XTensor & a, XTensor &b, int scale);
void Scale(const XTensor & a, XTensor &b, float scale); void Scale(const XTensor & a, XTensor &b, float scale, bool requireLink = false);
/*
scale up tensor entries (return an XTensor structure)
b = a * scale
*/
XTensor Scale(const XTensor & a, float scale);
/* /*
descale tensor entires descale tensor entires
b = a / scale b = a / scale
*/ */
void _Descale(const XTensor * a, XTensor * b, int scale); void _Descale(const XTensor * a, XTensor * b, int scale);
void _Descale(const XTensor * a, XTensor * b, float scale);
/* /*
descale tensor entires (on site) descale tensor entires (on site)
b = a / scale b = a / scale
*/ */
void Descale(XTensor & a, int scale); void _DescaleMe(XTensor & a, int scale);
void _DescaleMe(XTensor & a, float scale);
/* /*
descale tensor entires descale tensor entires
b = a / scale b = a / scale
*/ */
void Descale(const XTensor & a, XTensor & b, int scale); void Descale(const XTensor & a, XTensor & b, int scale);
void Descale(const XTensor & a, XTensor & b, float scale, bool requireLink = false);
/*
descale tensor entries (return an XTensor structure)
b = a / scale
*/
XTensor Descale(const XTensor & a, float scale);
/* /*
shift tensor entires shift tensor entires
b = a + shift b = a + shift
*/ */
void _Shift(const XTensor * a, XTensor * b, int shift); void _Shift(const XTensor * a, XTensor * b, int shift);
void _Shift(const XTensor * a, XTensor * b, float shift);
/* /*
shift tensor entires (on site) shift tensor entires (on site)
b = a + shift b = a + shift
*/ */
void Shift(XTensor & a, int shift); void _ShiftMe(XTensor & a, int shift);
void _ShiftMe(XTensor & a, float shift);
/* /*
shift tensor entires shift tensor entires
b = a + shift b = a + shift
*/ */
void Shift(const XTensor & a, XTensor & b, int shift); void Shift(const XTensor & a, XTensor & b, int shift);
void Shift(const XTensor & a, XTensor & b, float shift, bool requireLink = false);
/*
shift tensor entries (return an XTensor structure)
b = a + shift
*/
XTensor Shift(const XTensor & a, float shift);
/* /*
mod tensor entires mod tensor entires
...@@ -93,7 +118,7 @@ void _Mod(const XTensor * a, XTensor * b, int base); ...@@ -93,7 +118,7 @@ void _Mod(const XTensor * a, XTensor * b, int base);
mod tensor entires (on site) mod tensor entires (on site)
b = a % mod b = a % mod
*/ */
void Mod(XTensor & a, int base); void _ModMe(XTensor & a, int base);
/* /*
mod tensor entires mod tensor entires
......
...@@ -94,6 +94,23 @@ XTensor Clip(const XTensor & a, DTYPE lower, DTYPE upper) ...@@ -94,6 +94,23 @@ XTensor Clip(const XTensor & a, DTYPE lower, DTYPE upper)
return b; return b;
} }
/*
apply _Clip with the bounds lower and upper, writing into a caller-supplied tensor
>> a - the input tensor
>> b - the output tensor; it is (re)initialized from a when it has not been
       initialized yet or is not same-shaped as a
>> lower - the first bound passed to _Clip
>> upper - the second bound passed to _Clip
>> requireLink - whether the operation is recorded in the network
*/
void Clip(const XTensor & a, XTensor & b, DTYPE lower, DTYPE upper, bool requireLink)
{
    /* make sure b can hold a result shaped like a */
    const bool reusable = b.isInit && XTensor::IsSameShaped(&a, &b);
    if (!reusable) {
        InitTensor(&b, &a);
    }

    /* do the computation with _Clip */
    _Clip(&a, &b, lower, upper);

    if (!requireLink)
        return;

    /* record the operation and its parameters */
    XLink::MakeLink(&a, NULL, &b, MATH_CLIP);
    XLink::AddParamToHead(&b, lower);
    XLink::AddParamToHead(&b, upper);
}
/* /*
backward computation backward computation
......
...@@ -37,6 +37,8 @@ void _ClipMe(XTensor * a, DTYPE lower, DTYPE upper); ...@@ -37,6 +37,8 @@ void _ClipMe(XTensor * a, DTYPE lower, DTYPE upper);
make a new tensor to keep the result and return it */ make a new tensor to keep the result and return it */
XTensor Clip(const XTensor & a, DTYPE lower, DTYPE upper); XTensor Clip(const XTensor & a, DTYPE lower, DTYPE upper);
/* clip the entries of a with the bounds lower and upper and keep the result in b;
requireLink indicates whether the operation is added to the network */
void Clip(const XTensor & a, XTensor & b, DTYPE lower, DTYPE upper, bool requireLink = false);
/* /*
backward of Clip function backward of Clip function
*/ */
......
...@@ -138,12 +138,12 @@ XTensor Normalize(const XTensor &input, int dim, const XTensor &mean, const XTen ...@@ -138,12 +138,12 @@ XTensor Normalize(const XTensor &input, int dim, const XTensor &mean, const XTen
_Normalize(&input, &output, dim, &mean, &var, &a, &b, epsilon); _Normalize(&input, &output, dim, &mean, &var, &a, &b, epsilon);
/* tensor connections */ /* tensor connections */
XList list(5); TensorList list(5);
list.Add(&input); list.Add((XTensor*)&input);
list.Add(&mean); list.Add((XTensor*)&mean);
list.Add(&var); list.Add((XTensor*)&var);
list.Add(&a); list.Add((XTensor*)&a);
list.Add(&b); list.Add((XTensor*)&b);
XLink::MakeLink(&list, &output, MATH_NORMALIZE); XLink::MakeLink(&list, &output, MATH_NORMALIZE);
XLink::AddParamToHeadInt(&output, dim); XLink::AddParamToHeadInt(&output, dim);
XLink::AddParamToHead(&output, epsilon); XLink::AddParamToHead(&output, epsilon);
......
...@@ -102,4 +102,27 @@ XTensor Power(const XTensor & a, DTYPE p) ...@@ -102,4 +102,27 @@ XTensor Power(const XTensor & a, DTYPE p)
return b; return b;
} }
/*
get the power(a, p), writing into a caller-supplied tensor
>> a - input tensor
>> b - output tensor; it is (re)initialized from a when it has not been
       initialized yet or is not same-shaped as a
>> p - parameter
>> requireLink - whether the operation is recorded in the network
*/
void Power(const XTensor & a, XTensor & b, DTYPE p, bool requireLink)
{
    /* make sure b can hold a result shaped like a */
    const bool reusable = b.isInit && XTensor::IsSameShaped(&a, &b);
    if (!reusable) {
        InitTensor(&b, &a);
    }

    /* do the computation with _Power */
    _Power(&a, &b, p);

    if (!requireLink)
        return;

    /* record the operation and its parameter */
    XLink::MakeLink(&a, NULL, &b, MATH_POWER);
    XLink::AddParamToHead(&b, p);
}
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
...@@ -41,6 +41,9 @@ make a new tensor to keep the result and return it ...@@ -41,6 +41,9 @@ make a new tensor to keep the result and return it
*/ */
XTensor Power(const XTensor & a, DTYPE p); XTensor Power(const XTensor & a, DTYPE p);
/* get the power(a, p) and keep the result in b;
requireLink indicates whether the operation is added to the network */
void Power(const XTensor & a, XTensor & b, DTYPE p, bool requireLink = false);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
#endif // __POWER_H__ #endif // __POWER_H__
...@@ -118,4 +118,33 @@ XTensor ScaleAndShift(const XTensor &a, DTYPE scale, DTYPE shift) ...@@ -118,4 +118,33 @@ XTensor ScaleAndShift(const XTensor &a, DTYPE scale, DTYPE shift)
return b; return b;
} }
/*
scale and shift all tensor entries, writing into a caller-supplied tensor
b = a * scale + shift
>> a - the input tensor
>> b - the output tensor; it is (re)initialized from a when it has not been
       initialized yet or is not same-shaped as a
>> scale - the scale factor
>> shift - the shift factor
>> requireLink - whether the operation is recorded in the network
*/
void ScaleAndShift(const XTensor & a, XTensor & b, DTYPE scale, DTYPE shift, bool requireLink)
{
    /* make sure b can hold a result shaped like a */
    const bool reusable = b.isInit && XTensor::IsSameShaped(&a, &b);
    if (!reusable) {
        InitTensor(&b, &a);
    }

    /* do the computation with _ScaleAndShift */
    _ScaleAndShift(&a, &b, scale, shift);

    if (!requireLink)
        return;

    /* record the operation and its parameters */
    XLink::MakeLink(&a, NULL, &b, MATH_SCALEANDSHIFT);
    XLink::AddParamToHead(&b, scale);
    XLink::AddParamToHead(&b, shift);
}
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
...@@ -50,6 +50,12 @@ b = a * scale + shift ...@@ -50,6 +50,12 @@ b = a * scale + shift
*/ */
XTensor ScaleAndShift(const XTensor &a, DTYPE scale, DTYPE shift = 0); XTensor ScaleAndShift(const XTensor &a, DTYPE scale, DTYPE shift = 0);
/*
scale and shift all tensor entries and keep the result in b
b = a * scale + shift
>> a - the input tensor
>> b - the output tensor
>> scale - the scale factor
>> shift - the shift factor
>> requireLink - whether the operation is added to the network
*/
void ScaleAndShift(const XTensor &a, XTensor &b, DTYPE scale, DTYPE shift = 0, bool requireLink = false);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
#endif // __SCALEANDSHIFT_H__ #endif // __SCALEANDSHIFT_H__
\ No newline at end of file
...@@ -82,58 +82,82 @@ XTensor funcName(const XTensor &a) \ ...@@ -82,58 +82,82 @@ XTensor funcName(const XTensor &a) \
return b; \ return b; \
} }
/* define funcName(a, b, requireLink): b is initialized from a if it is not
   initialized or not shaped like a, then _funcName(&a, &b) is called, and the
   link for operationId is made only when requireLink is true */
#define SIMPLE_UNARY_FUNCTION_VOID(funcName, _funcName, operationId)         \
void funcName(const XTensor &a, XTensor &b, bool requireLink)                \
{                                                                            \
    const bool reusable = b.isInit && XTensor::IsSameShaped(&a, &b);         \
    if (!reusable)                                                           \
        InitTensor(&b, &a);                                                  \
    _funcName(&a, &b);                                                       \
    if (requireLink)                                                         \
        XLink::MakeLink(&a, NULL, &b, operationId);                          \
}
_SIMPLE_UNARY_FUNCTION(_Absolute, _CudaAbsolute, fabs) _SIMPLE_UNARY_FUNCTION(_Absolute, _CudaAbsolute, fabs)
_SIMPLE_UNARY_FUNCTION_ME(_AbsoluteMe, _Absolute) _SIMPLE_UNARY_FUNCTION_ME(_AbsoluteMe, _Absolute)
SIMPLE_UNARY_FUNCTION(Absolute, _Absolute, MATH_ABSOLUTE) SIMPLE_UNARY_FUNCTION(Absolute, _Absolute, MATH_ABSOLUTE)
SIMPLE_UNARY_FUNCTION_VOID(Absolute, _Absolute, MATH_ABSOLUTE)
_SIMPLE_UNARY_FUNCTION(_Ceil, _CudaCeil, ceil) _SIMPLE_UNARY_FUNCTION(_Ceil, _CudaCeil, ceil)
_SIMPLE_UNARY_FUNCTION_ME(_CeilMe, _Ceil) _SIMPLE_UNARY_FUNCTION_ME(_CeilMe, _Ceil)
SIMPLE_UNARY_FUNCTION(Ceil, _Ceil, MATH_CEIL) SIMPLE_UNARY_FUNCTION(Ceil, _Ceil, MATH_CEIL)
SIMPLE_UNARY_FUNCTION_VOID(Ceil, _Ceil, MATH_CEIL)
_SIMPLE_UNARY_FUNCTION(_Exp, _CudaExp, exp) _SIMPLE_UNARY_FUNCTION(_Exp, _CudaExp, exp)
_SIMPLE_UNARY_FUNCTION_ME(_ExpMe, _Exp) _SIMPLE_UNARY_FUNCTION_ME(_ExpMe, _Exp)
SIMPLE_UNARY_FUNCTION(Exp, _Exp, MATH_EXP) SIMPLE_UNARY_FUNCTION(Exp, _Exp, MATH_EXP)
SIMPLE_UNARY_FUNCTION_VOID(Exp, _Exp, MATH_EXP)
_SIMPLE_UNARY_FUNCTION(_Floor, _CudaFloor, floor) _SIMPLE_UNARY_FUNCTION(_Floor, _CudaFloor, floor)
_SIMPLE_UNARY_FUNCTION_ME(_FloorMe, _Floor) _SIMPLE_UNARY_FUNCTION_ME(_FloorMe, _Floor)
SIMPLE_UNARY_FUNCTION(Floor, _Floor, MATH_FLOOR) SIMPLE_UNARY_FUNCTION(Floor, _Floor, MATH_FLOOR)
SIMPLE_UNARY_FUNCTION_VOID(Floor, _Floor, MATH_FLOOR)
_SIMPLE_UNARY_FUNCTION(_IsNonZero, _CudaIsNonZero, isnonzero) _SIMPLE_UNARY_FUNCTION(_IsNonZero, _CudaIsNonZero, isnonzero)
_SIMPLE_UNARY_FUNCTION_ME(_IsNonZeroMe, _IsNonZero) _SIMPLE_UNARY_FUNCTION_ME(_IsNonZeroMe, _IsNonZero)
SIMPLE_UNARY_FUNCTION(IsNonZero, _IsNonZero, MATH_ISNONZERO) SIMPLE_UNARY_FUNCTION(IsNonZero, _IsNonZero, MATH_ISNONZERO)
SIMPLE_UNARY_FUNCTION_VOID(IsNonZero, _IsNonZero, MATH_ISNONZERO)
_SIMPLE_UNARY_FUNCTION(_IsZero, _CudaIsZero, iszero) _SIMPLE_UNARY_FUNCTION(_IsZero, _CudaIsZero, iszero)
_SIMPLE_UNARY_FUNCTION_ME(_IsZeroMe, _IsZero) _SIMPLE_UNARY_FUNCTION_ME(_IsZeroMe, _IsZero)
SIMPLE_UNARY_FUNCTION(IsZero, _IsZero, MATH_ISZERO) SIMPLE_UNARY_FUNCTION(IsZero, _IsZero, MATH_ISZERO)
SIMPLE_UNARY_FUNCTION_VOID(IsZero, _IsZero, MATH_ISZERO)
_SIMPLE_UNARY_FUNCTION(_Log, _CudaLog, log) _SIMPLE_UNARY_FUNCTION(_Log, _CudaLog, log)
_SIMPLE_UNARY_FUNCTION_ME(_LogMe, _Log) _SIMPLE_UNARY_FUNCTION_ME(_LogMe, _Log)
SIMPLE_UNARY_FUNCTION(Log, _Log, MATH_LOG) SIMPLE_UNARY_FUNCTION(Log, _Log, MATH_LOG)
SIMPLE_UNARY_FUNCTION_VOID(Log, _Log, MATH_LOG)
_SIMPLE_UNARY_FUNCTION(_Round, _CudaRound, round) _SIMPLE_UNARY_FUNCTION(_Round, _CudaRound, round)
_SIMPLE_UNARY_FUNCTION_ME(_RoundMe, _Round) _SIMPLE_UNARY_FUNCTION_ME(_RoundMe, _Round)
SIMPLE_UNARY_FUNCTION(Round, _Round, MATH_ROUND) SIMPLE_UNARY_FUNCTION(Round, _Round, MATH_ROUND)
SIMPLE_UNARY_FUNCTION_VOID(Round, _Round, MATH_ROUND)
_SIMPLE_UNARY_FUNCTION(_Sqrt, _CudaSqrt, sqrt) _SIMPLE_UNARY_FUNCTION(_Sqrt, _CudaSqrt, sqrt)
_SIMPLE_UNARY_FUNCTION_ME(_SqrtMe, _Sqrt) _SIMPLE_UNARY_FUNCTION_ME(_SqrtMe, _Sqrt)
SIMPLE_UNARY_FUNCTION(Sqrt, _Sqrt, MATH_SQRT) SIMPLE_UNARY_FUNCTION(Sqrt, _Sqrt, MATH_SQRT)
SIMPLE_UNARY_FUNCTION_VOID(Sqrt, _Sqrt, MATH_SQRT)
_SIMPLE_UNARY_FUNCTION(_Square, _CudaSquare, square) _SIMPLE_UNARY_FUNCTION(_Square, _CudaSquare, square)
_SIMPLE_UNARY_FUNCTION_ME(_SquareMe, _Square) _SIMPLE_UNARY_FUNCTION_ME(_SquareMe, _Square)
SIMPLE_UNARY_FUNCTION(Square, _Square, MATH_SQUARE) SIMPLE_UNARY_FUNCTION(Square, _Square, MATH_SQUARE)
SIMPLE_UNARY_FUNCTION_VOID(Square, _Square, MATH_SQUARE)
_SIMPLE_UNARY_FUNCTION(_Sin, _CudaSin, sin) _SIMPLE_UNARY_FUNCTION(_Sin, _CudaSin, sin)
_SIMPLE_UNARY_FUNCTION_ME(_SinMe, _Sin) _SIMPLE_UNARY_FUNCTION_ME(_SinMe, _Sin)
SIMPLE_UNARY_FUNCTION(Sin, _Sin, MATH_SIN) SIMPLE_UNARY_FUNCTION(Sin, _Sin, MATH_SIN)
SIMPLE_UNARY_FUNCTION_VOID(Sin, _Sin, MATH_SIN)
_SIMPLE_UNARY_FUNCTION(_Cos, _CudaCos, cos) _SIMPLE_UNARY_FUNCTION(_Cos, _CudaCos, cos)
_SIMPLE_UNARY_FUNCTION_ME(_CosMe, _Cos) _SIMPLE_UNARY_FUNCTION_ME(_CosMe, _Cos)
SIMPLE_UNARY_FUNCTION(Cos, _Cos, MATH_COS) SIMPLE_UNARY_FUNCTION(Cos, _Cos, MATH_COS)
SIMPLE_UNARY_FUNCTION_VOID(Cos, _Cos, MATH_COS)
_SIMPLE_UNARY_FUNCTION(_Tan, _CudaTan, tan) _SIMPLE_UNARY_FUNCTION(_Tan, _CudaTan, tan)
_SIMPLE_UNARY_FUNCTION_ME(_TanMe, _Tan) _SIMPLE_UNARY_FUNCTION_ME(_TanMe, _Tan)
SIMPLE_UNARY_FUNCTION(Tan, _Tan, MATH_TAN) SIMPLE_UNARY_FUNCTION(Tan, _Tan, MATH_TAN)
SIMPLE_UNARY_FUNCTION_VOID(Tan, _Tan, MATH_TAN)
#else #else
/* define three marco separately, specify the respective function names (CPU mode) */ /* define three marco separately, specify the respective function names (CPU mode) */
...@@ -164,59 +188,82 @@ XTensor funcName(const XTensor &a) \ ...@@ -164,59 +188,82 @@ XTensor funcName(const XTensor &a) \
XLink::MakeLink(&a, NULL, &b, operationId); \ XLink::MakeLink(&a, NULL, &b, operationId); \
return b; \ return b; \
} }
/* define funcName(a, b, requireLink): b is initialized from a if it is not
   initialized or not shaped like a, then _funcName(&a, &b) is called, and the
   link for operationId is made only when requireLink is true */
#define SIMPLE_UNARY_FUNCTION_VOID(funcName, _funcName, operationId)         \
void funcName(const XTensor &a, XTensor &b, bool requireLink)                \
{                                                                            \
    const bool reusable = b.isInit && XTensor::IsSameShaped(&a, &b);         \
    if (!reusable)                                                           \
        InitTensor(&b, &a);                                                  \
    _funcName(&a, &b);                                                       \
    if (requireLink)                                                         \
        XLink::MakeLink(&a, NULL, &b, operationId);                          \
}
_SIMPLE_UNARY_FUNCTION(_Absolute, fabs) _SIMPLE_UNARY_FUNCTION(_Absolute, fabs)
_SIMPLE_UNARY_FUNCTION_ME(_AbsoluteMe, _Absolute) _SIMPLE_UNARY_FUNCTION_ME(_AbsoluteMe, _Absolute)
SIMPLE_UNARY_FUNCTION(Absolute, _Absolute, MATH_ABSOLUTE) SIMPLE_UNARY_FUNCTION(Absolute, _Absolute, MATH_ABSOLUTE)
SIMPLE_UNARY_FUNCTION_VOID(Absolute, _Absolute, MATH_ABSOLUTE)
_SIMPLE_UNARY_FUNCTION(_Ceil, ceil) _SIMPLE_UNARY_FUNCTION(_Ceil, ceil)
_SIMPLE_UNARY_FUNCTION_ME(_CeilMe, _Ceil) _SIMPLE_UNARY_FUNCTION_ME(_CeilMe, _Ceil)
SIMPLE_UNARY_FUNCTION(Ceil, _Ceil, MATH_CEIL) SIMPLE_UNARY_FUNCTION(Ceil, _Ceil, MATH_CEIL)
SIMPLE_UNARY_FUNCTION_VOID(Ceil, _Ceil, MATH_CEIL)
_SIMPLE_UNARY_FUNCTION(_Exp, exp) _SIMPLE_UNARY_FUNCTION(_Exp, exp)
_SIMPLE_UNARY_FUNCTION_ME(_ExpMe, _Exp) _SIMPLE_UNARY_FUNCTION_ME(_ExpMe, _Exp)
SIMPLE_UNARY_FUNCTION(Exp, _Exp, MATH_EXP) SIMPLE_UNARY_FUNCTION(Exp, _Exp, MATH_EXP)
SIMPLE_UNARY_FUNCTION_VOID(Exp, _Exp, MATH_EXP)
_SIMPLE_UNARY_FUNCTION(_Floor, floor) _SIMPLE_UNARY_FUNCTION(_Floor, floor)
_SIMPLE_UNARY_FUNCTION_ME(_FloorMe, _Floor) _SIMPLE_UNARY_FUNCTION_ME(_FloorMe, _Floor)
SIMPLE_UNARY_FUNCTION(Floor, _Floor, MATH_FLOOR) SIMPLE_UNARY_FUNCTION(Floor, _Floor, MATH_FLOOR)
SIMPLE_UNARY_FUNCTION_VOID(Floor, _Floor, MATH_FLOOR)
_SIMPLE_UNARY_FUNCTION(_IsNonZero, isnonzero) _SIMPLE_UNARY_FUNCTION(_IsNonZero, isnonzero)
_SIMPLE_UNARY_FUNCTION_ME(_IsNonZeroMe, _IsNonZero) _SIMPLE_UNARY_FUNCTION_ME(_IsNonZeroMe, _IsNonZero)
SIMPLE_UNARY_FUNCTION(IsNonZero, _IsNonZero, MATH_ISNONZERO) SIMPLE_UNARY_FUNCTION(IsNonZero, _IsNonZero, MATH_ISNONZERO)
SIMPLE_UNARY_FUNCTION_VOID(IsNonZero, _IsNonZero, MATH_ISNONZERO)
_SIMPLE_UNARY_FUNCTION(_IsZero, iszero) _SIMPLE_UNARY_FUNCTION(_IsZero, iszero)
_SIMPLE_UNARY_FUNCTION_ME(_IsZeroMe, _IsZero) _SIMPLE_UNARY_FUNCTION_ME(_IsZeroMe, _IsZero)
SIMPLE_UNARY_FUNCTION(IsZero, _IsZero, MATH_ISZERO) SIMPLE_UNARY_FUNCTION(IsZero, _IsZero, MATH_ISZERO)
SIMPLE_UNARY_FUNCTION_VOID(IsZero, _IsZero, MATH_ISZERO)
_SIMPLE_UNARY_FUNCTION(_Log, log) _SIMPLE_UNARY_FUNCTION(_Log, log)
_SIMPLE_UNARY_FUNCTION_ME(_LogMe, _Log) _SIMPLE_UNARY_FUNCTION_ME(_LogMe, _Log)
SIMPLE_UNARY_FUNCTION(Log, _Log, MATH_LOG) SIMPLE_UNARY_FUNCTION(Log, _Log, MATH_LOG)
SIMPLE_UNARY_FUNCTION_VOID(Log, _Log, MATH_LOG)
_SIMPLE_UNARY_FUNCTION(_Round, round) _SIMPLE_UNARY_FUNCTION(_Round, round)
_SIMPLE_UNARY_FUNCTION_ME(_RoundMe, _Round) _SIMPLE_UNARY_FUNCTION_ME(_RoundMe, _Round)
SIMPLE_UNARY_FUNCTION(Round, _Round, MATH_ROUND) SIMPLE_UNARY_FUNCTION(Round, _Round, MATH_ROUND)
SIMPLE_UNARY_FUNCTION_VOID(Round, _Round, MATH_ROUND)
_SIMPLE_UNARY_FUNCTION(_Sqrt, sqrt) _SIMPLE_UNARY_FUNCTION(_Sqrt, sqrt)
_SIMPLE_UNARY_FUNCTION_ME(_SqrtMe, _Sqrt) _SIMPLE_UNARY_FUNCTION_ME(_SqrtMe, _Sqrt)
SIMPLE_UNARY_FUNCTION(Sqrt, _Sqrt, MATH_SQRT) SIMPLE_UNARY_FUNCTION(Sqrt, _Sqrt, MATH_SQRT)
SIMPLE_UNARY_FUNCTION_VOID(Sqrt, _Sqrt, MATH_SQRT)
_SIMPLE_UNARY_FUNCTION(_Square, square) _SIMPLE_UNARY_FUNCTION(_Square, square)
_SIMPLE_UNARY_FUNCTION_ME(_SquareMe, _Square) _SIMPLE_UNARY_FUNCTION_ME(_SquareMe, _Square)
SIMPLE_UNARY_FUNCTION(Square, _Square, MATH_SQUARE) SIMPLE_UNARY_FUNCTION(Square, _Square, MATH_SQUARE)
SIMPLE_UNARY_FUNCTION_VOID(Square, _Square, MATH_SQUARE)
_SIMPLE_UNARY_FUNCTION(_Sin, sin) _SIMPLE_UNARY_FUNCTION(_Sin, sin)
_SIMPLE_UNARY_FUNCTION_ME(_SinMe, _Sin) _SIMPLE_UNARY_FUNCTION_ME(_SinMe, _Sin)
SIMPLE_UNARY_FUNCTION(Sin, _Sin, MATH_SIN) SIMPLE_UNARY_FUNCTION(Sin, _Sin, MATH_SIN)
SIMPLE_UNARY_FUNCTION_VOID(Sin, _Sin, MATH_SIN)
_SIMPLE_UNARY_FUNCTION(_Cos, cos) _SIMPLE_UNARY_FUNCTION(_Cos, cos)
_SIMPLE_UNARY_FUNCTION_ME(_CosMe, _Cos) _SIMPLE_UNARY_FUNCTION_ME(_CosMe, _Cos)
SIMPLE_UNARY_FUNCTION(Cos, _Cos, MATH_COS) SIMPLE_UNARY_FUNCTION(Cos, _Cos, MATH_COS)
SIMPLE_UNARY_FUNCTION_VOID(Cos, _Cos, MATH_COS)
_SIMPLE_UNARY_FUNCTION(_Tan, tan) _SIMPLE_UNARY_FUNCTION(_Tan, tan)
_SIMPLE_UNARY_FUNCTION_ME(_TanMe, _Tan) _SIMPLE_UNARY_FUNCTION_ME(_TanMe, _Tan)
SIMPLE_UNARY_FUNCTION(Tan, _Tan, MATH_TAN) SIMPLE_UNARY_FUNCTION(Tan, _Tan, MATH_TAN)
SIMPLE_UNARY_FUNCTION_VOID(Tan, _Tan, MATH_TAN)
/*_SIMPLE_UNARY_FUNCTION(_Round, round) /*_SIMPLE_UNARY_FUNCTION(_Round, round)
_SIMPLE_UNARY_FUNCTION_ME(_RoundMe, _Round) _SIMPLE_UNARY_FUNCTION_ME(_RoundMe, _Round)
......
...@@ -34,6 +34,8 @@ void _AbsoluteMe(XTensor * a); ...@@ -34,6 +34,8 @@ void _AbsoluteMe(XTensor * a);
/* set every entry to its absolute value (return an XTensor structure) /* set every entry to its absolute value (return an XTensor structure)
make a new tensor to keep the result and return it */ make a new tensor to keep the result and return it */
XTensor Absolute(const XTensor & a); XTensor Absolute(const XTensor & a);
/* set every entry to its absolute value and keep the result in b
   >> a - the input tensor
   >> b - the output tensor
   >> requireLink - whether the operation is added to the network */
void Absolute(const XTensor & a, XTensor & b, bool requireLink = false);
/* set every entry to its ceil value */ /* set every entry to its ceil value */
void _Ceil(const XTensor * a, XTensor * b); void _Ceil(const XTensor * a, XTensor * b);
...@@ -43,6 +45,8 @@ void _CeilMe(XTensor * a); ...@@ -43,6 +45,8 @@ void _CeilMe(XTensor * a);
/* set every entry to its ceil value (return an XTensor structure) /* set every entry to its ceil value (return an XTensor structure)
make a new tensor to keep the result and return it */ make a new tensor to keep the result and return it */
XTensor Ceil(const XTensor & a); XTensor Ceil(const XTensor & a);
/* set every entry to its ceil value and keep the result in b
   >> a - the input tensor
   >> b - the output tensor
   >> requireLink - whether the operation is added to the network */
void Ceil(const XTensor & a, XTensor & b, bool requireLink = false);
/* set every entry to its exponent value */ /* set every entry to its exponent value */
void _Exp(const XTensor * a, XTensor * b); void _Exp(const XTensor * a, XTensor * b);
...@@ -52,6 +56,8 @@ void _ExpMe(XTensor * a); ...@@ -52,6 +56,8 @@ void _ExpMe(XTensor * a);
/* set every entry to its exponent value (return an XTensor structure) /* set every entry to its exponent value (return an XTensor structure)
make a new tensor to keep the result and return it */ make a new tensor to keep the result and return it */
XTensor Exp(const XTensor & a); XTensor Exp(const XTensor & a);
/* set every entry to its exponent value and keep the result in b
   >> a - the input tensor
   >> b - the output tensor
   >> requireLink - whether the operation is added to the network */
void Exp(const XTensor & a, XTensor & b, bool requireLink = false);
/* set every entry to its floor value */ /* set every entry to its floor value */
void _Floor(const XTensor * a, XTensor * b); void _Floor(const XTensor * a, XTensor * b);
...@@ -61,6 +67,8 @@ void _FloorMe(XTensor * a); ...@@ -61,6 +67,8 @@ void _FloorMe(XTensor * a);
/* set every entry to its floor value (return an XTensor structure) /* set every entry to its floor value (return an XTensor structure)
make a new tensor to keep the result and return it */ make a new tensor to keep the result and return it */
XTensor Floor(const XTensor & a); XTensor Floor(const XTensor & a);
/* set every entry to its floor value and keep the result in b
   >> a - the input tensor
   >> b - the output tensor
   >> requireLink - whether the operation is added to the network */
void Floor(const XTensor & a, XTensor & b, bool requireLink = false);
/* if source entry is non-zero, set target entry to be one, otherwise zero */ /* if source entry is non-zero, set target entry to be one, otherwise zero */
void _IsNonZero(const XTensor *a, XTensor *b); void _IsNonZero(const XTensor *a, XTensor *b);
...@@ -70,6 +78,8 @@ void _IsNonZeroMe(XTensor *a); ...@@ -70,6 +78,8 @@ void _IsNonZeroMe(XTensor *a);
/* if source entry is non-zero, set target entry to be one, otherwise zero (return an XTensor structure) /* if source entry is non-zero, set target entry to be one, otherwise zero (return an XTensor structure)
make a new tensor to keep the result and return it */ make a new tensor to keep the result and return it */
XTensor IsNonZero(const XTensor &a); XTensor IsNonZero(const XTensor &a);
/* if source entry is non-zero, set target entry to be one, otherwise zero
   (the result is kept in b)
   >> a - the input tensor
   >> b - the output tensor
   >> requireLink - whether the operation is added to the network */
void IsNonZero(const XTensor &a, XTensor & b, bool requireLink = false);
/* if source entry is zero, set target entry to be one, otherwise zero */ /* if source entry is zero, set target entry to be one, otherwise zero */
void _IsZero(const XTensor *a, XTensor *b); void _IsZero(const XTensor *a, XTensor *b);
...@@ -79,6 +89,8 @@ void _IsZeroMe(XTensor *a); ...@@ -79,6 +89,8 @@ void _IsZeroMe(XTensor *a);
/* if source entry is zero, set target entry to be one, otherwise zero (return an XTensor structure) /* if source entry is zero, set target entry to be one, otherwise zero (return an XTensor structure)
make a new tensor to keep the result and return it */ make a new tensor to keep the result and return it */
XTensor IsZero(const XTensor &a); XTensor IsZero(const XTensor &a);
/* if source entry is zero, set target entry to be one, otherwise zero
   (the result is kept in b)
   >> a - the input tensor
   >> b - the output tensor
   >> requireLink - whether the operation is added to the network */
void IsZero(const XTensor &a, XTensor & b, bool requireLink = false);
/* set every entry to its logarithm value */ /* set every entry to its logarithm value */
void _Log(const XTensor * a, XTensor * b); void _Log(const XTensor * a, XTensor * b);
...@@ -88,6 +100,8 @@ void _LogMe(XTensor * a); ...@@ -88,6 +100,8 @@ void _LogMe(XTensor * a);
/* set every entry to its logarithm value (return an XTensor structure) /* set every entry to its logarithm value (return an XTensor structure)
make a new tensor to keep the result and return it */ make a new tensor to keep the result and return it */
XTensor Log(const XTensor & a); XTensor Log(const XTensor & a);
/* set every entry to its logarithm value and keep the result in b
   >> a - the input tensor
   >> b - the output tensor
   >> requireLink - whether the operation is added to the network */
void Log(const XTensor & a, XTensor & b, bool requireLink = false);
/* set every entry to its round value */ /* set every entry to its round value */
void _Round(const XTensor * a, XTensor * b); void _Round(const XTensor * a, XTensor * b);
...@@ -97,6 +111,8 @@ void _RoundMe(XTensor * a); ...@@ -97,6 +111,8 @@ void _RoundMe(XTensor * a);
/* set every entry to its round value (return an XTensor structure) /* set every entry to its round value (return an XTensor structure)
make a new tensor to keep the result and return it */ make a new tensor to keep the result and return it */
XTensor Round(const XTensor & a); XTensor Round(const XTensor & a);
/* set every entry to its round value and keep the result in b
   >> a - the input tensor
   >> b - the output tensor
   >> requireLink - whether the operation is added to the network */
void Round(const XTensor & a, XTensor & b, bool requireLink = false);
/* set every entry to its sqrt value */ /* set every entry to its sqrt value */
void _Sqrt(const XTensor * a, XTensor * b); void _Sqrt(const XTensor * a, XTensor * b);
...@@ -106,6 +122,8 @@ void _SqrtMe(XTensor * a); ...@@ -106,6 +122,8 @@ void _SqrtMe(XTensor * a);
/* set every entry to its sqrt value (return an XTensor structure) /* set every entry to its sqrt value (return an XTensor structure)
make a new tensor to keep the result and return it */ make a new tensor to keep the result and return it */
XTensor Sqrt(const XTensor & a); XTensor Sqrt(const XTensor & a);
/* set every entry to its sqrt value and keep the result in b
   >> a - the input tensor
   >> b - the output tensor
   >> requireLink - whether the operation is added to the network */
void Sqrt(const XTensor & a, XTensor & b, bool requireLink = false);
/* set every entry to its square value */ /* set every entry to its square value */
void _Square(const XTensor * a, XTensor * b); void _Square(const XTensor * a, XTensor * b);
...@@ -115,6 +133,8 @@ void _SquareMe(XTensor * a); ...@@ -115,6 +133,8 @@ void _SquareMe(XTensor * a);
/* set every entry to its square value (return an XTensor structure) /* set every entry to its square value (return an XTensor structure)
make a new tensor to keep the result and return it */ make a new tensor to keep the result and return it */
XTensor Square(const XTensor & a); XTensor Square(const XTensor & a);
/* set every entry to its square value and keep the result in b
   >> a - the input tensor
   >> b - the output tensor
   >> requireLink - whether the operation is added to the network */
void Square(const XTensor & a, XTensor & b, bool requireLink = false);
/* set every entry to its sine value */ /* set every entry to its sine value */
...@@ -125,6 +145,8 @@ void _SinMe(XTensor * a); ...@@ -125,6 +145,8 @@ void _SinMe(XTensor * a);
/* set every entry to its sine value (return an XTensor structure) /* set every entry to its sine value (return an XTensor structure)
make a new tensor to keep the result and return it */ make a new tensor to keep the result and return it */
XTensor Sin(const XTensor & a); XTensor Sin(const XTensor & a);
/* set every entry to its sine value and keep the result in b
   >> a - the input tensor
   >> b - the output tensor
   >> requireLink - whether the operation is added to the network */
void Sin(const XTensor & a, XTensor & b, bool requireLink = false);
/* set every entry to its cosine value */ /* set every entry to its cosine value */
void _Cos(const XTensor * a, XTensor * b); void _Cos(const XTensor * a, XTensor * b);
...@@ -134,6 +156,8 @@ void _CosMe(XTensor * a); ...@@ -134,6 +156,8 @@ void _CosMe(XTensor * a);
/* set every entry to its cosine value (return an XTensor structure) /* set every entry to its cosine value (return an XTensor structure)
make a new tensor to keep the result and return it */ make a new tensor to keep the result and return it */
XTensor Cos(const XTensor & a); XTensor Cos(const XTensor & a);
/* set every entry to its cosine value and keep the result in b
   >> a - the input tensor
   >> b - the output tensor
   >> requireLink - whether the operation is added to the network */
void Cos(const XTensor & a, XTensor & b, bool requireLink = false);
/* set every entry to its tangent value */ /* set every entry to its tangent value */
void _Tan(const XTensor * a, XTensor * b); void _Tan(const XTensor * a, XTensor * b);
...@@ -143,6 +167,8 @@ void _TanMe(XTensor * a); ...@@ -143,6 +167,8 @@ void _TanMe(XTensor * a);
/* set every entry to its tangent value (return an XTensor structure) /* set every entry to its tangent value (return an XTensor structure)
make a new tensor to keep the result and return it */ make a new tensor to keep the result and return it */
XTensor Tan(const XTensor & a); XTensor Tan(const XTensor & a);
/* set every entry to its tangent value and keep the result in b
   >> a - the input tensor
   >> b - the output tensor
   >> requireLink - whether the operation is added to the network */
void Tan(const XTensor & a, XTensor & b, bool requireLink = false);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -229,10 +229,10 @@ XTensor CopyIndexed(const XTensor & s, int dim, ...@@ -229,10 +229,10 @@ XTensor CopyIndexed(const XTensor & s, int dim,
/* call _CopyIndexed function */ /* call _CopyIndexed function */
_CopyIndexed(&s, &t, dim, &srcIndex, &tgtIndex, copyNum); _CopyIndexed(&s, &t, dim, &srcIndex, &tgtIndex, copyNum);
XList list(3); TensorList list(3);
list.Add(&s); list.Add((XTensor*)&s);
list.Add(&srcIndex); list.Add((XTensor*)&srcIndex);
list.Add(&tgtIndex); list.Add((XTensor*)&tgtIndex);
/* tensor connection */ /* tensor connection */
XLink::MakeLink(&list, &t, MOVEMENT_COPYINDEXED); XLink::MakeLink(&list, &t, MOVEMENT_COPYINDEXED);
......
...@@ -131,4 +131,43 @@ XTensor ReduceMax(const XTensor &input, int dim) ...@@ -131,4 +131,43 @@ XTensor ReduceMax(const XTensor &input, int dim)
return output; return output;
} }
/*
get the max value of the items along a dimension of the tensor and keep
the result in a tensor supplied by the caller
>> input - the input tensor
>> output - the output tensor (initialized here when it is not initialized
            yet or fails the IsReduceShaped check against input and dim)
>> dim - the dimension where the reduction is performed on
>> requireLink - whether the operation is recorded in the network
*/
void ReduceMax(const XTensor &input, XTensor &output, int dim, bool requireLink)
{
    CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!");

    const bool reusable = output.isInit && XTensor::IsReduceShaped(&input, &output, dim);
    if (!reusable) {
        /* the output keeps every dimension of the input except dim */
        int order = input.order - 1;
        int * dimSize = new int[order];
        for (int i = 0; i < order; i++)
            dimSize[i] = input.dimSize[i < dim ? i : i + 1];

        float dr = input.isSparse ? input.denseRatio : 1.0F;
        InitTensor(&output, order, dimSize, input.dataType, dr, input.devID, input.mem);

        /* destroy variables */
        delete[] dimSize;
    }

    /* do the reduction with the low-level routine */
    _ReduceMax(&input, &output, dim);

    if (!requireLink)
        return;

    /* tensor connections */
    XLink::MakeLink(&input, NULL, &output, REDUCE_REDUCEMAX);
    XLink::AddParamToHeadInt(&output, dim);
}
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
...@@ -35,6 +35,9 @@ make a new tensor to keep the result and return it ...@@ -35,6 +35,9 @@ make a new tensor to keep the result and return it
*/ */
XTensor ReduceMax(const XTensor &input, int dim); XTensor ReduceMax(const XTensor &input, int dim);
/* get the max value of the items along a dimension of the tensor and keep the result in output
   >> input - the input tensor
   >> output - the output tensor
   >> dim - the dimension where the reduction is performed on
   >> requireLink - whether the operation is added to the network */
void ReduceMax(const XTensor &input, XTensor &output, int dim, bool requireLink = false);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
#endif // __REDUCEMAX_H__ #endif // __REDUCEMAX_H__
...@@ -86,4 +86,45 @@ XTensor ReduceMean(const XTensor &input, int dim) ...@@ -86,4 +86,45 @@ XTensor ReduceMean(const XTensor &input, int dim)
return output; return output;
} }
/*
get the mean value along a dimension of the tensor and keep the result
in a tensor supplied by the caller
For a 1-dimensional data array a, mean = (1/n) * sum_i input_i
>> input - the input tensor
>> output - the output tensor (initialized here when it is not initialized
            yet or fails the IsReduceShaped check against input and dim)
>> dim - the dimension where the reduction is performed on
>> requireLink - whether the operation is recorded in the network
*/
void ReduceMean(const XTensor &input, XTensor &output, int dim, bool requireLink)
{
    CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!");

    const bool reusable = output.isInit && XTensor::IsReduceShaped(&input, &output, dim);
    if (!reusable) {
        /* the output keeps every dimension of the input except dim */
        int order = input.order - 1;
        int * dimSize = new int[order];
        for (int i = 0; i < order; i++)
            dimSize[i] = input.dimSize[i < dim ? i : i + 1];

        float dr = input.isSparse ? input.denseRatio : 1.0F;
        InitTensor(&output, order, dimSize, input.dataType, dr, input.devID, input.mem);

        /* destroy variables */
        delete[] dimSize;
    }

    /* do the reduction with the low-level routine */
    _ReduceMean(&input, &output, dim);

    if (!requireLink)
        return;

    /* tensor connections */
    XLink::MakeLink(&input, NULL, &output, REDUCE_REDUCEMEAN);
    XLink::AddParamToHeadInt(&output, dim);
}
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
...@@ -39,6 +39,12 @@ For a 1-dimensional data array a, mean = (1/n) * sum_i input_i ...@@ -39,6 +39,12 @@ For a 1-dimensional data array a, mean = (1/n) * sum_i input_i
*/ */
XTensor ReduceMean(const XTensor &input, int dim); XTensor ReduceMean(const XTensor &input, int dim);
/*
get the mean value along a dimension of the tensor and keep the result in output
For a 1-dimensional data array a, mean = (1/n) * sum_i input_i
>> input - the input tensor
>> output - the output tensor
>> dim - the dimension where the reduction is performed on
>> requireLink - whether the operation is added to the network
*/
void ReduceMean(const XTensor &input, XTensor &output, int dim, bool requireLink = false);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
#endif // __REDUCEMEAN_H__ #endif // __REDUCEMEAN_H__
...@@ -244,6 +244,39 @@ XTensor ReduceSum(const XTensor &input, int dim, const XTensor &shift, DTYPE pow ...@@ -244,6 +244,39 @@ XTensor ReduceSum(const XTensor &input, int dim, const XTensor &shift, DTYPE pow
return output; return output;
} }
/*
sum the items along a dimension of the tensor (with a shift tensor) and keep
the result in a tensor supplied by the caller
>> input - the input tensor
>> output - the output tensor (initialized here when it is not initialized
            yet or fails the IsReduceShaped check against input and dim)
>> dim - the dimension where the reduction is performed on
>> shift - the shift tensor handed to _ReduceSum
>> power - the power parameter handed to _ReduceSum
>> isExp - the exp flag handed to _ReduceSum
>> requireLink - whether the operation is recorded in the network
*/
void ReduceSum(const XTensor &input, XTensor &output, int dim, const XTensor &shift, DTYPE power, bool isExp, bool requireLink)
{
    CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!");

    const bool reusable = output.isInit && XTensor::IsReduceShaped(&input, &output, dim);
    if (!reusable) {
        /* the output keeps every dimension of the input except dim */
        int order = input.order - 1;
        int * dimSize = new int[order];
        for (int i = 0; i < order; i++)
            dimSize[i] = input.dimSize[i < dim ? i : i + 1];

        float dr = input.isSparse ? input.denseRatio : 1.0F;
        InitTensor(&output, order, dimSize, input.dataType, dr, input.devID, input.mem);

        /* destroy variables */
        delete[] dimSize;
    }

    /* do the reduction with the low-level routine */
    _ReduceSum(&input, &output, dim, &shift, power, isExp);

    if (!requireLink)
        return;

    /* tensor connections (input and shift are both linked to the output) */
    XLink::MakeLink(&input, &shift, &output, REDUCE_REDUCESUM);
    XLink::AddParamToHeadInt(&output, dim);
    XLink::AddParamToHead(&output, power);
    XLink::AddParamToHeadBool(&output, isExp);
}
/* /*
sum the items along a dimension of the tensor (return an XTensor structure) sum the items along a dimension of the tensor (return an XTensor structure)
make a new tensor to keep the result and return it make a new tensor to keep the result and return it
...@@ -290,4 +323,52 @@ XTensor ReduceSum(const XTensor &input, int dim, DTYPE power, bool isExp) ...@@ -290,4 +323,52 @@ XTensor ReduceSum(const XTensor &input, int dim, DTYPE power, bool isExp)
return output; return output;
} }
/*
sum the items along a dimension of the tensor and keep the result in a
tensor supplied by the caller
This overload takes no shift tensor (NULL is passed to _ReduceSum as the shift).
For a 1-dimensional data array a,
sum = \sum_i (a_i - shift)^power if isExp == false
sum = \sum_i exp((a_i - shift)^power) if isExp == true
>> input - the input tensor
>> output - the output tensor (initialized here when it is not initialized
            yet or fails the IsReduceShaped check against input and dim)
>> dim - the dimension where the reduction is performed on
>> power - we perform pow(item_i, power) on each item in the array
>> isExp - specify if the exp() is performed
>> requireLink - whether the operation is recorded in the network
*/
void ReduceSum(const XTensor &input, XTensor &output, int dim, DTYPE power, bool isExp, bool requireLink)
{
    CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!");

    const bool reusable = output.isInit && XTensor::IsReduceShaped(&input, &output, dim);
    if (!reusable) {
        /* the output keeps every dimension of the input except dim */
        int order = input.order - 1;
        int * dimSize = new int[order];
        for (int i = 0; i < order; i++)
            dimSize[i] = input.dimSize[i < dim ? i : i + 1];

        float dr = input.isSparse ? input.denseRatio : 1.0F;
        InitTensor(&output, order, dimSize, input.dataType, dr, input.devID, input.mem);

        /* destroy variables */
        delete[] dimSize;
    }

    /* do the reduction with the low-level routine (no shift) */
    _ReduceSum(&input, &output, dim, NULL, power, isExp);

    if (!requireLink)
        return;

    /* tensor connections */
    XLink::MakeLink(&input, NULL, &output, REDUCE_REDUCESUM);
    XLink::AddParamToHeadInt(&output, dim);
    XLink::AddParamToHead(&output, power);
    XLink::AddParamToHeadBool(&output, isExp);
}
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论