Commit 314f4370 by liyinqiao

Merge branch 'liyinqiao' into xiaotong-working

# Conflicts:
#	source/tensor/XDevice.cpp
#	source/tensor/XMem.cpp
#	source/tensor/XTensor.cpp
parents 7c17670d 58181c8d
......@@ -20,7 +20,9 @@
*/
#include "XBackwardLoss.h"
#include "XNoder.h"
#include "../tensor/XName.h"
#include "../tensor/function/FHeader.h"
#include "../tensor/core/getandset/SetData.h"
#include "../tensor/function/HardTanH.h"
#include "../tensor/function/Identity.h"
......@@ -31,6 +33,60 @@
namespace nts{
/* compute dE/dx of a node */
void XLossGrad::MakeGrad(XTensor * node, bool isEfficient)
{
XLink &income = node->income;
int operID = income.typeID;
CheckNTErrors(income.tailNum >= 1, "Wrong number of tensors for loss computation!");
XTensor * output = income.tails[0];
XTensor * gold = NULL;
XTensor * weight = NULL;
XTensor * padding = NULL;
int leadingDim;
XNoder::MakeGrad(output);
XTensor * dedy = output->grad;
if (income.tailNum == 1) {
if(dedy->dataType == X_FLOAT)
_SetDataFixedFloat(dedy, 1.0F);
else if(dedy->dataType == X_DOUBLE)
_SetDataFixedDouble(dedy, 1.0);
else if(dedy->dataType == X_INT)
_SetDataFixedInt(dedy, 1);
else
ShowNTErrors("TODO");
return;
}
gold = income.tails[1];
if(operID == LOSS_CROSSENTROPY) {
if (income.tailNum == 3)
padding = income.tails[2];
leadingDim = income.GetParamInt(0);
CheckNTErrors(leadingDim >= 0 && leadingDim < output->order, "Wrong leading dimension for cross entropy!");
_CrossEntropyBackward(dedy, output, gold, weight, padding, leadingDim);
}
else{
ShowNTErrors("Wrong activation function type!");
}
node->visitMark = NODE_FINISHED;
}
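For reference, the rule that _CrossEntropyBackward applies can be written out as a minimal scalar sketch; this assumes the plain cross-entropy loss E = -sum_i gold[i] * log(y[i]) and ignores the optional weight and padding arguments used above:
/* minimal sketch (not the library implementation) of the backward rule
   for E = -sum_i gold[i] * log(y[i]) */
void CrossEntropyBackwardSketch(float * dedy, const float * y,
                                const float * gold, int n)
{
    for (int i = 0; i < n; i++)
        dedy[i] = -gold[i] / y[i];   /* dE/dy[i] */
}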
/* indicates whether the node is for a loss computation */
bool XLossGrad::IsLossOP(XTensor * node)
{
XLink &income = node->income;
return (income.typeID & LOSS_BASE) != 0;
}
/*
compute dE/dx for a given function y = f(x)
>> gold - gold standard to measure error (or loss)
......
......@@ -23,6 +23,7 @@
#include "../tensor/XTensor.h"
#include "../tensor/function/FHeader.h"
#include "../tensor/loss/LHeader.h"
#ifndef __XBACKWARDLOSS_H__
#define __XBACKWARDLOSS_H__
......@@ -34,6 +35,14 @@ namespace nts{
class XLossGrad
{
public:
/* compute dE/dx of a node */
static
void MakeGrad(XTensor * node, bool isEfficient);
/* indicates whether the node is for a loss computation */
static
bool IsLossOP(XTensor * node);
/* compute dE/dx for a given function y = f(x) */
void Compute(XTensor * gold, XTensor * y, XTensor * x,
XTensor * dedy, XTensor * dedx, XTensor * padding,
......
......@@ -81,6 +81,12 @@ void XMathGrad::MakeGrad(XTensor * node, bool isEfficient)
GradPower(node, isEfficient);
else if(operID == MATH_SCALEANDSHIFT)
GradScaleAndShift(node, isEfficient);
else if(operID == MATH_SCALE)
GradScale(node, isEfficient);
else if(operID == MATH_DESCALE)
GradDescale(node, isEfficient);
else if(operID == MATH_SHIFT)
GradShift(node, isEfficient);
else if(operID == MATH_SUB)
GradSub(node, isEfficient);
else if(operID == MATH_SUBDIM)
......@@ -719,12 +725,18 @@ void XMathGrad::GradMultiply(XTensor * node, bool isEfficient)
XTensor * a = income.tails[0];
XTensor * b = income.tails[1];
XNoder::MakeGrad(a);
XNoder::MakeGrad(b);
CheckNTErrors(XTensor::IsSameShaped(a, b), "Wrong sized input tensors!");
if (!isEfficient || a->isGrad) {
XNoder::MakeGrad(a);
_Multiply(node->grad, b, a->grad, 1.0F);
}
if (!isEfficient || b->isGrad) {
XNoder::MakeGrad(b);
_Multiply(node->grad, a, b->grad, 1.0F);
}
node->visitMark = NODE_FINISHED;
}
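The element-wise product rule behind the code above, written as a minimal scalar sketch; the efficient mode simply skips an operand whose gradient is not required:
/* sketch of the product rule for c = a * b (element-wise):
       dE/da += dE/dc * b,    dE/db += dE/dc * a            */
void MultiplyBackwardSketch(float * aGrad, float * bGrad,
                            const float * cGrad,
                            const float * a, const float * b, int n)
{
    for (int i = 0; i < n; i++) {
        aGrad[i] += cGrad[i] * b[i];
        bGrad[i] += cGrad[i] * a[i];
    }
}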
......@@ -888,88 +900,8 @@ gradient for normalize
*/
void XMathGrad::GradNormalize(XTensor * node, bool isEfficient)
{
ShowNTErrors("This is really a bad piece of code!!!");
XLink &income = node->income;
CheckNTErrors(income.tailNum == 5, "Wrong input tensor number for NORMALIZE!");
XTensor * input = income.tails[0];
XTensor * mean = income.tails[1];
XTensor * var = income.tails[2];
XTensor * a = income.tails[3];
XTensor * b = income.tails[4];
XTensor * c = NewTensor(var);
XTensor * d = NewTensor(a);
XTensor * e = NewTensor(a);
XTensor * f = NewTensor(a);
XTensor * g = NewTensor(a);
XTensor * h = NewTensor(a);
XTensor * i = NewTensor(a);
XTensor * j = NewTensor(a);
XTensor * k = NewTensor(var);
XTensor * p = NewTensor(var);
XTensor * q = NewTensor(var);
XTensor * r = NewTensor(a);
XTensor * x = NewTensor(mean);
XTensor * y = NewTensor(mean);
XTensor * z = NewTensor(mean);
DTYPE epsilon = income.GetParam(1);
int dim = income.GetParamInt(0);
int n = a->GetDim(dim);
XNoder::MakeGrad(input);
XNoder::MakeGrad(mean);
XNoder::MakeGrad(var);
XNoder::MakeGrad(a);
XNoder::MakeGrad(b);
/* dEdinput */
_ScaleAndShift(var, c, 1.0F, epsilon);
_Unsqueeze(c, d, dim, n);
_Power(d, e, -0.5F);
_Multiply(a, e, f);
_Multiply(node->grad, f, input->grad, 1.0F);
/* dEdmean */
_ScaleAndShift(f, g, -1.0F);
_ReduceSum(g, x, dim);
_ReduceSum(node->grad, y, dim);
_Multiply(y, x, mean->grad, 1.0F);
/* dEdvar */
_Unsqueeze(mean, h, dim, n);
_Sub(input, h, i);
_Multiply(a, i, j);
_Power(var, k, -1.5F);
_ScaleAndShift(k, p, -0.5F);
_ReduceSum(j, z, dim);
_Multiply(z, p, q);
_Multiply(y, q, var->grad, 1.0F);
/* dEda */
_Multiply(i, e, r);
_Multiply(node->grad, r, a->grad, 1.0F);
/* dEdb */
_Sum(b->grad, node->grad, b->grad);
node->visitMark = NODE_FINISHED;
ShowNTErrors("TODO!");
delete c;
delete d;
delete e;
delete f;
delete g;
delete h;
delete i;
delete j;
delete k;
delete p;
delete q;
delete r;
delete x;
delete y;
delete z;
}
/*
......@@ -1030,6 +962,82 @@ void XMathGrad::GradScaleAndShift(XTensor * node, bool isEfficient)
}
/*
gradient for Scale
for
c = a * scale
we have
dE/da = dE/dc * scale
>> node - the node (c) for backward computation
>> isEfficient - indicates whether the computation is in
an efficient manner
*/
void XMathGrad::GradScale(XTensor * node, bool isEfficient)
{
XLink &income = node->income;
CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for SCALE!");
XTensor * a = income.tails[0];
DTYPE scale = income.GetParam(0);
XNoder::MakeGrad(a);
_Sum(a->grad, node->grad, a->grad, scale);
node->visitMark = NODE_FINISHED;
}
/*
gradient for Descale
for
c = a / descale
we have
dE/da = dE/dc / descale
>> node - the node (c) for backward computation
>> isEfficient - indicates whether the computation is in
an efficient manner
*/
void XMathGrad::GradDescale(XTensor * node, bool isEfficient)
{
XLink &income = node->income;
CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for DESCALE!");
XTensor * a = income.tails[0];
DTYPE descale = income.GetParam(0);
XNoder::MakeGrad(a);
_Sum(a->grad, node->grad, a->grad, 1.0F / descale);
node->visitMark = NODE_FINISHED;
}
/*
gradient for Shift
for
c = a + shift
we have
dE/da = dE/dc
>> node - the node (c) for backward computation
>> isEfficient - indicates whether the computation is in
an efficient manner
*/
void XMathGrad::GradShift(XTensor * node, bool isEfficient)
{
XLink &income = node->income;
CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for SHIFT!");
XTensor * a = income.tails[0];
XNoder::MakeGrad(a);
_Sum(a->grad, node->grad, a->grad);
node->visitMark = NODE_FINISHED;
}
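The three rules above reduce to a simple accumulation into the input gradient; a scalar sketch, assuming one input a and one output c (illustrative only):
/* c = a * scale    ->  dE/da += dE/dc * scale
   c = a / descale  ->  dE/da += dE/dc / descale
   c = a + shift    ->  dE/da += dE/dc                   */
void ScaleBackwardSketch(float * aGrad, const float * cGrad, int n, float scale)
{
    /* the Scale case; Descale uses 1/descale and Shift uses 1 as the factor */
    for (int i = 0; i < n; i++)
        aGrad[i] += cGrad[i] * scale;
}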
/*
gradient for minus
for
c = a - b * \beta
......
......@@ -130,6 +130,18 @@ private:
static
void GradScaleAndShift(XTensor * node, bool isEfficient);
/* gradient for Scale */
static
void GradScale(XTensor * node, bool isEfficient);
/* gradient for Shift */
static
void GradShift(XTensor * node, bool isEfficient);
/* gradient for Descale */
static
void GradDescale(XTensor * node, bool isEfficient);
/* gradient for Minus */
static
void GradSub(XTensor * node, bool isEfficient);
......
......@@ -43,6 +43,8 @@ void XShapeGrad::MakeGrad(XTensor * node, bool isEfficent)
GradCopyIndexed(node, isEfficent);
else if(operID == MOVEMENT_GATHER)
GradGather(node, isEfficent);
else if (operID == MOVEMENT_DROPOUTWITHINDEX)
GradDropoutWithIndex(node, isEfficent);
else if(operID == SHAPE_MERGE)
GradMerge(node, isEfficent);
else if(operID == SHAPE_MERGE_LIST)
......@@ -115,7 +117,7 @@ dE/da = spreadforgather(b)
void XShapeGrad::GradGather(XTensor * node, bool isEfficent)
{
XLink &income = node->income;
CheckNTErrors(income.tailNum > 0, "Wrong input tensor number for CopyIndexed!");
CheckNTErrors(income.tailNum > 0, "Wrong input tensor number for Gather!");
XTensor * input = income.tails[0];
XTensor * index = income.tails[1];
......@@ -127,6 +129,43 @@ void XShapeGrad::GradGather(XTensor * node, bool isEfficent)
}
/*
gradient computation for DropoutWithIndex function
*/
void XShapeGrad::GradDropoutWithIndex(XTensor * node, bool isEfficent)
{
XLink &income = node->income;
CheckNTErrors(income.tailNum > 0, "Wrong input tensor number for DropoutWithIndex!");
XTensor * input = income.tails[0];
XTensor * index = income.tails[1];
DTYPE scale = income.GetParam(0);
XNoder::MakeGrad(input);
//_Identity(node->grad, input->grad);
_CopyValues(node->grad, input->grad);
int order = node->grad->order;
int * dimSize = new int[order];
for (int i = 0; i < order; i++) {
dimSize[i] = node->grad->dimSize[i];
}
int order1 = 1;
int * dimSize1 = new int[order1];
dimSize1[0] = input->grad->unitNum;
input->grad->Reshape(order1, dimSize1);
_DropoutWithIndex(node->grad, index, input->grad);
_ScaleAndShiftMe(input->grad, scale);
input->grad->Reshape(order, dimSize);
node->visitMark = NODE_FINISHED;
}
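A rough sketch of what the backward pass above computes, assuming index lists the positions dropped in the forward pass and scale is the usual 1/(1-p) factor (both assumptions); only the reshaping of input->grad differs in the real code:
/* dE/dx = scale * dE/dy, with the dropped positions zeroed out */
void DropoutWithIndexBackwardSketch(float * dedx, const float * dedy,
                                    const int * index, int indexNum,
                                    int n, float scale)
{
    for (int i = 0; i < n; i++)
        dedx[i] = dedy[i];              /* _CopyValues        */
    for (int k = 0; k < indexNum; k++)
        dedx[index[k]] = 0.0F;          /* _DropoutWithIndex  */
    for (int i = 0; i < n; i++)
        dedx[i] *= scale;               /* _ScaleAndShiftMe   */
}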
/*
gradient for merge
for
c = merge(a_0, a_1, ...)
......@@ -232,8 +271,8 @@ void XShapeGrad::GradMergeList(XTensor * node, bool isEfficient)
CheckNTErrors(income.tailNum > 0, "Wrong input tensor number for MERGE!");
XTensor * last = NULL;
XList smalls(income.tailNum);
XList smallsGrad(income.tailNum);
TensorList smalls(income.tailNum);
TensorList smallsGrad(income.tailNum);
bool mergeOnly = true;
for(int i = 0; i < income.tailNum; i++){
XTensor * tail = income.tails[i];
......@@ -401,7 +440,7 @@ void XShapeGrad::GradSplitListPost(XTensor * node, bool isEfficient)
/* we compute the gradient for current node, rather than for
child node, i.e., we use the outgoing edge here */
XLink &outgo = node->outgo;
XList splits(outgo.tailNum);
TensorList splits(outgo.tailNum);
int whereToSplit = -1;
int splitNum = 0;
......
......@@ -54,6 +54,10 @@ private:
static
void GradGather(XTensor * node, bool isEfficent);
/* gradient computation for dropout with index: b = dropoutwithindex(a, index) */
static
void GradDropoutWithIndex(XTensor * node, bool isEfficent);
/* gradient computation for merge: c = merge(a, b, ...) */
static
void GradMerge(XTensor * node, bool isEfficent);
......
......@@ -55,7 +55,7 @@ void XNetClearAll()
XNet::XNet()
{
nodes.Clear();
isGradEfficient = true;
isGradEfficient = false;
}
/* de-constructor */
......@@ -79,13 +79,13 @@ backward propagation to obtain gradient
*/
void XNet::Backward(XTensor &root, LOSS_FUNCTION_NAME loss)
{
XList roots(1);
TensorList roots(1);
roots.Add(&root);
XList golds(1);
TensorList golds(1);
golds.Add(NULL);
XList paddings(1);
TensorList paddings(1);
paddings.Add(NULL);
Backward(roots, golds, paddings, loss);
......@@ -99,13 +99,13 @@ backward propagation to obtain gradient wrt. the loss/error function
*/
void XNet::Backward(XTensor &root, XTensor &gold, LOSS_FUNCTION_NAME loss)
{
XList roots(1);
TensorList roots(1);
roots.Add(&root);
XList golds(1);
TensorList golds(1);
golds.Add(&gold);
XList paddings(1);
TensorList paddings(1);
paddings.Add(NULL);
Backward(roots, golds, paddings, loss);
......@@ -120,13 +120,13 @@ backward propagation to obtain gradient wrt. the loss/error function
*/
void XNet::Backward(XTensor &root, XTensor &gold, XTensor &padding, LOSS_FUNCTION_NAME loss)
{
XList roots(1);
TensorList roots(1);
roots.Add(&root);
XList golds(1);
TensorList golds(1);
golds.Add(&gold);
XList paddings(1);
TensorList paddings(1);
paddings.Add(&padding);
Backward(roots, golds, paddings, loss);
......@@ -138,10 +138,10 @@ with a number of root nodes
>> roots - a list of root nodes (output) of the network
>> loss - name of loss function
*/
void XNet::Backward(XList &roots, LOSS_FUNCTION_NAME loss)
void XNet::Backward(TensorList &roots, LOSS_FUNCTION_NAME loss)
{
XList golds(roots.count);
XList paddings(roots.count);
TensorList golds(roots.count);
TensorList paddings(roots.count);
for (int i = 0; i < roots.count; i++) {
golds.Add(NULL);
paddings.Add(NULL);
......@@ -157,9 +157,9 @@ with a number of root nodes
>> golds - a list of gold standard for the output
>> loss - name of loss function
*/
void XNet::Backward(XList &roots, XList &golds, LOSS_FUNCTION_NAME loss)
void XNet::Backward(TensorList &roots, TensorList &golds, LOSS_FUNCTION_NAME loss)
{
XList paddings(roots.count);
TensorList paddings(roots.count);
for (int i = 0; i < roots.count; i++)
paddings.Add(NULL);
......@@ -174,7 +174,7 @@ with a number of root nodes
>> paddings - specify a target value that is ignored
>> loss - name of loss function
*/
void XNet::Backward(XList &roots, XList &golds, XList &paddings, LOSS_FUNCTION_NAME loss)
void XNet::Backward(TensorList &roots, TensorList &golds, TensorList &paddings, LOSS_FUNCTION_NAME loss)
{
Traverse(roots);
......@@ -190,18 +190,18 @@ void XNet::Backward(XList &roots, XList &golds, XList &paddings, LOSS_FUNCTION_N
XLossGrad lossGrad;
/* we start with the gradient with respect to the loss for output layers */
for(int i = 0; i < roots.count; i++){
/*for(int i = 0; i < roots.count; i++){
XTensor * root = (XTensor*)roots.Get(i);
XTensor * gold = (XTensor*)golds.Get(i);
XTensor * padding = (XTensor*)paddings.Get(i);
XLink &income = root->income;
int funcID = income.typeID;
void * params = income.params;
void * params = income.params;*/
/* we compute dE/dx if the output is generated by an activation function y = f(x).
Note that we do not need to obtain dE/dy here because it is of no use in the
following process of back-propagation */
if(gold != NULL && income.tailNum == 1 && (funcID & FUNCTION_BASE)){
/*if(gold != NULL && income.tailNum == 1 && (funcID & FUNCTION_BASE)){
if(funcID == FUNC_LOGSOFTMAX || funcID == FUNC_SOFTMAX) {
XTensor * x = income.tails[0];
XNoder::MakeGrad(x);
......@@ -212,13 +212,13 @@ void XNet::Backward(XList &roots, XList &golds, XList &paddings, LOSS_FUNCTION_N
XNoder::MakeGrad(root);
lossGrad.Compute(gold, root, root->grad, padding, loss);
}
}
}*/
/* we compute dE/dy (y is the output) if no predefined activation function is used */
else{
/*else{
XNoder::MakeGrad(root);
lossGrad.Compute(gold, root, root->grad, NULL, loss);
}
}
}*/
/* back-propagation from output to input */
for(int i = nodes.count - 1; i >= 0; i--){
......@@ -266,6 +266,8 @@ void XNet::BackwardNode(XTensor * node, bool isEfficent)
XFuncGrad::MakeGrad(node, isEfficent);
else if(XShapeGrad::IsShapeOP(node))
XShapeGrad::MakeGrad(node, isEfficent);
else if(XLossGrad::IsLossOP(node))
XLossGrad::MakeGrad(node, isEfficent);
else{
ShowNTErrors("Wrong node type!");
}
......@@ -300,7 +302,7 @@ depth-first search (Tarjan's algorithm)
*/
void XNet::Traverse(XTensor &root)
{
XList roots(1);
TensorList roots(1);
roots.Add(&root);
Traverse(roots);
......@@ -311,7 +313,7 @@ traverse the net and find the topological order by
depth-first search (Tarjan's algorithm)
>> roots - a list of roots (or output nodes)
*/
void XNet::Traverse(XList &roots)
void XNet::Traverse(TensorList &roots)
{
id = MakeNetID();
nodes.Clear();
......@@ -336,7 +338,7 @@ depth-first search given a node (Tarjan's algorithm for topological ordering)
>> orders - topological order of the nodes
>> code - code of the network
*/
void XNet::TarjanVisit(XTensor * node, XList &orders, const unsigned int code)
void XNet::TarjanVisit(XTensor * node, TensorList &orders, const unsigned int code)
{
if(node == NULL)
return;
......@@ -444,7 +446,7 @@ show network topology
*/
void XNet::ShowNetwork(FILE * file, XTensor * node)
{
XList roots(1);
TensorList roots(1);
roots.Add(node);
Traverse(roots);
......@@ -464,9 +466,9 @@ search for a node in a top-down manner by its name
>> top - the top most node
<< return - the node we found
*/
XTensor * XNet::SearchNode(XTensor * top, const char * name)
{
return XLink::SearchNode(top, name);
}
//XTensor * XNet::SearchNode(XTensor * top, const char * name)
//{
//return XLink::SearchNode(top, name);
//}
}
......@@ -23,6 +23,7 @@
#include "../tensor/XTensor.h"
#include "../tensor/function/FHeader.h"
#include "../tensor/loss/LHeader.h"
#ifndef __XNET_H__
#define __XNET_H__
......@@ -36,16 +37,16 @@ struct XNet
unsigned int id;
/* tensor nodes of the network (in order) */
XList nodes;
TensorList nodes;
/* tensor nodes to keep gradient for output (e.g., SGD)*/
XList gradNodes;
TensorList gradNodes;
/* output nodes of the network */
XList outputs;
TensorList outputs;
/* input nodes of the network */
XList inputs;
TensorList inputs;
/* indicates whether the network just keeps the gradient for parameter tensors */
bool isGradEfficient;
......@@ -70,15 +71,15 @@ struct XNet
/* backward propagation to obtain gradient
with a number of root nodes */
void Backward(XList &roots, LOSS_FUNCTION_NAME loss = NOLOSS);
void Backward(TensorList &roots, LOSS_FUNCTION_NAME loss = NOLOSS);
/* backward propagation to obtain gradient
with a number of root nodes */
void Backward(XList &roots, XList &golds, LOSS_FUNCTION_NAME loss = NOLOSS);
void Backward(TensorList &roots, TensorList &golds, LOSS_FUNCTION_NAME loss = NOLOSS);
/* backward propagation to obtain gradient wrt. the loss/error function
with a number of root nodes */
void Backward(XList &roots, XList &golds, XList &paddings, LOSS_FUNCTION_NAME loss = NOLOSS);
void Backward(TensorList &roots, TensorList &golds, TensorList &paddings, LOSS_FUNCTION_NAME loss = NOLOSS);
/* backward computation for a given node */
void BackwardNode(XTensor * node, bool isEfficent = false);
......@@ -92,10 +93,10 @@ struct XNet
/* traverse the net and find the topological order by
depth-first search (Tarjan's algorithm) */
void Traverse(XList &roots);
void Traverse(TensorList &roots);
/* depth-first search given a node (Tarjan's algorithm for topological ordering) */
void TarjanVisit(XTensor * node, XList &orders, const unsigned int code);
void TarjanVisit(XTensor * node, TensorList &orders, const unsigned int code);
/* dump network information */
void Dump(FILE * file);
......@@ -113,8 +114,8 @@ struct XNet
void ShowNetwork(FILE * file, XTensor * node);
/* search a node in a top-down manner by its name */
static
XTensor * SearchNode(XTensor * top, const char * name);
//static
//XTensor * SearchNode(XTensor * top, const char * name);
};
/* we make a unique id for every tensor */
......
......@@ -247,13 +247,13 @@ void Check(FNNModel &model)
/* make a hard copy of the fnn model */
void Copy(FNNModel &tgt, FNNModel &src)
{
InitTensor(&tgt.embeddingW, &src.embeddingW);
InitTensorV2(&tgt.embeddingW, &src.embeddingW);
for(int i = 0; i < MAX_HIDDEN_NUM; i++){
InitTensor(&tgt.hiddenW[i], &src.hiddenW[i]);
InitTensor(&tgt.hiddenB[i], &src.hiddenB[i]);
InitTensorV2(&tgt.hiddenW[i], &src.hiddenW[i]);
InitTensorV2(&tgt.hiddenB[i], &src.hiddenB[i]);
}
InitTensor(&tgt.outputW, &src.outputW);
InitTensor(&tgt.outputB, &src.outputB);
InitTensorV2(&tgt.outputW, &src.outputW);
InitTensorV2(&tgt.outputB, &src.outputB);
tgt.n = src.n;
tgt.eSize = src.eSize;
......@@ -310,7 +310,7 @@ initialize a 1d tensor using the fnn model setting
*/
void InitModelTensor1D(XTensor &tensor, int num, FNNModel &model)
{
InitTensor1D(&tensor, num, X_FLOAT, model.devID, model.mem);
InitTensor1DV2(&tensor, num, X_FLOAT, model.devID);
}
/*
......@@ -322,7 +322,7 @@ initialize a 2d tensor using the fnn model setting
*/
void InitModelTensor2D(XTensor &tensor, int rowNum, int colNum, FNNModel &model)
{
InitTensor2D(&tensor, rowNum, colNum, X_FLOAT, model.devID, model.mem);
InitTensor2DV2(&tensor, rowNum, colNum, X_FLOAT, model.devID);
}
......@@ -449,6 +449,9 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
/* the gold standard */
XTensor gold;
/* the loss tensor */
XTensor lossTensor;
/* make the input tensor for position i */
for(int i = 0; i < model.n - 1; i++)
MakeWordBatch(inputs[i], ngrams, ngramNum, i, model.vSize, model.devID, model.mem);
......@@ -466,6 +469,8 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
/* forward computation */
Forward(inputs, output, model, net);
/* backward computation to obtain gradients */
Backward(inputs, output, gold, CROSSENTROPY, model, grad, net);
......@@ -483,9 +488,11 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
/* this is implemented by multiply function */
//ForwardAutoDiff(inputs, output, model);
lossTensor = CrossEntropy(output, gold);
/* automatic differentiation */
autoDiffer.Backward(output, gold, CROSSENTROPY);
autoDiffer.Backward(lossTensor);
//autoDiffer.Backward(output, gold, CROSSENTROPY);
/* update model parameters */
Update(model, grad, learningRate, true);
......@@ -494,7 +501,9 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
/* get probabilities */
float prob = GetProb(output, gold);
loss += -prob;
prob = ReduceSumAll(lossTensor);
loss += prob;
wordCount += ngramNum;
wordCountTotal += ngramNum;
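The training step now computes an explicit loss tensor and backpropagates from it; a consolidated sketch of the pattern, reusing the variables of this function (names and ordering are illustrative, not a verbatim copy of the file):
/* forward loss, backward pass, and loss accumulation via a loss tensor */
XTensor lossTensor = CrossEntropy(output, gold);   /* per-position losses     */
autoDiffer.Backward(lossTensor);                   /* grads start at the loss */
float batchLoss = ReduceSumAll(lossTensor);        /* scalar loss             */
loss += batchLoss;                                 /* accumulated as positive */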
......@@ -537,8 +546,8 @@ update the model parameters using the delta rule
*/
void Update(FNNModel &model, FNNModel &grad, float epsilon, bool isNodeGrad)
{
XList paraList(10);
XList gradList(10);
TensorList paraList(10);
TensorList gradList(10);
paraList.Add(&model.outputW);
paraList.Add(&model.outputB);
......@@ -595,14 +604,14 @@ get prediction probabilites of the gold words
float GetProb(XTensor &output, XTensor &gold, XTensor * wordProbs)
{
XTensor probs;
InitTensor(&probs, &output);
InitTensorV2(&probs, &output);
/* probs[i,j] = output[i,j] * gold[i,j] */
_Multiply(&output, &gold, &probs);
/* probability of each word */
XTensor wprobs;
InitTensor1D(&wprobs, output.GetDim(0), output.dataType, output.devID, output.mem);
InitTensor1DV2(&wprobs, output.GetDim(0), output.dataType, output.devID);
_ReduceSum(&probs, &wprobs, 1);
if(wordProbs != NULL)
_CopyValues(&wprobs, wordProbs);
......@@ -616,7 +625,7 @@ float GetProb(XTensor &output, XTensor &gold, XTensor * wordProbs)
/* probability for the batch */
XTensor result;
InitTensor1D(&result, 1, X_FLOAT, output.devID, output.mem);
InitTensor1DV2(&result, 1, X_FLOAT, output.devID);
_ReduceSum(&probs, &result, 1);
return result.Get1D(0);
......@@ -718,7 +727,7 @@ The indexed cell is set to 1, and 0 otherwise.
void InitZeroOneTensor2D(XTensor &tensor, int rowNum, int colNum, int * rows, int * cols,
int itemNum, int devID, XMem * mem)
{
InitTensor2D(&tensor, rowNum, colNum, X_FLOAT, devID, mem);
InitTensor2DV2(&tensor, rowNum, colNum, X_FLOAT, devID);
tensor.SetZeroAll();
......@@ -765,7 +774,7 @@ void Forward(XTensor inputs[], XTensor &output, FNNModel &model, FNNNet &net)
int batchSize = -1;
int n = model.n;
int depth = model.hDepth;
XList eList(n - 1);
TensorList eList(n - 1);
/* previous n - 1 words */
for(int i = 0; i < n - 1; i++){
......@@ -811,7 +820,7 @@ void Forward(XTensor inputs[], XTensor &output, FNNModel &model, FNNNet &net)
/* make a 2d tensor for the bias term */
XTensor b2D;
InitTensor(&b2D, &s);
InitTensorV2(&b2D, &s);
_Unsqueeze(&b, &b2D, 0, batchSize);
/* introduce bias term:
......@@ -843,7 +852,7 @@ void Forward(XTensor inputs[], XTensor &output, FNNModel &model, FNNNet &net)
_MatrixMul(&h_last, X_NOTRANS, &w, X_NOTRANS, &s);
XTensor b2D;
InitTensor(&b2D, &s);
InitTensorV2(&b2D, &s);
_Unsqueeze(&b, &b2D, 0, batchSize);
_Sum(&s, &b2D, &s);
......@@ -908,8 +917,8 @@ void Backward(XTensor inputs[], XTensor &output, XTensor &gold, LOSS_FUNCTION_NA
XTensor dedsHidden;
XTensor dedxBottom;
if (depth > 0)
InitTensor(&dedsHidden, &dedx);
InitTensor(&dedxBottom, &net.embeddingCat);
InitTensorV2(&dedsHidden, &dedx);
InitTensorV2(&dedxBottom, &net.embeddingCat);
/* back-propagation from top to bottom in the stack of hidden layers
for each layer, h = f(s)
......@@ -943,11 +952,11 @@ void Backward(XTensor inputs[], XTensor &output, XTensor &gold, LOSS_FUNCTION_NA
_CopyValues(&dedx, &gradPassed);
}
XList eList(n - 1);
TensorList eList(n - 1);
/* back-propagation for the embedding layer */
for (int i = 0; i < n - 1; i++) {
XTensor * dedy = NewTensor2D(batchSize, model.eSize, X_FLOAT, model.devID, model.mem);
XTensor * dedy = NewTensor2DV2(batchSize, model.eSize, X_FLOAT, model.devID);
eList.Add(dedy);
}
......@@ -999,7 +1008,7 @@ void ForwardAutoDiff(NGram * ngrams, int batch, XTensor &output, FNNModel &model
}
}
InitTensor1D(&words, size, X_INT, model.devID, model.mem);
InitTensor1DV2(&words, size, X_INT, model.devID);
words.SetData(index, size);
embeddingBig = Gather(model.embeddingW, words);
......@@ -1017,7 +1026,8 @@ void ForwardAutoDiff(NGram * ngrams, int batch, XTensor &output, FNNModel &model
hidden = HardTanH(MMul(hidden, model.hiddenW[i]) + model.hiddenB[i]);
/* output layer */
output = LogSoftmax(MMul(hidden, model.outputW) + model.outputB, 1);
//output = LogSoftmax(MMul(hidden, model.outputW) + model.outputB, 1);
output = Softmax(MMul(hidden, model.outputW) + model.outputB, 1);
}
/*
......@@ -1036,7 +1046,7 @@ void ForwardAutoDiff(XTensor inputs[], XTensor &output, FNNModel &model)
XTensor hidden;
XTensor b;
XList inputList(n - 1);
TensorList inputList(n - 1);
for(int i = 0; i < n - 1; i++)
inputList.Add(inputs + i);
......@@ -1177,7 +1187,7 @@ void Test(const char * test, const char * result, FNNModel &model)
/* prediction probabilities */
XTensor probs;
InitTensor1D(&probs, ngramNum);
InitTensor1DV2(&probs, ngramNum);
/* get probabilities */
float prob = GetProb(output, gold, &probs);
......
......@@ -127,7 +127,7 @@ XTensor T2TAttention::MakeBig(XTensor &kqv, XTensor &mask, bool isTraining)
XTensor q2;
XTensor v2;
XTensor kqv2;
XList split;
TensorList split;
kqv2 = MMul(kqv, wbig);
......
......@@ -85,7 +85,7 @@ void T2TModel::InitModel(int argc, char ** argv)
if(isMT)
decoder->InitModel(argc, argv, true, 0, devID, mem);
XList params(10);
TensorList params(10);
GetParams(params);
for(int i = 0; i < params.count; i++){
......@@ -403,7 +403,7 @@ void T2TModel::MakeMTMaskDec(XTensor &inputEnc, XTensor &inputDec,
get parameter matrices
>> list - the list that keeps the parameter matrices
*/
void T2TModel::GetParams(XList &list)
void T2TModel::GetParams(TensorList &list)
{
list.Clear();
list.Add(&outputLayer->w);
......@@ -465,7 +465,7 @@ void T2TModel::Dump(const char * fn)
FILE * file = fopen(fn, "wb");
CheckNTErrors(file, "Cannot open the model file");
XList params(100);
TensorList params(100);
GetParams(params);
......@@ -489,7 +489,7 @@ void T2TModel::Read(const char * fn)
FILE * file = fopen(fn, "rb");
CheckNTErrors(file, "Cannot open the model file");
XList params(100);
TensorList params(100);
GetParams(params);
......
......@@ -98,7 +98,7 @@ public:
XTensor &maskDec, XTensor &maskEncDec);
/* get parameter matrices */
void GetParams(XList &list);
void GetParams(TensorList &list);
/* dump the parameters */
void Dump(const char * fn);
......
......@@ -93,9 +93,8 @@ void T2TOutput::Make(XTensor &input, XTensor &output)
{
XTensor &x = input;
output = LogSoftmax(MMul(x, w), -1);
//output = Softmax(MMul(x, w), -1);
//output = LogSoftmax(MMul(x, w), -1);
output = Softmax(MMul(x, w), -1);
output.SetName(OUTPUT_NAME);
}
......
......@@ -174,12 +174,13 @@ void T2TPredictor::Predict(T2TStateBundle * next, XTensor * encoding,
_SetDataFixedInt(&first, startSymbol);
/* add a new word into the input sequence of the decoder side */
if(inputLast == NULL){
if (inputLast == NULL) {
inputDec = Identity(first);
}
else{
inputDec = GeneratePaths(s);
inputDec.SetDevice(inputEnc->devID, inputEnc->mem);
inputDec = Concatenate(first, inputDec, inputDec.order - 1);
}
......
......@@ -96,10 +96,10 @@ public:
/* layers on the encoder side. We actually use the encoder output instead
of all hidden layers. */
XList layersEnc;
TensorList layersEnc;
/* layers on the decoder side */
XList layersDec;
TensorList layersDec;
/* list of states */
T2TState * states;
......
......@@ -197,12 +197,13 @@ void T2TSearch::Score(T2TStateBundle * prev, T2TStateBundle * beam)
prob.Reshape(prob.unitNum/outputSize, outputSize);
score.Reshape(score.unitNum/outputSize, outputSize);
probPath.Reshape(score.unitNum/outputSize, outputSize);
probPath.Reshape(score.unitNum / outputSize, outputSize);
probPathPrev.Reshape(probPathPrev.unitNum);
/* the log-scale probability of the entire sequence */
_SumDim(&prob, &probPathPrev, &probPath, 0);
InitTensor(&len, &lenPrev);
InitTensor(&lp, &lenPrev);
......@@ -302,7 +303,7 @@ void T2TSearch::Generate(T2TStateBundle * beam)
/* Then, we do something similar to "preID". For the top-k predictions, we need
to know their indices in the vocabulary. We compute the offset of each prediction
in the vocabulary by dividing it by the vocabulary size and taking the remainder. */
Mod(index, sizeVocab);
_ModMe(index, sizeVocab);
score.Reshape(order, dims);
......@@ -315,18 +316,19 @@ void T2TSearch::Generate(T2TStateBundle * beam)
InitTensor(&indexCPU, index.order, index.dimSize, index.dataType, index.denseRatio, -1);
CopyValues(index, indexCPU);
for(int i = 0; i < indexCPU.unitNum; i++)
for (int i = 0; i < indexCPU.unitNum; i++)
indexCPU.SetInt(i * stride + indexCPU.GetInt(i), i);
CheckNTErrors(XTensor::IsSameShaped(&prob, &probPath), "Wrong tensor shape!");
/* sequence probability and prediction probability of top-k candidates */
/* sequence probability of top-k candidates */
XTensor probPathTopK;
InitTensor(&probPathTopK, &scoreTopK);
XTensor probTopK;
InitTensor(&probTopK, &scoreTopK);
for(int i = 0; i < probPath.order; i++){
for (int i = 0; i < probPath.order; i++) {
dims[i] = probPath.GetDim(i);
dimsTopK[i] = probPathTopK.GetDim(i);
}
......@@ -342,6 +344,7 @@ void T2TSearch::Generate(T2TStateBundle * beam)
probPath.Reshape(order, dims);
probPathTopK.Reshape(order, dimsTopK);
prob.Reshape(order, dims);
probTopK.Reshape(order, dimsTopK);
......@@ -396,7 +399,7 @@ void T2TSearch::Expand(T2TStateBundle * prev, T2TStateBundle * beam)
modification of the states. An alternative is to do this on GPUs but
it needs much more coding work and the speed-up is not obvious. */
for(int i = 0; i < beam->stateNum; i += beamSize){
for(int j = 0; j < beamSize; j++){
for (int j = 0; j < beamSize; j++) {
int k = i + j;
T2TState & state = states[k];
......@@ -413,7 +416,7 @@ void T2TSearch::Expand(T2TStateBundle * prev, T2TStateBundle * beam)
state.nstep = 0;
state.isCompleted = false;
}
else{
else {
state.last = last;
state.pid = state.last->pid;
state.nstep = last->nstep + 1;
......@@ -517,7 +520,7 @@ void T2TSearch::Dump(XTensor * output)
/* we track the state from the end to the beginning */
while(state != NULL){
if(!state->isCompleted)
if (!state->isCompleted)
isCompleted = false;
if (isCompleted)
words[count++] = -1;
......@@ -589,7 +592,7 @@ XTensor T2TSearch::MakeFirstMask(T2TStateBundle * beam)
mask.SetZeroAll();
for (int i = 0; i < mask.unitNum; i++) {
if(i % beamSize != 0)
if (i % beamSize != 0)
mask.Set(-1e9, i);
}
......
......@@ -154,8 +154,8 @@ void T2TTester::Dump(FILE * file, XTensor * output)
{
int seqLength = output->GetDim(-1);
for(int i = 0; i < output->unitNum; i += seqLength){
for(int j = 0; j < seqLength; j++){
for (int i = 0; i < output->unitNum; i += seqLength) {
for (int j = 0; j < seqLength; j++) {
int w = output->GetInt(i + j);
fprintf(file, "%d ", w);
if (w < 0)
......
......@@ -24,6 +24,7 @@
#include "T2TUtility.h"
#include "../../tensor/XUtility.h"
#include "../../tensor/core/CHeader.h"
#include "../../tensor/loss/LHeader.h"
#include "../../network/XNoder.h"
#ifndef WIN32
......@@ -209,13 +210,16 @@ void T2TTrainer::Train(const char * fn, const char * validFN, const char * model
labelOnehot = IndexToOnehot(label, vSizeTgt, labelSmoothingP);
/* make paddings for the output */
if (output.GetDim(0) > 0)
PadOutput(&output, &labelOnehot, &paddingDec);
//if (output.GetDim(0) > 0)
//PadOutput(&output, &labelOnehot, &paddingDec);
/* get probabilities */
float prob = GetProb(&output, &labelOnehot, NULL);
//float prob = GetProb(&output, &labelOnehot, NULL);
XTensor lossTensor;
lossTensor = CrossEntropy(output, labelOnehot, paddingDec);
float prob = ReduceSumAll(lossTensor);
DTYPE lossLocal = -prob / wc;
DTYPE lossLocal = prob / wc;
bool doUpdate = (!IsNAN(lossLocal) && !IsINF(lossLocal) && lossLocal < 1e3F);
//XTensor &g = labelSmoothingP > 0 ? goldSmoothed : gold;
......@@ -223,14 +227,15 @@ void T2TTrainer::Train(const char * fn, const char * validFN, const char * model
if (doUpdate) {
/* rescale the output for normalized loss */
RescaleOutput(&output, &labelOnehot, &paddingDec);
//RescaleOutput(&output, &labelOnehot, &paddingDec);
/* back-propagation */
net.Backward(output, labelOnehot, paddingDec, CROSSENTROPY);
net.Backward(lossTensor);
//net.Backward(output, labelOnehot, paddingDec, CROSSENTROPY);
//net.Backward(output, label, labelSmoothingP, CROSSENTROPY);
gradStep += 1;
loss += -prob;
loss += prob;
wordCount += wc;
wordCountTotal += wc;
......@@ -260,7 +265,7 @@ void T2TTrainer::Train(const char * fn, const char * validFN, const char * model
if (step % 100 == 0) {
double elapsed = GetClockSec() - startT;
XPRINT8(0, stderr, "[INFO] elapsed=%.1fs, step=%d, epoch=%d, tword=%d, sword=%d, loss=%.3f, ppl=%.3f, sppl=%.3f",
elapsed, step, epoch, wordCountTotal, wordCountBatch, loss/wordCount, exp(loss/wordCount), exp(-prob/wc));
elapsed, step, epoch, wordCountTotal, wordCountBatch, loss/wordCount, exp(loss/wordCount), exp(prob/wc));
if (!doUpdate)
XPRINT(0, stderr, " (no update)");
XPRINT(0, stderr, "\n");
......@@ -491,7 +496,7 @@ where
*/
void T2TTrainer::Update(T2TModel * model, const float lr)
{
XList ws(100);
TensorList ws(100);
model->GetParams(ws);
......@@ -552,7 +557,7 @@ void T2TTrainer::PrepareModel(T2TModel * model)
moments.Clear();
moments2nd.Clear();
XList ws(100);
TensorList ws(100);
model->GetParams(ws);
......
......@@ -82,10 +82,10 @@ public:
float adamBeta2T;
/* list of the moment of the parameter matrices */
XList moments;
TensorList moments;
/* list of the 2nd order moment of the parameter matrices */
XList moments2nd;
TensorList moments2nd;
/* indicates whether the data file is shuffled for training */
bool isShuffled;
......
......@@ -30,6 +30,7 @@
#include "XDevice.h"
#include "./test/Test.h"
#include "./core/CHeader.h"
#include "./loss/CrossEntropy.h"
//#define CRTDBG_MAP_ALLOC
//#include <stdlib.h>
......
......@@ -476,7 +476,7 @@ split a string
>> items - splitting result
<< return - how many items are there
*/
int SplitALine(char * inputString, const char * seperator, XList * items)
int SplitALine(char * inputString, const char * seperator, StrList* items)
{
items->Clear();
......@@ -530,7 +530,7 @@ get device ids for the given device information
*/
int XDevManager::GetDeviceIDs(char * devInfo, int * devIDs)
{
XList * terms = new XList(1);
StrList* terms = new StrList(1);
SplitALine(devInfo, " ", terms);
for(int i = 0; i < terms->count; i++){
......
......@@ -49,7 +49,7 @@ namespace nts {
#ifdef DOUBELPRICSION
#define DTYPE double
#define DTYPE_MIN (DTYPE)1.79E+308
#define DTYPE_MIN (DTYPE)-1.79E+308
#else
#define DTYPE float
#define DTYPE_MIN (DTYPE)-3.40E+38
......
......@@ -300,9 +300,9 @@ void XLink::MakeLink(const XTensor * t1, const XTensor * t2, XTensor * h, int id
if(h == NULL)
return;
XList list(2);
list.Add(t1);
list.Add(t2);
TensorList list(2);
list.Add((XTensor*)t1);
list.Add((XTensor*)t2);
MakeLink(&list, h, id);
}
......@@ -320,10 +320,10 @@ void XLink::MakeLink(const XTensor * t1, const XTensor * t2, const XTensor * t3,
if (h == NULL)
return;
XList list(3);
list.Add(t1);
list.Add(t2);
list.Add(t3);
TensorList list(3);
list.Add((XTensor*)t1);
list.Add((XTensor*)t2);
list.Add((XTensor*)t3);
MakeLink(&list, h, id);
}
......@@ -334,7 +334,7 @@ create a hyper edge with a list of tensors and a output tensor
>> h - head tensor
>> id - id of the edge type
*/
void XLink::MakeLink(const XList * list, XTensor * h, int id)
void XLink::MakeLink(const TensorList * list, XTensor * h, int id)
{
/* forward */
XLink &income = h->income;
......@@ -368,7 +368,7 @@ create a hyper edge with a input tensors and a list of output tensors
>> list - a list of output tensors
>> id - id of the edge type
*/
void XLink::MakeLink(XTensor * t, XList * list, int id)
void XLink::MakeLink(XTensor * t, TensorList * list, int id)
{
/* forward */
for(int i = 0; i < list->count; i++){
......@@ -528,10 +528,90 @@ void XLink::Replace(const XTensor * oldOne, XTensor * newOne)
CheckNTErrors(hit, "No proper node found in parent.income edge!");
}
}
strcpy(newOne->name, oldOne->name);
}
/*
copy a node with another, i.e., we add the links to the new node
>> src - the node to be copied
>> tgt - the new node
*/
void XLink::Copy(const XTensor * reference, XTensor * target)
{
if (reference == NULL || target == NULL)
return;
XLink &newIncome = target->income;
XLink &newOutgo = target->outgo;
XLink::ClearOutgoing(target);
XLink::ClearIncoming(target);
/* incoming nodes */
if (reference->income.typeID != 0) {
if (newIncome.tailNum < reference->income.tailNum) {
delete[] newIncome.tails;
newIncome.tails = new XTensor*[reference->income.tailNum];
}
newIncome.SetType(reference->income.typeID);
newIncome.head = target;
newIncome.tailNum = reference->income.tailNum;
memcpy(newIncome.tails, reference->income.tails, sizeof(XTensor*) * newIncome.tailNum);
int paraArraySize = reference->income.paramNum * reference->income.paramSize;
newIncome.params = new char[paraArraySize];
memcpy(newIncome.params, reference->income.params, paraArraySize);
newIncome.paramNum = reference->income.paramNum;
/* update the link to each child node */
for (int i = 0; i < newIncome.tailNum; i++) {
XTensor * child = newIncome.tails[i];
XLink &childOutgo = child->outgo;
bool hit = false;
for (int j = 0; j < childOutgo.tailNum; j++) {
if (childOutgo.tails[j] == reference) {
//childOutgo.tails[j] = target;
childOutgo.AddTail(target);
hit = true;
break;
}
}
if (childOutgo.tailNum > 0) {
CheckNTErrors(hit, "No proper node found in child.outgo edge!");
}
}
}
if (newOutgo.tailNum < reference->outgo.tailNum) {
delete[] newOutgo.tails;
newOutgo.tails = new XTensor*[reference->outgo.tailNum];
}
/* outgoing nodes */
newOutgo.head = target;
newOutgo.tailNum = reference->outgo.tailNum;
memcpy(newOutgo.tails, reference->outgo.tails, sizeof(XTensor*) * newOutgo.tailNum);
/* update the link to each parent node */
for (int i = 0; i < newOutgo.tailNum; i++) {
XTensor * parent = newOutgo.tails[i];
XLink &parentIncome = parent->income;
bool hit = false;
for (int j = 0; j < parentIncome.tailNum; j++) {
if (parentIncome.tails[j] == reference) {
//parentIncome.tails[j] = target;
parentIncome.AddTail(target);
hit = true;
}
}
if (parentIncome.tailNum > 0) {
CheckNTErrors(hit, "No proper node found in parent.income edge!");
}
}
}
/*
copy incoming edges of a given node
>> reference - the node we copy from
......@@ -544,7 +624,7 @@ void XLink::CopyIncoming(const XTensor * reference, XTensor * target)
ClearIncoming(target);
int tailNum = reference->income.tailNum;
XList tails(tailNum);
TensorList tails(tailNum);
for(int i = 0; i < tailNum; i++){
XTensor * tail = (XTensor*)reference->income.tails[i];
tails.Add(tail);
......
......@@ -33,7 +33,7 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
/* cross reference */
struct XTensor;
#define MAX_OP_NAME_LENGTH 16
#define MAX_OP_NAME_LENGTH 64
#define PARAM_UNTI_SIZE 64
/*
......@@ -144,11 +144,11 @@ struct XLink
/* create a hyper edge with a list of input tensors and a output tensor */
static
void MakeLink(const XList * list, XTensor * h, int id);
void MakeLink(const TensorList * list, XTensor * h, int id);
/* create a hyper edge with a input tensors and a list of output tensors */
static
void MakeLink(XTensor * h, XList * list, int id);
void MakeLink(XTensor * h, TensorList * list, int id);
/* add a parameter */
static
......@@ -174,6 +174,10 @@ struct XLink
static
void Replace(const XTensor * oldOne, XTensor * newOne);
/* copy a node with another, i.e., we add the links to the new node */
static
void Copy(const XTensor * reference, XTensor * target);
/* copy links of a given node */
static
void CopyIncoming(const XTensor * reference, XTensor * target);
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* Copyright (C) 2019, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
......@@ -15,43 +15,34 @@
* limitations under the License.
*/
/*
/*
*
* Implementation of list that keeps data items
* Implementation of template list that keeps data items
*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-04-17
* $Created by: HU Chi (huchinlp@foxmail.com)
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "XList.h"
#include "XMem.h"
#include "XGlobal.h"
#include "wchar.h"
#include "locale.h"
#if !defined( WIN32 ) && !defined( _WIN32 )
#include "sys/time.h"
#include "time.h"
#include "iconv.h"
#else
#include "time.h"
#endif
#include <ctime>
#include <utility>
#include <algorithm>
/* the nts (NiuTrans.Tensor) namespace */
namespace nts{
XList NULLList;
/* the nts (NiuTrans.Tensor) namespace */
namespace nts {
/* constructor */
XList::XList()
template <typename T>
TensorListBase<T>::TensorListBase()
{
mem = NULL;
maxNum = 8;
maxNum = 0;
count = 0;
items = new void*[8];
isIntList = false;
items = NULL;
}
/*
......@@ -59,13 +50,13 @@ constructor
>> myMaxNum - maximum number of items to keep
>> isIntListOrNot - specify if the list keeps int items
*/
XList::XList(int myMaxNum, bool isIntListOrNot)
template <typename T>
TensorListBase<T>::TensorListBase(int myMaxNum)
{
mem = NULL;
maxNum = myMaxNum;
count = 0;
items = new void*[myMaxNum];
isIntList = isIntListOrNot;
items = new T[myMaxNum];
}
/*
......@@ -74,63 +65,64 @@ constructor
>> myMem - the memory pool used for data allocation
>> isIntListOrNot - specify if the list keeps int items
*/
XList::XList(int myMaxNum, XMem * myMem, bool isIntListOrNot)
template <typename T>
TensorListBase<T>::TensorListBase(int myMaxNum, XMem* myMem)
{
mem = myMem;
maxNum = myMaxNum;
count = 0;
items = (void**)mem->Alloc(mem->devID, sizeof(void*) * maxNum);
isIntList = isIntListOrNot;
items = (T*)mem->Alloc(mem->devID, sizeof(T) * maxNum);
}
/* de-constructor */
XList::~XList()
template <typename T>
TensorListBase<T>::~TensorListBase()
{
if(isIntList){
for(int i = 0; i < count; i++){
int * p = (int*)items[i];
delete[] p;
}
}
if(mem == NULL)
delete[] items;
}
/*
allocate the data array for the list
>> myMaxNum - maximum number of items to keep
>> isIntListOrNot - specify if the list keeps int items
add an item into the list
>> item - a right value
*/
void XList::Create(int myMaxNum, XMem * myMem)
template <typename T>
void TensorListBase<T>::Add(T&& item)
{
mem = myMem;
maxNum = myMaxNum;
count = 0;
items = (void**)mem->Alloc(mem->devID, sizeof(void*) * maxNum);
if (count == maxNum) {
T* newItems;
if (mem == NULL)
newItems = new T[maxNum * 2 + 1];
else
newItems = (T*)mem->Alloc(mem->devID, sizeof(T) * (maxNum * 2 + 1));
memcpy(newItems, items, sizeof(T) * maxNum);
items = newItems;
maxNum = maxNum * 2 + 1;
}
items[count++] = item;
}
/*
add an item into the list
>> item - pointer to the item
>> item - a const reference to the item
*/
void XList::Add(const void * item)
template <typename T>
void TensorListBase<T>::Add(const T& item)
{
if( count == maxNum ){
void ** newItems;
if( mem == NULL )
newItems = new void*[maxNum * 2 + 1];
if (count == maxNum) {
T* newItems;
if (mem == NULL)
newItems = new T[maxNum * 2 + 1];
else
newItems = (void**)mem->Alloc(mem->devID, sizeof(void*) * (maxNum * 2 + 1));
memcpy(newItems, items, sizeof(void*) * maxNum);
if( mem == NULL )
delete[] items;
newItems = (T*)mem->Alloc(mem->devID, sizeof(T) * (maxNum * 2 + 1));
memcpy(newItems, items, sizeof(T) * maxNum);
items = newItems;
maxNum = maxNum * 2 + 1;
}
MTYPE p = (MTYPE)item;
items[count++] = (MTYPE*)p;
items[count++] = item;
}
/*
......@@ -138,22 +130,21 @@ add a number of items into the list
>> inputItems - pointer to the array of items
>> inputItemCount - number of input items
*/
void XList::Add(void ** inputItems, int inputItemCount)
template <typename T>
void TensorListBase<T>::Add(T* inputItems, int inputItemCount)
{
if( count + inputItemCount >= maxNum ){
if (count + inputItemCount >= maxNum) {
int newMaxNum = (count + inputItemCount) * 2 + 1;
void ** newItems;
if( mem == NULL )
newItems = new void*[newMaxNum];
T* newItems;
if (mem == NULL)
newItems = new T[newMaxNum];
else
newItems = (void**)mem->Alloc(mem->devID, sizeof(void*) * newMaxNum);
memcpy(newItems, items, sizeof(void*) * maxNum);
if( mem == NULL )
delete[] items;
newItems = (T*)mem->Alloc(mem->devID, sizeof(T) * newMaxNum);
memcpy(newItems, items, sizeof(T) * maxNum);
items = newItems;
maxNum = newMaxNum;
}
memcpy(items + count, inputItems, sizeof(void*) * inputItemCount);
memcpy(items + count, inputItems, sizeof(T) * inputItemCount);
count += inputItemCount;
}
......@@ -161,83 +152,82 @@ void XList::Add(void ** inputItems, int inputItemCount)
append a list to the current list
>> l - the list we use to append
*/
void XList::AddList(XList * l)
template <typename T>
void TensorListBase<T>::AddList(TensorListBase* l)
{
Add(l->items, l->count);
}
/*
add an integer-typed item into the list
>> item - pointer to the item
insert an item to the given position of the list
>> pos - the position
>> item - the item for insertion
*/
void XList::AddInt(int i)
template <typename T>
void TensorListBase<T>::Insert(int pos, const T& item)
{
CheckNTErrors(isIntList, "An int list is required!");
if (count == maxNum) {
T* newItems;
if (mem == NULL)
newItems = new T[maxNum * 2 + 1];
else
newItems = (T*)mem->Alloc(mem->devID, sizeof(T) * (maxNum * 2 + 1));
memcpy(newItems, items, sizeof(T) * maxNum);
items = newItems;
maxNum = maxNum * 2 + 1;
}
int * a = new int[1];
*a = i;
Add(a);
for (int i = count - 1; i >= pos; i--)
items[i + 1] = items[i];
items[pos] = item;
count++;
}
/*
insert an item to the given position of the list
>> pos - the position
>> item - the item for insertion
*/
void XList::Insert(int pos, void * item)
template<typename T>
void TensorListBase<T>::Insert(int pos, T&& item)
{
if( count == maxNum ){
void ** newItems;
if( mem == NULL )
newItems = new void*[maxNum * 2 + 1];
if (count == maxNum) {
T* newItems;
if (mem == NULL)
newItems = new T[maxNum * 2 + 1];
else
newItems = (void**)mem->Alloc(mem->devID, sizeof(void*) * (maxNum * 2 + 1));
memcpy(newItems, items, sizeof(void*) * maxNum);
if( mem == NULL )
delete[] items;
newItems = (T*)mem->Alloc(mem->devID, sizeof(T) * (maxNum * 2 + 1));
memcpy(newItems, items, sizeof(T) * maxNum);
items = newItems;
maxNum = maxNum * 2 + 1;
}
for(int i = count - 1; i >= pos; i--)
for (int i = count - 1; i >= pos; i--)
items[i + 1] = items[i];
items[pos] = item;
count++;
}
/* get the item at position i */
void * XList::GetItem(int i) const
template <typename T>
T& TensorListBase<T>::GetItem(int i) const
{
CheckNTErrors(i >= -1 && i < count, "Index of a list item is out of scope!");
CheckNTErrors(count > 0, "Cannt index the item in an empty list!");
if(i == -1)
if (i == -1)
return items[count - 1];
else
return items[i];
}
/* get the integer-typed item at position i */
int XList::GetItemInt(int i)
{
CheckNTErrors(isIntList, "An int list is required!");
CheckNTErrors(i >= 0 && i < count, "Index of a list item is out of scope!");
return *(int*)(items[i]);
}
/* set the item at position i */
void XList::SetItem(int i, void * item)
template <typename T>
inline void TensorListBase<T>::SetItem(int i, const T& item)
{
if( i >= 0 && i < count )
if (i >= 0 && i < count)
items[i] = item;
}
/* set the item at position i */
void XList::SetItemInt(int i, int item)
template<typename T>
inline void TensorListBase<T>::SetItem(int i, T&& item)
{
CheckNTErrors(isIntList, "An int list is required!");
if( i >= 0 && i < count )
*(int*)(items[i]) = item;
if (i >= 0 && i < count)
items[i] = std::move(item);
}
/*
......@@ -246,37 +236,28 @@ find the position of the first matched item
<< the position where we hit the item (if any)
*/
int XList::FindFirst(void * item)
template <typename T>
inline int TensorListBase<T>::FindFirst(const T& item)
{
for(int i = 0;i < count; i++){
if(item == items[i])
for (int i = 0; i < count; i++) {
if (item == items[i])
return i;
}
return -1;
}
/* clear the data array */
void XList::Clear()
template <typename T>
void TensorListBase<T>::Clear()
{
if(isIntList){
for(int i = 0; i < count; i++){
delete[] (int*)items[i];
}
count = 0;
}
else
count = 0;
}
/* delete the data array as well as the string arrays kept in it */
void XList::ClearStringList()
{
if(mem == NULL){
for(int i = 0; i < count; i++){
delete[] (char*)items[i];
}
}
count = 0;
/*
compare function for two elements
*/
int Compare(const void* a, const void* b) {
return (*(int*)(a)-*(int*)(b));
}
/*
......@@ -284,29 +265,32 @@ sort the list
>> itemSize - size of an item
>> comp - the comparison function used in sorting
*/
void XList::Sort(int itemSize, ListCompare comp)
template <typename T>
void TensorListBase<T>::Sort(int itemSize)
{
qsort(items, count, itemSize, comp);
qsort((void*)items, count, itemSize, Compare);
}
/* reverse the list */
void XList::Reverse()
template <typename T>
inline void TensorListBase<T>::Reverse()
{
int half = count/2;
for(int i = 0; i < half; i++){
void * tmp = items[i];
int half = count / 2;
for (int i = 0; i < half; i++) {
T tmp(items[i]);
items[i] = items[count - i - 1];
items[count - i - 1] = tmp;
}
}
/* remove the item at position i */
void XList::Remove(int i)
template <typename T>
void TensorListBase<T>::Remove(int i)
{
if(i >= count || i < 0)
if (i >= count || i < 0)
return;
memcpy(items + i, items + i + 1, sizeof(void*) * (count - i - 1));
memcpy(items + i, items + i + 1, sizeof(T*) * (count - i - 1));
count--;
}
......@@ -316,10 +300,11 @@ copy the list
>> myMem - memory pool used for allocating the data in the new list
<< hard copy of the list
*/
XList * XList::Copy(XMem * myMem)
template <typename T>
TensorListBase<T>* TensorListBase<T>::Copy(XMem* myMem)
{
XList * newList = new XList(maxNum, myMem);
for(int i = 0; i < count; i++){
TensorListBase<T>* newList = new TensorListBase<T>(maxNum, myMem);
for (int i = 0; i < count; i++) {
newList->Add(GetItem(i));
}
return newList;
......@@ -331,29 +316,39 @@ shuffle the list
>> beg - where we start
>> len - how many items are used in shuffling
*/
void XList::Shuffle(int nround, int beg, int len)
template <typename T>
void TensorListBase<T>::Shuffle(int nround, int beg, int len)
{
if(beg < 0){
if (beg < 0) {
beg = 0;
len = count;
}
if(beg + len > count)
if (beg + len > count)
return;
srand((unsigned int)time(NULL));
for(int k = 0; k < nround; k++){
/* Fisher-Yates shuffle */
for(int i = 0; i < len; i++){
float a = (float)rand()/RAND_MAX;
size_t j = (unsigned int) (a*(i+1));
void* t = items[beg + j];
for (int k = 0; k < nround; k++) {
/* Fisher-Yates shuffle */
for (int i = 0; i < len; i++) {
float a = (float)rand() / RAND_MAX;
size_t j = (unsigned int)(a * (i + 1));
T t = items[beg + j];
items[beg + j] = items[beg + i];
items[beg + i] = t;
}
}
}
}
/* end of the nts (NiuTrans.Tensor) namespace */
/* specializations and typedef of list */
template struct TensorListBase<int>;
template struct TensorListBase<char>;
template struct TensorListBase<char*>;
template struct TensorListBase<long>;
template struct TensorListBase<float>;
template struct TensorListBase<short>;
template struct TensorListBase<XTensor*>;
template struct TensorListBase<void*>;
} /* end of the nts (NiuTrans.Tensor) namespace */
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* Copyright (C) 2019, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
......@@ -15,32 +15,31 @@
* limitations under the License.
*/
/*
/*
*
* Implementation of list that keeps data items
* Implementation of template list that keeps data items
*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-04-17
* The first coding job this year!
* $Created by: HU Chi (huchinlp@foxmail.com)
*
*/
#ifndef __XLIST_H__
#define __XLIST_H__
#include "XMem.h"
#include "XGlobal.h"
/* the nts (NiuTrans.Tensor) namespace */
namespace nts{
#ifndef __TensorList_H__
#define __TensorList_H__
typedef int (* ListCompare)(const void * item1, const void * item2);
/* the nts (NiuTrans.Tensor) namespace */
namespace nts {
/* the XList class */
class XList
{
/* the TensorListBase class */
template <typename T>
struct TensorListBase {
public:
/* data items */
void ** items;
T *items;
/* number of items */
int count;
......@@ -49,56 +48,88 @@ public:
int maxNum;
/* the memory pool for data array allocation */
XMem * mem;
/* indicates whether data items are integers */
bool isIntList;
XMem* mem;
public:
/* constructor */
XList();
TensorListBase();
/* constructor */
XList(int myMaxNum, bool isIntListOrNot = false);
TensorListBase(int myMaxNum);
/* constructor */
XList(int myMaxNum, XMem * myMem, bool isIntListOrNot = false);
TensorListBase(int myMaxNum, XMem* myMem);
/* de-constructor */
~XList();
/* utilities */
void Create(int myMaxNum, XMem * myMem);
void Add(const void * item);
void Add(void ** inputItems, int inputItemCount);
void AddList(XList * l);
void AddInt(int i);
void Insert(int pos, void * item);
void * GetItem(int i) const;
int GetItemInt(int i);
void SetItem(int i, void * item);
void SetItemInt(int i, int item);
int FindFirst(void * item);
~TensorListBase();
/* add an item into the list */
void Add(T&& item);
/* add an item into the list */
void Add(const T& item);
/* add a number of items into the list */
void Add(T* inputItems, int inputItemCount);
/* append a list to the current list */
void AddList(TensorListBase* l);
/* insert an item to the given position of the list */
void Insert(int pos, const T& item);
/* insert an item to the given position of the list */
void Insert(int pos, T&& item);
/* get the item at position i */
T& GetItem(int i) const;
/* set the item at position i */
void SetItem(int i, const T& item);
/* set the item at position i */
void SetItem(int i, T&& item);
/* find the position of the first matched item */
int FindFirst(const T& item);
/* clear the data array */
void Clear();
void ClearStringList();
void Sort(int itemSize, ListCompare comp);
/* sort the list */
void Sort(int itemSize);
/* reverse the list */
void Reverse();
/* remove the item at position i */
void Remove(int i);
XList * Copy(XMem * myMem);
/* copy the list */
TensorListBase* Copy(XMem* myMem);
/* shuffle the list */
void Shuffle(int nround = 10, int beg = -1, int len = 0);
/* short */
_XINLINE_ void * Get(int i) {return GetItem(i);};
_XINLINE_ int GetInt(int i) {return GetItemInt(i);};
_XINLINE_ void Set(int i, void * item) {SetItem(i, item);};
_XINLINE_ void SetInt(int i, int item) {SetItemInt(i, item);};
T& operator[] (int i) {
return GetItem(i);
};
T& Get(int i) { return GetItem(i); };
void Set(int i, T item) { SetItem(i, item); };
};
extern XList NULLList;
struct XTensor;
typedef TensorListBase<int> IntList;
typedef TensorListBase<char> CharList;
typedef TensorListBase<char*> StrList;
typedef TensorListBase<long> LongList;
typedef TensorListBase<float> FloatList;
typedef TensorListBase<short> ShortList;
typedef TensorListBase<void*> XList;
typedef TensorListBase<XTensor*> TensorList;
}
/* end of the nts (NiuTrans.Tensor) namespace */
} /* end of the nts (NiuTrans.Tensor) namespace */
#endif
#endif // __TensorList_H__
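A minimal usage sketch of the new typed lists (illustrative only; the tensors a and b are assumed to exist elsewhere):
#include "XList.h"
using namespace nts;

void ListDemo(XTensor * a, XTensor * b)
{
    TensorList tensors(2);          /* typed replacement for the untyped XList */
    tensors.Add(a);
    tensors.Add(b);
    XTensor * first = tensors[0];   /* operator[] replaces Get(i) plus casts   */

    IntList ids;                    /* replaces the old isIntList mode         */
    ids.Add(1);
    ids.Add(2);
    int lastId = ids.Get(ids.count - 1);
    (void)first; (void)lastId;
}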
......@@ -34,6 +34,11 @@ namespace nts{
int testxmemid = 0;
void * recordp = NULL;
/*
the global manager of memory pools
*/
XMemManager GMems;
XMem * GMem;
/* constructor */
......@@ -1488,4 +1493,158 @@ cublasHandle_t * XMem::GetCublasHandle()
#endif
/* constructor */
XMemManager::XMemManager()
{
Initialize();
}
/* de-constructor */
XMemManager::~XMemManager()
{
}
/* get memory size */
MTYPE XMemManager::GetAvailableMemory()
{
unsigned long freeMem = 0;
#ifndef WIN32
long pages = sysconf(_SC_AVPHYS_PAGES);
long page_size = sysconf(_SC_PAGE_SIZE);
freeMem = pages * page_size;
#else
MEMORYSTATUSEX memoryStatus;
memoryStatus.dwLength = sizeof(memoryStatus);
if (GlobalMemoryStatusEx(&memoryStatus)){
freeMem = memoryStatus.ullAvailPhys;
}
#endif
return (MTYPE)freeMem;
}
/* get GPU memory size */
MTYPE XMemManager::GetAvailableGPUMemory(int devID)
{
size_t freeMem = 0;
size_t totalMem = 0;
#ifdef USE_CUDA
cudaSetDevice(devID);
if (cudaMemGetInfo(&freeMem, &totalMem) != cudaSuccess){
XPRINT(0, stderr, "cannot get GPU memory information.");
exit(1);
}
#endif
return (MTYPE)freeMem;
}
/* get buffer size */
void XMemManager::GetBufferSize(MTYPE freeMem, MTYPE * myBufSize)
{
*myBufSize = 0;
if (freeMem >= MILLION * 128){
*myBufSize = MILLION * 32;
if (freeMem >= MILLION * 256){
*myBufSize = MILLION * 64;
if (freeMem >= MILLION * 512){
*myBufSize = MILLION * 128;
if (freeMem >= MILLION * 1024) {
*myBufSize = MILLION * 256;
if (freeMem >= MILLION * 2048)
*myBufSize = MILLION * 512;
}
}
}
}
}
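/* an illustrative sketch (not part of the library): a free-memory value of
600 * MILLION falls in the [512 * MILLION, 1024 * MILLION) band, so the
tiering above settles on a 128 * MILLION buffer; the helper name is made up */
void ShowBufferTierExample()
{
    MTYPE freeMemExample = (MTYPE)MILLION * 600;
    MTYPE bufSizeExample = 0;
    GMems.GetBufferSize(freeMemExample, &bufSizeExample);   /* bufSizeExample == MILLION * 128 */
}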
/* initialize it and set the global memory information */
void XMemManager::Initialize()
{
srand((unsigned int)time(NULL));
Free();
/* CPUs (we do not care how many CPUs are actually in use here) */
nCPUMem = 1;
MTYPE freeMem = GetAvailableMemory();
MTYPE myBufSize = 0;
GetBufferSize(freeMem, &myBufSize);
CPUMems[0].Initialize(-1, UNI_FREE, MIN_BLOCK_SIZE_FOR_MEMPOOL, MIN_BLOCK_NUM_FOR_MEMPOOL, myBufSize);
/* GPUs */
nGPUMem = 0;
#ifdef USE_CUDA
if (cudaGetDeviceCount(&nGPUMem) != cudaSuccess) {
XPRINT(0, stderr, "cannot get GPU information.");
exit(1);
}
for (int i = 0; i < nGPUMem; i++) {
MTYPE freeMem = GetAvailableGPUMemory(i);
MTYPE myBufSize = 0;
GetBufferSize(freeMem, &myBufSize);
GPUMems[i].Initialize(i, UNI_FREE, MIN_BLOCK_SIZE_FOR_MEMPOOL, MIN_BLOCK_NUM_FOR_MEMPOOL, myBufSize);
}
#endif
}
/* free it */
void XMemManager::Free()
{
for (int i = 0; i < MAX_CPU_NUM; i++)
CPUMems[i].Free();
for (int i = 0; i < MAX_GPU_NUM; i++)
GPUMems[i].Free();
}
/* get global memory pool */
XMem * XMemManager::GetMem(const int devID)
{
XMem * mem = NULL;
if (devID < 0)
mem = CPUMems;
else{
if (devID < nGPUMem)
mem = GPUMems + devID;
else
XPRINT1(0, stderr, "Cannot get the memory (%d). Please check your device id!", devID);
}
return mem;
}
/* get global memory size */
int XMemManager::GetMemSize(const int devID, MTYPE * myBlockSize, int * myBlockNum, MTYPE * myBufSize)
{
XMem * mem = GetMem(devID);
int result = 0;
if (mem != NULL){
*myBlockSize = mem->maxBlockSize;
*myBlockNum = mem->blockNum;
*myBufSize = mem->bufSize;
result = 1;
}
return result;
}
/* show memory information */
void XMemManager::ShowMemInfo()
{
XPRINT(1, stderr, "Memory Information:\n");
MTYPE myBlockSize, myBufSize;
int myBlockNum;
for(int i = 0; i < nCPUMem; i++){
GetMemSize(-1, &myBlockSize, &myBlockNum, &myBufSize);
XPRINT3(1, stderr, " - id:-1 CPU, blockSize:%d, blockNum:%d, bufSize:%d\n", myBlockSize, myBlockNum, myBufSize);
}
for(int i = 0; i < nGPUMem; i++){
GetMemSize(i, &myBlockSize, &myBlockNum, &myBufSize);
XPRINT4(1, stderr, " - id:%2d GPU, blockSize:%d, blockNum:%d, bufSize:%d\n", i, myBlockSize, myBlockNum, myBufSize);
}
}
} /* end of the nts (NiuTrans.Tensor) namespace */
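/* a minimal usage sketch (illustrative only, not part of the library): the global
manager GMems hands out one memory pool per device; devID < 0 selects the CPU pool,
0 .. nGPUMem-1 a GPU pool; the helper name below is made up for the example */
void ShowHowToFetchAPool()
{
    nts::XMem * pool = nts::GMems.GetMem(-1);
    if (pool != NULL) {
        /* the pool can be passed to tensor constructors that accept an XMem* */
    }
}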
......@@ -39,6 +39,12 @@
#include <curand.h>
#endif
#ifndef WIN32
#include <unistd.h>
#else
#include <windows.h>
#endif
/* the nts (NiuTrans.Tensor) namespace */
namespace nts{
......@@ -53,6 +59,8 @@ typedef long long INT_64;
#define BUF_PITCH 256
#define MIN_BLOCK_SIZE_FOR_MEMPOOL 128 * 1024 * 1024
#define MIN_BLOCK_NUM_FOR_MEMPOOL 1024
#define MAX_CPU_NUM 16
#define MAX_GPU_NUM 16
/*
mode of running a memory pool
......@@ -413,6 +421,61 @@ public:
};
/*
a class for the management of memory
*/
class XMemManager
{
public:
/* cpu memory pool information */
XMem CPUMems[MAX_CPU_NUM];
/* number of cpu memory pools */
int nCPUMem;
/* gpu memory pool information */
XMem GPUMems[MAX_GPU_NUM];
/* number of gpu memory pools */
int nGPUMem;
public:
/* constructor */
XMemManager();
/* de-constructor */
~XMemManager();
/* get memory size */
MTYPE GetAvailableMemory();
/* get GPU memory size */
MTYPE GetAvailableGPUMemory(int devID);
/* get buffer size */
void GetBufferSize(MTYPE freeMem, MTYPE * myBufSize);
/* initialize it and set the global memory information */
void Initialize();
/* free it */
void Free();
/* get global memory pool */
XMem * GetMem(const int devID);
/* get global memory size */
int GetMemSize(const int devID, MTYPE * myBlockSize, int * myBlockNum, MTYPE * myBufSize);
/* show memory information */
void ShowMemInfo();
};
/* managing the memories */
extern XMemManager GMems;
extern XMem * GMem;
extern int testxmemid;
......
......@@ -77,6 +77,12 @@ const char * GetOPName(int type)
return "M_POWER";
else if (type == MATH_SCALEANDSHIFT)
return "M_SCALEANDSHIFT";
else if (type == MATH_SCALE)
return "M_SCALE";
else if (type == MATH_DESCALE)
return "M_DESCALE";
else if (type == MATH_SHIFT)
return "M_SHIFT";
else if (type == MATH_MULANDSHIFT)
return "M_OPERATION";
else if (type == MATH_SIGN)
......@@ -111,6 +117,8 @@ const char * GetOPName(int type)
return "M_COPYVALUES";
else if (type == MOVEMENT_GATHER)
return "M_GATHER";
else if (type == MOVEMENT_DROPOUTWITHINDEX)
return "M_DROPOUTWITHINDEX";
else if (type == SHAPE_CONCATENATE)
return "S_CONCATENATE";
else if (type == SHAPE_MERGE)
......@@ -152,6 +160,10 @@ const char * GetOPName(int type)
else if (type == FUNC_SOFTMAX)
return "F_SOFTMAX";
}
else if ((type & LOSS_BASE) != 0) {
if (type == LOSS_CROSSENTROPY)
return "L_CROSSENTROPY";
}
return "NULL";
}
......
......@@ -58,7 +58,11 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#define MATH_POWER MATH_NORMALIZE + 1
#define MATH_SCALEANDSHIFT MATH_POWER + 1
#define MATH_MULANDSHIFT MATH_SCALEANDSHIFT + 1
#define MATH_SIGN MATH_MULANDSHIFT + 1
#define MATH_SCALE MATH_MULANDSHIFT + 1
#define MATH_DESCALE MATH_SCALE + 1
#define MATH_SHIFT MATH_DESCALE + 1
#define MATH_MOD MATH_SHIFT + 1
#define MATH_SIGN MATH_MOD + 1
#define MATH_SUB MATH_SIGN + 1
#define MATH_SUBDIM MATH_SUB + 1
#define MATH_SUM MATH_SUBDIM + 1
......@@ -81,8 +85,9 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#define MOVEMENT_COPYINDEXED MOVEMENT + 1
#define MOVEMENT_COPYVALUES MOVEMENT_COPYINDEXED + 1
#define MOVEMENT_GATHER MOVEMENT_COPYVALUES + 1
#define MOVEMENT_DROPOUTWITHINDEX MOVEMENT_GATHER + 1
#define SHAPE MOVEMENT_GATHER + 1
#define SHAPE MOVEMENT_DROPOUTWITHINDEX + 1
#define SHAPE_CONCATENATE SHAPE + 1
#define SHAPE_MERGE SHAPE_CONCATENATE + 1
#define SHAPE_MERGE_LIST SHAPE_MERGE + 1
......@@ -108,6 +113,9 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#define FUNC_SIGMOID FUNC_RECTIFY + 1
#define FUNC_SOFTMAX FUNC_SIGMOID + 1
#define LOSS_BASE FUNCTION_BASE * 2
#define LOSS_CROSSENTROPY LOSS_BASE + 1
/* get operator name */
const char * GetOPName(int type);
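/* a quick sketch (illustrative only): the new ids resolve to readable names,
which helps when tracing the computation graph; the helper name is made up */
void ShowNewOPNames()
{
    const char * scaleName = GetOPName(MATH_SCALE);        /* "M_SCALE" */
    const char * lossName  = GetOPName(LOSS_CROSSENTROPY); /* "L_CROSSENTROPY" */
}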
......
......@@ -146,7 +146,7 @@ run a set of jobs in parallel
>> jobArgs - the list of arguments for each job
>> sleepTime - time to sleep (in ms) for each round
*/
void XPRunner::Run(XList * jobFunctions, XList * jobArgs, float sleepTime)
void XPRunner::Run(TensorList * jobFunctions, TensorList * jobArgs, float sleepTime)
{
if(threadNum <= 0){
XPRINT(1, stderr, "Error! No threads were created!\n");
......@@ -195,7 +195,7 @@ void XPRunner::Run(XList * jobFunctions, XList * jobArgs, float sleepTime)
TFunction function = (TFunction)jobFunctions->GetItem(jobArgs->count - c);
/* the arguments that are passed to the function */
volatile XList * args = (XList*)jobArgs->GetItem(jobArgs->count - c);
volatile TensorList * args = (TensorList*)jobArgs->GetItem(jobArgs->count - c);
/* thread */
XThread * thread = threads + availableThreads[i];
......
......@@ -106,7 +106,7 @@ public:
void KillThreads();
/* run a set of jobs in parallel */
void Run(XList * jobFunctions, XList * jobArgs, float sleepTime = 0);
void Run(TensorList * jobFunctions, TensorList * jobArgs, float sleepTime = 0);
/* get the number of parallel jobs to run */
int GetJobNum(int size);
......
......@@ -42,7 +42,7 @@ job item used in queues
JobQueueNode::JobQueueNode()
{
job = NULL;
args = new XList(1);
args = new TensorList(1);
}
/* de-constructor */
......@@ -67,7 +67,7 @@ XQueue::XQueue(int mySize)
head = 0;
tail = 0;
isJobQueue = false;
jobDequeuerArgs = new XList(1);
jobDequeuerArgs = new TensorList(1);
jobDequeuerBreak = false;
runningJobCount = 0;
jobStream = NULL;
......@@ -188,8 +188,10 @@ void XQueue::RunJobConsumer(int jobDevID)
isJobQueue = true;
jobDequeuerArgs->Clear();
jobDequeuerArgs->Add(this);
jobDequeuerArgs->Add(jobDevID >= 0 ? devids + jobDevID : &cpuid);
// warning: these casts force non-tensor pointers into a TensorList and may lead to undefined behavior
jobDequeuerArgs->Add((XTensor*)this);
jobDequeuerArgs->Add(jobDevID >= 0 ? (XTensor*)(devids + jobDevID) : (XTensor*)&cpuid);
jobDequeuer.function = (TFunction)DequeueJobs;
jobDequeuer.argv = jobDequeuerArgs;
......@@ -211,7 +213,7 @@ void XQueue::StopJobConsumer()
}
/* add a job item to process */
void XQueue::EnqueueJob(void * job, XList * jobArgs)
void XQueue::EnqueueJob(void * job, TensorList * jobArgs)
{
MUTEX_LOCK(jobQueueMutex);
runningJobCount++;
......@@ -225,7 +227,7 @@ void XQueue::EnqueueJob(void * job, XList * jobArgs)
}
/* job item consumer */
void XQueue::DequeueJobs(XList * args)
void XQueue::DequeueJobs(TensorList * args)
{
CheckNTErrors((args->count == 2), "Illegal arguments!");
......
......@@ -52,7 +52,7 @@ public:
void * job;
/* arguments of the job */
XList * args;
TensorList * args;
public:
/* constructor */
......@@ -102,7 +102,7 @@ private:
XThread jobDequeuer;
/* argument list of jobDequeuer */
XList * jobDequeuerArgs;
TensorList * jobDequeuerArgs;
/* indicates whether jobDequeuer stops */
bool jobDequeuerBreak;
......@@ -141,11 +141,11 @@ public:
void StopJobConsumer();
/* add a job item to process */
void EnqueueJob(void * job, XList * jobArgs);
void EnqueueJob(void * job, TensorList * jobArgs);
/* job item consumer */
static
void DequeueJobs(XList * args);
void DequeueJobs(TensorList * args);
/* get the break flag */
bool GetJobBreak();
......
......@@ -564,6 +564,37 @@ bool XTensor::IsSameShaped(const XTensor * a, const XTensor * b, const XTensor *
return IsSameShaped(a, b) && IsSameShaped(a, c);
}
bool XTensor::IsReduceShaped(const XTensor * a, const XTensor * b, int dim)
{
if (a == NULL || b == NULL)
return false;
if ((a->order - 1) != b->order)
return false;
for (int i = 0; i < b->order; i++) {
if (i < dim) {
if (a->dimSize[i] != b->dimSize[i])
return false;
}
else if (i >= dim) {
if (a->dimSize[i+1] != b->dimSize[i])
return false;
}
}
if(a->dataType != b->dataType)
return false;
if(a->denseRatio != b->denseRatio)
return false;
if(a->isSparse != b->isSparse)
return false;
return true;
}
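/* example (sketch): if a has shape (4, 5, 6) and b has shape (4, 6),
then IsReduceShaped(a, b, 1) holds, i.e., b is a with dimension 1 reduced */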
/*
set the size of each dimension
>> myDimSize - size of each dimension
......@@ -644,21 +675,21 @@ reshape the tensor by merging two consecutive dimensions
*/
void XTensor::ReshapeMerged(const int i, const int j)
{
if(i < 0)
if (i < 0)
return;
int di = i;
int dj = j < 0 ? i + 1: j;
int dj = j < 0 ? i + 1 : j;
CheckNTErrors(di < order, "Wrong dimension index!");
int dims[MAX_TENSOR_DIM_NUM];
for(int k = 0; k < di; k++)
for (int k = 0; k < di; k++)
dims[k] = dimSize[k];
dims[di] = dimSize[di] * dimSize[dj];
for(int k = dj + 1; k < order; k++)
for (int k = dj + 1; k < order; k++)
dims[k - 1] = dimSize[k];
Reshape(order - 1, dims);
......@@ -1983,7 +2014,7 @@ void XTensor::FlushToMem(XMem * targetMem)
if (targetMem->devID >= 0) {
#ifdef USE_CUDA
if (devID < 0) {
XList l(1);
TensorList l(1);
l.Add(this);
CudaCPUToGPUFlush(&l, targetMem->devID, targetMem);
}
......@@ -2123,6 +2154,48 @@ void InitTensor(XTensor * tensor,
}
/*
initialize a dense tensor V2
>> tensor - the tensor we intend to initialize
>> myOrder - order of the tensor
>> myDimSize - the size of each dimension
>> myDataType - unit size (e.g., int, float, and double)
>> myDevID - the device on which we allocate the data
*/
void InitTensorV2(XTensor * tensor,
const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType,
const int myDevID)
{
if(tensor->mem != NULL){
tensor->Resize(myOrder, myDimSize, myDataType, 1.0F);
}
else{
int dims[MAX_TENSOR_DIM_NUM];
memcpy(dims, myDimSize, sizeof(int) * myOrder);
bool allocated = true;
for (int i = 0; i < myOrder; i++) {
if (dims[i] < 0)
allocated = false;
}
/* keep the first dimension negative so that no data is allocated during Resize();
AllocateData() below does the allocation when all input dimensions are positive */
dims[0] = -abs(dims[0]);
if (myDevID == CURRENT_GPU)
tensor->devID = XDevice::GetGPUDevice();
else
tensor->devID = myDevID;
tensor->Resize(myOrder, dims, myDataType, 1.0F);
if(allocated)
XTensor::AllocateData(tensor);
}
}
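/* usage sketch (illustrative, not part of the library): the V2 initializer drops
the XMem* argument, so the device id alone decides where the data lives */
void InitExampleV2()
{
    XTensor t;
    int dims[2] = {3, 4};
    InitTensorV2(&t, 2, dims, X_FLOAT, -1);   /* a 3 x 4 float tensor on the CPU */
}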
/*
initialize a dense tensor
>> tensor - the tensor we intend to initialize
>> num - number of elements
......@@ -2144,6 +2217,24 @@ void InitTensor1D(XTensor * tensor, const int num,
}
/*
initialize a dense tensor V2
>> tensor - the tensor we intend to initialize
>> num - number of elements
>> myDataType - unit size (e.g., int, float, and double)
>> myDevID - the device on which we allocate the data
*/
void InitTensor1DV2(XTensor * tensor, const int num,
const TENSOR_DATA_TYPE myDataType, const int myDevID)
{
int dims[1];
dims[0] = num;
InitTensorV2(tensor, 1, dims, myDataType, myDevID);
}
/*
initialize a dense matrix
>> tensor - the tensor we intend to initialize
>> rowNum - number of rows
......@@ -2167,6 +2258,26 @@ void InitTensor2D(XTensor * tensor, const int rowNum, const int colNum,
}
/*
initialize a dense matrix V2
>> tensor - the tensor we intend to initialize
>> rowNum - number of rows
>> colNum - number of columns
>> myDataType - unit size (e.g., int, float, and double)
>> myDevID - the device on which we allocate the data
*/
void InitTensor2DV2(XTensor * tensor, const int rowNum, const int colNum,
const TENSOR_DATA_TYPE myDataType, const int myDevID)
{
int dims[2];
dims[0] = rowNum;
dims[1] = colNum;
InitTensorV2(tensor, 2, dims, myDataType, myDevID);
}
/*
initialize a dense 3d tensor
>> tensor - the tensor we intend to initialize
>> d0 - size of dimension 0
......@@ -2192,6 +2303,28 @@ void InitTensor3D(XTensor * tensor, const int d0, const int d1, const int d2,
}
/*
initialize a dense 3d tensor V2
>> tensor - the tensor we intend to initialize
>> d0 - size of dimension 0
>> d1 - size of dimension 1
>> d2 - size of dimension 2
>> myDataType - unit size (e.g., int, float, and double)
>> myDevID - the device on which we allocate the data
*/
void InitTensor3DV2(XTensor * tensor, const int d0, const int d1, const int d2,
const TENSOR_DATA_TYPE myDataType, const int myDevID)
{
int dims[3];
dims[0] = d0;
dims[1] = d1;
dims[2] = d2;
InitTensorV2(tensor, 3, dims, myDataType, myDevID);
}
/*
initialize a dense 4d tensor
>> tensor - the tensor we intend to initialize
>> d0 - size of dimension 0
......@@ -2219,6 +2352,30 @@ void InitTensor4D(XTensor * tensor, const int d0, const int d1, const int d2, co
}
/*
initialize a dense 4d tensor V2
>> tensor - the tensor we intend to initialize
>> d0 - size of dimension 0
>> d1 - size of dimension 1
>> d2 - size of dimension 2
>> d3 - size of dimension 3
>> myDataType - unit size (e.g., int, float, and double)
>> myDevID - the device on which we allocate the data
*/
void InitTensor4DV2(XTensor * tensor, const int d0, const int d1, const int d2, const int d3,
const TENSOR_DATA_TYPE myDataType, const int myDevID)
{
int dims[4];
dims[0] = d0;
dims[1] = d1;
dims[2] = d2;
dims[3] = d3;
InitTensorV2(tensor, 4, dims, myDataType, myDevID);
}
/*
initialize a dense 5d tensor
>> tensor - the tensor we intend to initialize
>> d0 - size of dimension 0
......@@ -2248,6 +2405,32 @@ void InitTensor5D(XTensor * tensor, const int d0, const int d1, const int d2, co
}
/*
initialize a dense 5d tensor V2
>> tensor - the tensor we intend to initialize
>> d0 - size of dimension 0
>> d1 - size of dimension 1
>> d2 - size of dimension 2
>> d3 - size of dimension 3
>> d4 - size of dimension 4
>> myDataType - unit size (e.g., int, float, and double)
>> myDevID - the device on which we allocate the data
*/
void InitTensor5DV2(XTensor * tensor, const int d0, const int d1, const int d2, const int d3, const int d4,
const TENSOR_DATA_TYPE myDataType, const int myDevID)
{
int dims[5];
dims[0] = d0;
dims[1] = d1;
dims[2] = d2;
dims[3] = d3;
dims[4] = d4;
InitTensorV2(tensor, 5, dims, myDataType, myDevID);
}
/*
initialize a tensor with a reference tensor
>> tensor - the tensor we intend to initialize
>> reference - the reference tensor
......@@ -2263,6 +2446,20 @@ void InitTensor(XTensor * tensor, const XTensor * reference)
}
/*
initialize a tensor with a reference tensor V2
>> tensor - the tensor we intend to initialize
>> reference - the reference tensor
*/
void InitTensorV2(XTensor * tensor, const XTensor * reference)
{
if(reference->order < 0)
return;
InitTensorV2(tensor, reference->order, reference->dimSize,
reference->dataType, reference->devID);
}
/*
initialize a tensor on the CPU with a reference tensor
>> tensor - the tensor we intend to initialize
>> reference - the reference tensor
......@@ -2310,6 +2507,23 @@ XTensor * NewTensor(const int myOrder, const int * myDimSize, const TENSOR_DATA_
}
/*
generate a dense XTensor V2
>> myOrder - order of the tensor
>> myDimSize - the size of each dimension
>> myDataType - unit size (e.g., int, float, and double)
>> myDevID - the device on which we allocate the data
*/
XTensor * NewTensorV2(const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType,
const int myDevID)
{
XMem * myMem = GMems.GetMem(myDevID);
return new XTensor(myOrder, myDimSize, myDataType, 1.0F, myDevID, myMem);
}
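/* usage sketch (illustrative, not part of the library): NewTensorV2 binds the new
tensor to the pool kept by the global manager for the given device; the caller still
owns the returned object */
void NewExampleV2()
{
    int dims[2] = {2, 5};
    XTensor * m = NewTensorV2(2, dims, X_FLOAT, -1);   /* a 2 x 5 float tensor on the CPU */
    delete m;
}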
/*
generate a XTensor which allocates data on the buffer
>> myOrder - order of the tensor
>> myDimSize - the size of each dimension
......@@ -2344,6 +2558,35 @@ XTensor * NewTensorBuf(const int myOrder, const int * myDimSize,
}
/*
generate a dense XTensor which allocates data on the buffer V2
>> myOrder - order of the tensor
>> myDimSize - the size of each dimension
>> myDataType - unit size (e.g., int, float, and double)
>> devID - device id
*/
XTensor * NewTensorBufV2(const int myOrder, const int * myDimSize,
const TENSOR_DATA_TYPE myDataType, const int devID)
{
int dims[MAX_TENSOR_DIM_NUM];
memcpy(dims, myDimSize, sizeof(int) * myOrder);
dims[0] = -abs(dims[0]);
XTensor * tensor = NewTensor(myOrder, dims, myDataType, 1.0F, devID);
/* dump the tensor when its total size hits this particular value (a debugging check) */
if (tensor->unitNum * tensor->unitSize == 176657664) {
tensor->Dump(stderr, "", 200);
}
XMem * myMem = GMems.GetMem(devID);
tensor->data = myMem->AllocBuf(myMem->devID, tensor->unitNum * tensor->unitSize);
return tensor;
}
/*
generate a XTensor which allocates data on the buffer
>> reference - reference tensor
>> devID - device id
......@@ -2359,6 +2602,17 @@ XTensor * NewTensorBuf(const XTensor * reference, int devID, XMem * myMem)
}
/*
generate a XTensor which allocates data on the buffer V2
>> reference - reference tensor
>> devID - device id
*/
XTensor * NewTensorBufV2(const XTensor * reference, int devID)
{
return NewTensorBufV2(reference->order, reference->dimSize,
reference->dataType, devID);
}
/*
generate a dense vector
>> num - number of entries
>> myDataType - unit size (e.g., int, float, and double)
......@@ -2379,6 +2633,23 @@ XTensor * NewTensor1D(const int num,
}
/*
generate a dense vector V2
>> num - number of entries
>> myDataType - unit size (e.g., int, float, and double)
>> myDevID - the device on which we allocate the data
*/
XTensor * NewTensor1DV2(const int num,
const TENSOR_DATA_TYPE myDataType, const int myDevID)
{
int dims[1];
dims[0] = num;
return NewTensorV2(1, dims, myDataType, myDevID);
}
/*
generate a dense matrix
>> rowNum - number of rows
>> colNum - number of columns
......@@ -2401,6 +2672,25 @@ XTensor * NewTensor2D(const int rowNum, const int colNum,
}
/*
generate a dense matrix V2
>> rowNum - number of rows
>> colNum - number of columns
>> myDataType - unit size (e.g., int, float, and double)
>> myDevID - the device on which we allocate the data
*/
XTensor * NewTensor2DV2(const int rowNum, const int colNum,
const TENSOR_DATA_TYPE myDataType, const int myDevID)
{
int dims[2];
dims[0] = rowNum;
dims[1] = colNum;
return NewTensorV2(2, dims, myDataType, myDevID);
}
/*
generate a dense 3d tensor
>> d0 - size of dimension 0
>> d1 - size of dimension 1
......@@ -2425,6 +2715,27 @@ XTensor * NewTensor3D(const int d0, const int d1, const int d2,
}
/*
generate a dense 3d tensor V2
>> d0 - size of dimension 0
>> d1 - size of dimension 1
>> d2 - size of dimension 2
>> myDataType - unit size (e.g., int, float, and double)
>> myDevID - the device on which we allocate the data
*/
XTensor * NewTensor3DV2(const int d0, const int d1, const int d2,
const TENSOR_DATA_TYPE myDataType, const int myDevID)
{
int dims[3];
dims[0] = d0;
dims[1] = d1;
dims[2] = d2;
return NewTensorV2(3, dims, myDataType, myDevID);
}
/*
generate a dense 4d tensor
>> d0 - size of dimension 0
>> d1 - size of dimension 1
......@@ -2451,6 +2762,29 @@ XTensor * NewTensor4D(const int d0, const int d1, const int d2, const int d3,
}
/*
generate a dense 4d tensor V2
>> d0 - size of dimension 0
>> d1 - size of dimension 1
>> d2 - size of dimension 2
>> d3 - size of dimension 3
>> myDataType - unit size (e.g., int, float, and double)
>> myDevID - the device on which we allocate the data
*/
XTensor * NewTensor4DV2(const int d0, const int d1, const int d2, const int d3,
const TENSOR_DATA_TYPE myDataType, const int myDevID)
{
int dims[4];
dims[0] = d0;
dims[1] = d1;
dims[2] = d2;
dims[3] = d3;
return NewTensorV2(4, dims, myDataType, myDevID);
}
/*
generate a dense 5d tensor
>> d0 - size of dimension 0
>> d1 - size of dimension 1
......@@ -2479,6 +2813,31 @@ XTensor * NewTensor5D(const int d0, const int d1, const int d2, const int d3, co
}
/*
generate a dense 5d tensor V2
>> d0 - size of dimension 0
>> d1 - size of dimension 1
>> d2 - size of dimension 2
>> d3 - size of dimension 3
>> d4 - size of dimension 4
>> myDataType - unit size (e.g., int, float, and double)
>> myDevID - the device on which we allocate the data
*/
XTensor * NewTensor5DV2(const int d0, const int d1, const int d2, const int d3, const int d4,
const TENSOR_DATA_TYPE myDataType, const int myDevID)
{
int dims[5];
dims[0] = d0;
dims[1] = d1;
dims[2] = d2;
dims[3] = d3;
dims[4] = d4;
return NewTensorV2(5, dims, myDataType, myDevID);
}
/*
generate a copy of XTensor
>> a - the tensor we copy from
>> isFilledData - indicates whether we allocate the data for
......
......@@ -255,6 +255,10 @@ public:
static
bool IsSameShaped(const XTensor * a, const XTensor * b, const XTensor * c);
/* judge whether b has the shape of a reduced along the given dimension */
static
bool IsReduceShaped(const XTensor * a, const XTensor * b, int dim);
/* set the size of each dimension */
void SetDim(int * myDimSize);
......@@ -447,29 +451,57 @@ void InitTensor(XTensor * tensor,
const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const float myDenseRatio = 1.0F, const int myDevID = -1, XMem * myMem = NULL);
/* initialize a dense XTensor V2 */
void InitTensorV2(XTensor * tensor,
const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1);
/* initialize a dense vector */
void InitTensor1D(XTensor * tensor, const int num,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, XMem * myMem = NULL);
/* initialize a dense vector V2 */
void InitTensor1DV2(XTensor * tensor, const int num,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1);
/* initialize a dense matrix */
void InitTensor2D(XTensor * tensor, const int rowNum, const int colNum,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, XMem * myMem = NULL);
/* initialize a dense matrix V2 */
void InitTensor2DV2(XTensor * tensor, const int rowNum, const int colNum,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1);
/* initialize a dense 3d tensor */
void InitTensor3D(XTensor * tensor, const int d0, const int d1, const int d2,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, XMem * myMem = NULL);
/* initialize a dense 3d tensor V2 */
void InitTensor3DV2(XTensor * tensor, const int d0, const int d1, const int d2,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1);
/* initialize a dense 4d tensor */
void InitTensor4D(XTensor * tensor, const int d0, const int d1, const int d2, const int d3,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, XMem * myMem = NULL);
/* initialize a dense 4d tensor V2 */
void InitTensor4DV2(XTensor * tensor, const int d0, const int d1, const int d2, const int d3,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1);
/* initialize a dense 5d tensor */
void InitTensor5D(XTensor * tensor, const int d0, const int d1, const int d2, const int d3, const int d4,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, XMem * myMem = NULL);
/* initialize a dense 5d tensor V2 */
void InitTensor5DV2(XTensor * tensor, const int d0, const int d1, const int d2, const int d3, const int d4,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1);
/* initialize a tensor with a reference tensor */
void InitTensor(XTensor * tensor, const XTensor * reference);
/* initialize a tensor with a reference tensor */
void InitTensorV2(XTensor * tensor, const XTensor * reference);
/* initialize a tensor on the CPU with a reference tensor */
void InitTensorOnCPU(XTensor * tensor, const XTensor * reference);
......@@ -480,38 +512,72 @@ XTensor * NewTensor();
XTensor * NewTensor(const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const float myDenseRatio = 1.0F, const int myDevID = -1, XMem * myMem = NULL);
/* generate a dense XTensor V2 */
XTensor * NewTensorV2(const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1);
/* generate a XTensor which allocates data on the buffer */
XTensor * NewTensorBuf(const int myOrder, const int * myDimSize,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const float myDenseRatio = 1.0F,
const int myDevID = -1, XMem * myMem = NULL);
/* generate a dense XTensor which allocates data on the buffer V2 */
XTensor * NewTensorBufV2(const int myOrder, const int * myDimSize,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1);
/* generate a XTensor which allocates data on the buffer */
XTensor * NewTensorBuf(const XTensor * reference, int devID, XMem * myMem);
/* generate a XTensor which allocates data on the buffer V2 */
XTensor * NewTensorBufV2(const XTensor * reference, int devID);
/* generate a dense vector */
XTensor * NewTensor1D(const int num, const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1,
XMem * myMem = NULL);
/* generate a dense vector V2 */
XTensor * NewTensor1DV2(const int num, const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1);
/* generate a dense matrix */
XTensor * NewTensor2D(const int rowNum, const int colNum,
const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1, XMem * myMem = NULL);
/* generate a dense matrix V2 */
XTensor * NewTensor2DV2(const int rowNum, const int colNum,
const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1);
/* generate a dense 3d tensor */
XTensor * NewTensor3D(const int d0, const int d1, const int d2,
const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1, XMem * myMem = NULL);
/* generate a dense 3d tensor V2 */
XTensor * NewTensor3DV2(const int d0, const int d1, const int d2,
const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1);
/* generate a dense 4d tensor */
XTensor * NewTensor4D(const int d0, const int d1, const int d2, const int d3,
const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1, XMem * myMem = NULL);
/* generate a dense 4d tensor V2 */
XTensor * NewTensor4DV2(const int d0, const int d1, const int d2, const int d3,
const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1);
/* generate a dense 5d tensor */
XTensor * NewTensor5D(const int d0, const int d1, const int d2, const int d3, const int d4,
const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1, XMem * myMem = NULL);
/* generate a dense 5d tensor V2 */
XTensor * NewTensor5DV2(const int d0, const int d1, const int d2, const int d3, const int d4,
const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1);
/* generate a copy of XTensor (with a reference to a given tensor) */
XTensor * NewTensor(const XTensor * a, bool isFilledData = true);
......
......@@ -85,7 +85,7 @@ namespace nts{
#endif
typedef void (*TFunction) (volatile XList*);
typedef void (*TFunction) (volatile TensorList*);
/*
This is a class that wraps the standard implementation of threading
......@@ -133,7 +133,7 @@ public:
/* arguments (for the function to run) */
volatile
XList * argv;
TensorList * argv;
/* a flag to break */
volatile
......
......@@ -97,4 +97,5 @@
#include "utilities/XMatrixSegment.h"
#include "utilities/FlushToMem.h"
#include "../function/DropoutWithIndex.h"
#endif // __CHEADER_H__
......@@ -218,4 +218,55 @@ XTensor Div(const XTensor &a, const XTensor &b, DTYPE alpha, int leadingDim)
return c;
}
/*
element-wise division of two tensors
c(i) = a(i)/b(i) + \alpha * c(i)
where i is the index of the item
>> a - tensor a
>> b - tensor b
>> c - result tensor
>> alpha - the coefficient
>> leadingDim - the dimension along which we perform broadcasting
>> requireLink - whether to add the operation to the network
*/
void Div(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int leadingDim, bool requireLink)
{
if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
InitTensor(&c, &a);
}
int n = GetDivDimIndex(a, b);
if (n == -1) {
CheckNTErrors(a.dimSize[leadingDim] == b.dimSize[leadingDim], "TODO!");
/* call _Div function */
_Div(&a, &b, &c, 0, leadingDim);
if (requireLink) {
/* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_DIV);
XLink::AddParamToHead(&c, alpha);
XLink::AddParamToHeadInt(&c, leadingDim);
}
}
else if (n >= 0 && n < a.order) {
/* call _DivDim function */
_DivDim(&a, &b, &c, n, alpha);
if (requireLink) {
/* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_DIVDIM);
XLink::AddParamToHeadInt(&c, n);
XLink::AddParamToHead(&c, alpha);
}
}
else {
ShowNTErrors("Something is wrong!");
}
}
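/* usage sketch (illustrative, not part of the library): the void-return overload
writes into a caller-side tensor and only records the operation in the network
when requireLink is true; the helper name is made up for the example */
void DivExample()
{
    XTensor a, b, c;
    InitTensor2DV2(&a, 2, 3);
    InitTensor2DV2(&b, 2, 3);
    /* ... fill a and b with data ... */
    Div(a, b, c);                   /* c is (re)initialized inside if its shape does not match a */
    Div(a, b, c, 0.0F, 0, true);    /* the same, but the op is linked for back-propagation */
}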
} // namespace nts(NiuTrans.Tensor)
......@@ -49,6 +49,13 @@ where i is the index of the element
*/
XTensor Div(const XTensor &a, const XTensor &b, DTYPE alpha = 0.0, int leadingDim = 0);
/*
element-wise division of two tensors:
c(i) = a(i)/b(i) + \alpha * c(i)
where i is the index of the element
*/
void Div(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha = 0.0, int leadingDim = 0, bool requireLink = false);
} // namespace nts(NiuTrans.Tensor)
#endif // __DIV_H__
\ No newline at end of file
......@@ -171,4 +171,35 @@ XTensor DivDim(const XTensor &a, const XTensor &b, int n, DTYPE alpha)
return c;
}
/*
tensor division
c = a / b + \alpha * c
where the size of b is equal to the n-th dimension of a,
i.e., a is divided with b by broadcasting
>> a - a tensor
>> b - another tensor whose size is equal to that of dimension n of a
>> c - where we put the result
>> n - the dimension index
>> alpha - the scaling factor
>> requireLink - whether to add the operation to the network
*/
void DivDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE alpha, bool requireLink)
{
if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
InitTensor(&c, &a);
}
/* call _DivDim function */
_DivDim(&a, &b, &c, n, alpha);
if (requireLink) {
/* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_DIVDIM);
XLink::AddParamToHeadInt(&c, n);
XLink::AddParamToHead(&c, alpha);
}
}
}
......@@ -53,6 +53,14 @@ we make a new tensor c to keep the result and return it
*/
XTensor DivDim(const XTensor &a, const XTensor &b, int n, DTYPE alpha = (DTYPE)0.0);
/*
tensor division of two tensors:
c = a / b + \alpha * c
where the size of b is equal to the n-th dimension of a,
i.e., a is divided with b by broadcasting
*/
void DivDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE alpha = (DTYPE)0.0, bool requireLink = false);
} // namespace nts(NiuTrans.Tensor)
#endif // __DIVDIM_H__
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2019-04-24
* I'll attend several conferences and workshops in the following weeks -
* busy days :(
*/
#include "../../XTensor.h"
#include "../../XName.h"
#include "../../XUtility.h"
#include "Mask.h"
#include "Mask.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
mask entries of a given tensor:
c(i) = a(i) if mask(i) is non-zero
c(i) = alpha if mask(i) = 0
where i is the index of the element
*/
void _Mask(const XTensor * a, const XTensor * mask, XTensor * c, DTYPE alpha)
{
CheckNTErrors(a && mask && c, "Empty tensor input!");
CheckNTErrors(a->unitNum == mask->unitNum && a->unitNum == c->unitNum,
"Unmatched tensors in addition!");
CheckNTErrors(mask->dataType == X_INT, "The mask tensor must be in X_INT!")
//CheckNTErrors(a->dataType == mask->dataType && a->dataType == c->dataType,
// "Unmatched tensors in addition!");
if (a->devID >= 0 || mask->devID >= 0 || c->devID >= 0) {
#ifdef USE_CUDA
if (a == c) {
int P2PAccesible = 0;
#ifdef CUDA_UVA
cudaDeviceCanAccessPeer(&P2PAccesible, a->devID, mask->devID);
#endif
if ((a->devID < 0 && mask->devID >= 0) ||
(a->devID >= 0 && mask->devID < 0) ||
(a->devID >= 0 && mask->devID >= 0 && a->devID != mask->devID && !P2PAccesible))
{
ShowNTErrors("Cannot run this method on multiple devices simultaneously!");
}
else
_CudaMask(a, mask, c, alpha);
}
else
_CudaMask(a, mask, c, alpha);
#endif
}
else {
if (!a->isSparse && !mask->isSparse) {
CheckNTErrors(!c->isSparse, "Illegal use of sparse tensor in addition!");
if (a->dataType == DEFAULT_DTYPE &&
mask->dataType == X_INT &&
c->dataType == DEFAULT_DTYPE)
{
DTYPE * ap = (DTYPE*)a->data;
int * maskp = (int*)mask->data;
DTYPE * cp = (DTYPE*)c->data;
/* unrolling */
int num = a->unitNum;
if (num % 2 == 0) {
for (int i = 0; i < num; i += 2) {
if (maskp[i] == 0) {
cp[i] = alpha;
}
else {
cp[i] = ap[i];
}
if (maskp[i + 1] == 0) {
cp[i + 1] = alpha;
}
else {
cp[i + 1] = ap[i + 1];
}
}
}
else {
for (int i = 0; i < num; i++) {
if (maskp[i] == 0) {
cp[i] = alpha;
}
else {
cp[i] = ap[i];
}
}
}
}
else {
// TODO!!
ShowNTErrors("TODO!");
}
}
else {
// TODO!!
ShowNTErrors("TODO!");
}
}
}
/*
mask entries of a given tensor (on site):
a(i) = a(i) if mask(i) is non-zero
a(i) = alpha if mask(i) = 0
where i is the index of the element
*/
void _MaskMe(XTensor * a, const XTensor * mask, DTYPE alpha)
{
_Mask(a, mask, a, alpha);
}
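/* usage sketch (illustrative, not part of the library): keep entries of a where the
integer mask is non-zero and overwrite the rest with alpha (e.g., a large negative
value, as often used for attention masks) */
void MaskExample()
{
    XTensor a, mask, c;
    InitTensor2DV2(&a, 2, 3, X_FLOAT, -1);
    InitTensor2DV2(&mask, 2, 3, X_INT, -1);
    InitTensor2DV2(&c, 2, 3, X_FLOAT, -1);
    /* ... fill a and mask ... */
    _Mask(&a, &mask, &c, (DTYPE)-1e9F);
}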
/*
mask entries of a given tensor (return an XTensor structure):
a(i) = a(i) if mask(i) is non-zero
a(i) = alpha if mask(i) = 0
where i is the index of the element
*/
XTensor Mask(const XTensor &a, const XTensor &mask, DTYPE alpha)
{
XTensor c(&a);
c.SetTMPFlag();
/* call _Mask function */
_Mask(&a, &mask, &c, alpha);
/* tensor connections */
//XLink::MakeLink(&a, &mask, &c, MATH_SUM);
//XLink::AddParamToHead(&c, alpha);
// TODO!!
ShowNTErrors("TODO!");
return c;
}
}
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2019-04-24
* I'll attend several conferences and workshops in the following weeks -
* busy days :(
*/
#include "../../XDevice.h"
#include "../../XUtility.h"
#include "Sub.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/*
mask entries of a given tensor (CUDA Kernel)
c(i) = a(i) if mask(i) is non-zero
c(i) = alpha if mask(i) = 0
>> a - a matrix
>> mask - mask matrix
>> c - where we put masked a
>> size - the size of a/mask/c
>> alpha - the value written to masked-out entries
*/
__global__
void KernelMASK(DTYPE * a, int * mask, DTYPE * c, int size, DTYPE alpha)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size) {
if (mask[i] == 0) {
c[i] = alpha;
}
else {
c[i] = a[i];
}
}
}
/*
mask entries of a given tensor (cuda version)
>> a - a tensor
>> mask - mask tensor
>> c - where we put masked a
>> alpha - value
*/
void _CudaMask(const XTensor * a, const XTensor * mask, XTensor * c, DTYPE alpha)
{
CheckNTErrors(a && mask && c, "Empty tensor input!");
CheckNTErrors((a->unitNum == mask->unitNum && a->unitNum == c->unitNum),
"Unmatched tensors in addition!");
CheckNTErrors(mask->dataType == X_INT, "The mask tensor must be in X_INT!")
//CheckNTErrors((a->dataType == mask->dataType && a->dataType == c->dataType),
// "Unmatched tensors in addition!");
CheckNTErrors((a->devID == mask->devID && a->devID == c->devID),
"The tensors must be on the same!");
int devIDBackup = XDevice::GetGPUDevice();
XDevice::SetGPUDevice(a->devID);
if (!a->isSparse && !mask->isSparse) {
CheckNTErrors(!c->isSparse, "Illegal use of sparse matrix in addition!");
if (a->dataType == DEFAULT_DTYPE &&
mask->dataType == X_INT &&
c->dataType == DEFAULT_DTYPE)
{
int gridSize[3], blockSize[3];
GDevs.GetCudaThread(a->devID, a->unitNum, gridSize, blockSize);
dim3 blocks(gridSize[0]);
dim3 threads(blockSize[0]);
KernelMASK << <blocks, threads >> >((DTYPE*)a->data, (int *)mask->data, (DTYPE*)c->data, a->unitNum, alpha);
}
else {
// TODO!!
ShowNTErrors("TODO!");
}
}
else {
// TODO!!
ShowNTErrors("TODO!");
}
XDevice::SetGPUDevice(devIDBackup);
}
#endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2019-04-24
* I'll attend several conferences and workshops in the following weeks -
* busy days :(
*/
#ifndef __MASK_CUH__
#define __MASK_CUH__
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/* mask entries of a given tensor (cuda version) */
void _CudaMask(const XTensor * a, const XTensor * mask, XTensor * c = NULL, DTYPE alpha = (DTYPE)1.0);
#endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor)
#endif // __MASK_CUH__
\ No newline at end of file
......@@ -108,9 +108,9 @@ void _MatrixMul(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
cBlockNum *= b->dimSizeRDI[i];
}
XList * aList = new XList(10);
XList * bList = new XList(10);
XList * cList = new XList(10);
TensorList * aList = new TensorList(10);
TensorList * bList = new TensorList(10);
TensorList * cList = new TensorList(10);
int aDimSize[2] = { -a->dimSizeRDI[1], a->dimSizeRDI[0] };
int bDimSize[2] = { -b->dimSizeRDI[1], b->dimSizeRDI[0] };
int cDimSize[2] = { -c->dimSizeRDI[1], c->dimSizeRDI[0] };
......@@ -202,6 +202,42 @@ void _MatrixMul(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
delete cList;
}
bool CheckMMulShape(const XTensor * a, MATRIX_TRANS_TYPE transposedA, const XTensor * b, MATRIX_TRANS_TYPE transposedB, XTensor * c)
{
if (!(a && b && c))
return false;
if(!(a->dataType == b->dataType && a->dataType == c->dataType))
return false;
if (!(a->order >= 2 && b->order >= 2 && c->order >= 2))
return false;
int an = transposedA == X_TRANS ? a->dimSizeRDI[0] : a->dimSizeRDI[1];
int am = transposedA == X_TRANS ? a->dimSizeRDI[1] : a->dimSizeRDI[0];
int bn = transposedB == X_TRANS ? b->dimSizeRDI[0] : b->dimSizeRDI[1];
int bm = transposedB == X_TRANS ? b->dimSizeRDI[1] : b->dimSizeRDI[0];
CheckNTErrors(am == bn, "Unmatched tensors in multiplication!");
int order = a->order + b->order - 2;
int sub = 0;
int * dimSize = new int[order];
for (int i = 2; i < a->order; i++)
dimSize[sub++] = a->dimSizeRDI[a->order + 1 - i];
for (int i = 2; i < b->order; i++)
dimSize[sub++] = b->dimSizeRDI[b->order + 1 - i];
dimSize[sub++] = an;
dimSize[sub++] = bm;
for (int i = 0; i < order; i++) {
if (dimSize[i] != c->dimSize[i]) {
delete[] dimSize;
return false;
}
}
delete[] dimSize;
return true;
}
/*
matrix multiplication (return an XTensor structure) c = trans(a) * trans(b) * alpha
make a new tensor to keep the result and return it
......@@ -266,6 +302,53 @@ XTensor MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA,
return c;
}
void MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA,
const XTensor &b, MATRIX_TRANS_TYPE transposedB, XTensor &c,
DTYPE alpha, XPRunner * parallelRunner, bool requireLink)
{
CheckNTErrors(a.dataType == b.dataType, "Input tensors should have the same data type!");
CheckNTErrors(a.order >= 2 && b.order >= 2, "Input tensors must have an order >= 2!");
if (!c.isInit || !CheckMMulShape(&a, transposedA, &b, transposedB, &c)) {
int an = transposedA == X_TRANS ? a.dimSizeRDI[0] : a.dimSizeRDI[1];
int am = transposedA == X_TRANS ? a.dimSizeRDI[1] : a.dimSizeRDI[0];
int bn = transposedB == X_TRANS ? b.dimSizeRDI[0] : b.dimSizeRDI[1];
int bm = transposedB == X_TRANS ? b.dimSizeRDI[1] : b.dimSizeRDI[0];
CheckNTErrors(am == bn, "Unmatched tensors in multiplication!");
int order = a.order + b.order - 2;
int sub = 0;
int * dimSize = new int[order];
for (int i = 2; i < a.order; i++)
dimSize[sub++] = a.dimSizeRDI[a.order + 1 - i];
for (int i = 2; i < b.order; i++)
dimSize[sub++] = b.dimSizeRDI[b.order + 1 - i];
dimSize[sub++] = an;
dimSize[sub++] = bm;
float dr = (!a.isSparse || !b.isSparse) ? 1.0F : MAX(a.denseRatio, b.denseRatio);
InitTensor(&c, order, dimSize, a.dataType, dr, a.devID, a.mem);
/* destroy variables */
delete[] dimSize;
}
/* call _MatrixMul function */
_MatrixMul(&a, transposedA, &b, transposedB, &c, alpha, 0, parallelRunner);
if (requireLink) {
/* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_MATRIXMUL);
XLink::AddParamToHeadTrans(&c, transposedA);
XLink::AddParamToHeadTrans(&c, transposedB);
XLink::AddParamToHead(&c, alpha);
}
}
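/* usage sketch (illustrative, not part of the library): the 2-D case c = a * b with
a: (2, 3) and b: (3, 4) produces c: (2, 4); c is created or resized inside when its
shape does not match */
void MatrixMulExample()
{
    XTensor a, b, c;
    InitTensor2DV2(&a, 2, 3);
    InitTensor2DV2(&b, 3, 4);
    /* ... fill a and b ... */
    MatrixMul(a, X_NOTRANS, b, X_NOTRANS, c);
}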
/*
matrix multiplication with no transposition c = a * b * alpha
>> a - tensor a
......@@ -316,6 +399,52 @@ XTensor MatrixMul(const XTensor &a, const XTensor &b,
return c;
}
void MatrixMul(const XTensor &a, const XTensor &b, XTensor &c,
DTYPE alpha, XPRunner * parallelRunner, bool requireLink)
{
CheckNTErrors(a.dataType == b.dataType, "Input tensors should have the same data type!");
CheckNTErrors(a.order >= 2 && b.order >= 2, "Input tensors must have an order >= 2!");
if (!c.isInit || !CheckMMulShape(&a, X_NOTRANS, &b, X_NOTRANS, &c)) {
int an = a.dimSizeRDI[1];
int am = a.dimSizeRDI[0];
int bn = b.dimSizeRDI[1];
int bm = b.dimSizeRDI[0];
CheckNTErrors(am == bn, "Unmatched tensors in multiplication!");
int order = a.order + b.order - 2;
int sub = 0;
int * dimSize = new int[order];
for (int i = 2; i < a.order; i++)
dimSize[sub++] = a.dimSizeRDI[a.order + 1 - i];
for (int i = 2; i < b.order; i++)
dimSize[sub++] = b.dimSizeRDI[b.order + 1 - i];
dimSize[sub++] = an;
dimSize[sub++] = bm;
float dr = (!a.isSparse || !b.isSparse) ? 1.0F : MAX(a.denseRatio, b.denseRatio);
InitTensor(&c, order, dimSize, a.dataType, dr, a.devID, a.mem);
/* destroy variables */
delete[] dimSize;
}
/* call _MatrixMul function */
_MatrixMul(&a, X_NOTRANS, &b, X_NOTRANS, &c, alpha, 0, parallelRunner);
if (requireLink) {
/* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_MATRIXMUL);
XLink::AddParamToHeadTrans(&c, X_NOTRANS);
XLink::AddParamToHeadTrans(&c, X_NOTRANS);
XLink::AddParamToHead(&c, alpha);
}
}
} // namespace nts(NiuTrans.Tensor)
......
......@@ -59,10 +59,16 @@ Obviously C = A * B performs normal matrix multiplication if A = y * z and B = x
XTensor MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA, const XTensor &b, MATRIX_TRANS_TYPE transposedB,
DTYPE alpha = (DTYPE)1.0, XPRunner * parallelRunner = NULL);
void MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA, const XTensor &b, MATRIX_TRANS_TYPE transposedB,
XTensor &c, DTYPE alpha = (DTYPE)1.0, XPRunner * parallelRunner = NULL, bool requireLink = false);
/* matrix multiplication with no transposition c = a * b * alpha*/
XTensor MatrixMul(const XTensor &a, const XTensor &b,
DTYPE alpha = (DTYPE)1.0, XPRunner * parallelRunner = NULL);
void MatrixMul(const XTensor &a, const XTensor &b, XTensor &c,
DTYPE alpha = (DTYPE)1.0, XPRunner * parallelRunner = NULL, bool requireLink = false);
} // namespace nts(NiuTrans.Tensor)
......
......@@ -38,17 +38,23 @@ argument5: matrix a
argument6: matrix b
argument7: matrix c (c=a*b*\alpha + c*beta)
*/
void _MatrixMul2DMultiTheading(XList * args)
void _MatrixMul2DMultiTheading(TensorList * args)
{
int x1 = *(int*)args->GetItem(0);
int y1 = *(int*)args->GetItem(1);
int x2 = *(int*)args->GetItem(2);
int y2 = *(int*)args->GetItem(3);
XTensor * a = (XTensor*)args->GetItem(4);
XTensor * b = (XTensor*)args->GetItem(5);
XTensor * c = (XTensor*)args->GetItem(6);
DTYPE alpha = *(DTYPE*)args->GetItem(7);
DTYPE beta = *(DTYPE*)args->GetItem(8);
CheckNTErrors(args->count == 2, "invalid argument number!");
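/* the job arguments are packed as two sub-lists:
args[0] - an IntList holding the block corners {x1, y1, x2, y2}
args[1] - a TensorList holding {a, b, c, &alpha, &beta} */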
IntList * indexArgs = (IntList*)args->GetItem(0);
TensorList * matrixArgs = (TensorList*)args->GetItem(1);
CheckNTErrors(indexArgs->count == 4, "invalid argument number!");
CheckNTErrors(matrixArgs->count == 5, "invalid argument number!");
XTensor * a = matrixArgs->GetItem(0);
XTensor * b = matrixArgs->GetItem(1);
XTensor * c = matrixArgs->GetItem(2);
DTYPE alpha = *(DTYPE*)(matrixArgs->GetItem(3));
DTYPE beta = *(DTYPE*)(matrixArgs->GetItem(4));
int x1 = indexArgs->GetItem(0);
int y1 = indexArgs->GetItem(1);
int x2 = indexArgs->GetItem(2);
int y2 = indexArgs->GetItem(3);
#ifdef FAST_MATRIX
int am = a->dimSize[1];
......
......@@ -30,7 +30,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
matrix multiplication for a block (x1,y1) - (x2,y2)
where (x1,y1) is the upper-left corner and (x2,y2) is the bottom-right corner
*/
void _MatrixMul2DMultiTheading(XList * args);
void _MatrixMul2DMultiTheading(TensorList * args);
} // namespace nts(NiuTrans.Tensor)
......
......@@ -227,9 +227,9 @@ c_i = trans(a_i) * trans(b_i) * \alpha + c_i * \beta for each i in [0,count-1]
>> alpha - scalar
>> beta - scalar
*/
void _MatrixMulBatchedCPU(const XList * a, MATRIX_TRANS_TYPE transposedA,
const XList * b, MATRIX_TRANS_TYPE transposedB,
XList * c, DTYPE alpha, DTYPE beta)
void _MatrixMulBatchedCPU(const TensorList * a, MATRIX_TRANS_TYPE transposedA,
const TensorList * b, MATRIX_TRANS_TYPE transposedB,
TensorList * c, DTYPE alpha, DTYPE beta)
{
CheckNTErrors(a && b && c, "Empty input lists!");
CheckNTErrors(a->count == b->count && a->count == c->count, "Input lists must be of the same size!");
......
......@@ -58,8 +58,8 @@ void _MatrixMulBatchedCPU(const XTensor * a, MATRIX_TRANS_TYPE transposedA, cons
matrix multiplication of the two tensors c = trans(a) * trans(b) * alpha + c * beta (for list inputs)
optimized for GPU
*/
void _MatrixMulBatchedCPU(const XList * a, MATRIX_TRANS_TYPE transposedA, const XList * b, MATRIX_TRANS_TYPE transposedB,
XList * c, DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0);
void _MatrixMulBatchedCPU(const TensorList * a, MATRIX_TRANS_TYPE transposedA, const TensorList * b, MATRIX_TRANS_TYPE transposedB,
TensorList * c, DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0);
/*
matrix multiplication of the two tensors (return an XTensor structure) c = trans(a) * trans(b) * alpha
......
......@@ -117,7 +117,6 @@ XTensor MulAndShift(const XTensor &x, const XTensor &w, const XTensor &b,
ShowNTErrors("Something is wrong!");
}
/* tensor connections */
XLink::MakeLink(&x, &w, &b, &c, MATH_MULANDSHIFT);
XLink::AddParamToHeadInt(&c, n);
......
......@@ -219,4 +219,55 @@ XTensor Multiply(const XTensor &a, const XTensor &b, DTYPE alpha, int leadingDim
return c;
}
/*
element-wise product of two tensors
c(i) = a(i)*b(i) + \alpha * c(i)
where i is the index of the item
>> a - tensor a
>> b - tensor b
>> c - result tensor
>> alpha - the coefficient
>> leadingDim - the dimension along which we perform broadcasting
>> requireLink - whether to add the operation to the network
*/
void Multiply(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int leadingDim, bool requireLink)
{
if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
InitTensor(&c, &a);
}
int n = GetMultiplyDimIndex(a, b);
if (n == -1) {
CheckNTErrors(a.dimSize[leadingDim] == b.dimSize[leadingDim], "TODO!");
/* call _Multiply function */
_Multiply(&a, &b, &c, 0, leadingDim);
if (requireLink) {
/* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_MULTIPLY);
XLink::AddParamToHead(&c, alpha);
XLink::AddParamToHeadInt(&c, leadingDim);
}
}
else if (n >= 0 && n < a.order) {
/* call _MultiplyDim function */
_MultiplyDim(&a, &b, &c, n, alpha);
if (requireLink) {
/* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_MULTIPLYDIM);
XLink::AddParamToHeadInt(&c, n);
XLink::AddParamToHead(&c, alpha);
}
}
else {
ShowNTErrors("Something is wrong!");
}
}
} // namespace nts(NiuTrans.Tensor)
......@@ -49,6 +49,13 @@ where i is the index of the element
*/
XTensor Multiply(const XTensor &a, const XTensor &b, DTYPE alpha = 0.0, int leadingDim = 0);
/*
element-wise product of two tensors:
c(i) = a(i)*b(i) + \alpha * c(i)
where i is the index of the element
*/
void Multiply(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha = 0.0, int leadingDim = 0, bool requireLink = false);
} // namespace nts(NiuTrans.Tensor)
#endif // __MULTIPLY_H__
\ No newline at end of file
......@@ -170,6 +170,36 @@ XTensor MultiplyDim(const XTensor &a, const XTensor &b, int n)
}
/*
tensor multiplication
c = a * b + \alpha * c
where the size of b is equal to the n-th dimension of a,
i.e., a is multiplied with b by broadcasting
>> a - a tensor
>> b - another tensor whose size is equal to that of dimension n of a
>> c - where we put a * b + \alpha * c
>> n - the dimension index
>> requireLink - whether to add the operation to the network
*/
void MultiplyDim(const XTensor &a, const XTensor &b, XTensor &c, int n, bool requireLink)
{
if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
InitTensor(&c, &a);
}
/* call _MultiplyDim function */
_MultiplyDim(&a, &b, &c, n, 0);
if (requireLink) {
/* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_MULTIPLYDIM);
XLink::AddParamToHeadInt(&c, n);
XLink::AddParamToHead(&c, 0);
}
}
/*
tensor broadcast multiplication
c = a * b + c * \beta
where some of dimensions of b can be of size 1
......@@ -309,4 +339,30 @@ XTensor MultiplyBroadcast(const XTensor &a, const XTensor &b)
return c;
}
/*
tensor broadcast multiplication
c = a * b + c * \beta
where some of dimensions of b can be of size 1
>> a - a tensor
>> b - another tensor that would be broadcasted
>> c - the resulting tensor
>> requireLink - whether to add the operation to the network
*/
void MultiplyBroadcast(const XTensor &a, const XTensor &b, XTensor &c, bool requireLink)
{
if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
InitTensor(&c, &a);
}
/* call _MultiplyBroadcast function */
_MultiplyBroadcast(&a, &b, &c, 0);
if (requireLink) {
/* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_MULTIPLYBROADCAST);
XLink::AddParamToHead(&c, 0);
}
}
}
......@@ -38,6 +38,10 @@ void _MultiplyDimMe(XTensor * a, const XTensor * b, int n, DTYPE alpha = 0.0);
i.e., a is multiplied with b by broadcasting. We make a new tensor c to keep the result and return it */
XTensor MultiplyDim(const XTensor &a, const XTensor &b, int n);
/* tensor multiplication c = a * b + \alpha * c where the size of b is equal to the n-th dimension of a,
i.e., a is multiplied with b by broadcasting */
void MultiplyDim(const XTensor &a, const XTensor &b, XTensor &c, int n, bool requireLink = false);
/* tensor multiplication summation c = a * b + c * \beta where some of dimensions of b can be of size 1 */
void _MultiplyBroadcast(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta = (DTYPE)1.0);
......@@ -45,6 +49,9 @@ void _MultiplyBroadcast(const XTensor * a, const XTensor * b, XTensor * c, DTYPE
we return the resulting tensor here */
XTensor MultiplyBroadcast(const XTensor &a, const XTensor &b);
/* tensor multiplication summation c = a * b + c * \beta where some of dimensions of b can be of size 1 */
void MultiplyBroadcast(const XTensor &a, const XTensor &b, XTensor &c, bool requireLink = false);
} // namespace nts(NiuTrans.Tensor)
#endif // __MULTIPLYDIM_H__
......@@ -79,4 +79,25 @@ XTensor Negate(const XTensor & a)
return b;
}
/*
set every entry to its minus value
>> a - input tensor we are processing
>> b - output tensor we are processing
>> requireLink - whether to add the operation to the network
*/
void Negate(const XTensor & a, XTensor & b, bool requireLink)
{
if (!b.isInit || !XTensor::IsSameShaped(&a, &b)) {
InitTensor(&b, &a);
}
/* call _Negate function */
_Negate(&a, &b);
if (requireLink) {
/* tensor connections */
XLink::MakeLink(&a, NULL, &b, MATH_NEGATE);
}
}
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
......@@ -41,6 +41,9 @@ make a new tensor to keep the result and return it
*/
XTensor Negate(const XTensor & a);
/* set every entry to its minus value */
void Negate(const XTensor & a, XTensor & b, bool requireLink = false);
} // namespace nts(NiuTrans.Tensor)
#endif // __NEGATE_H__
......@@ -84,4 +84,25 @@ XTensor Sign(const XTensor & a)
return b;
}
/*
set every entry to its sign value
>> a - input tensor we are processing
>> b - output tensor we are processing
>> requireLink - whether to add the operation to the network
*/
void Sign(const XTensor & a, XTensor & b, bool requireLink)
{
if (!b.isInit || !XTensor::IsSameShaped(&a, &b)) {
InitTensor(&b, &a);
}
/* call _Sign function */
_Sign(&a, &b);
if (requireLink) {
/* tensor connections */
XLink::MakeLink(&a, NULL, &b, MATH_SIGN);
}
}
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
......@@ -41,6 +41,9 @@ make a new tensor to keep the result and return it
*/
XTensor Sign(const XTensor & a);
/* set every entry to its sign value */
void Sign(const XTensor & a, XTensor & b, bool requireLink = false);
} // namespace nts(NiuTrans.Tensor)
#endif // __SIGN_H__
......@@ -196,4 +196,47 @@ XTensor Sub(const XTensor &a, const XTensor &b, DTYPE beta)
return c;
}
/*
tensor subtraction c = a - b * \beta
>> a - a tensor
>> b - another tensor
>> c - where we put a-b*\beta
>> beta - the scaling factor
>> requireLink - whether to add the operation to the network
*/
void Sub(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta, bool requireLink)
{
if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
InitTensor(&c, &a);
}
int n = GetSubDimIndex(a, b);
if (n == -1) {
/* call _Sub function */
_Sub(&a, &b, &c, beta);
if (requireLink) {
/* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_SUB);
XLink::AddParamToHead(&c, beta);
}
}
else if (n >= 0 && n < a.order) {
/* call _SubDim function */
_SubDim(&a, &b, &c, n, beta);
if (requireLink) {
/* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_SUBDIM);
XLink::AddParamToHeadInt(&c, n);
XLink::AddParamToHead(&c, beta);
}
}
else {
ShowNTErrors("Something is wrong!");
}
}
} // namespace nts(NiuTrans.Tensor)
......@@ -42,6 +42,9 @@ make a new tensor c to keep the result and return it
*/
XTensor Sub(const XTensor &a, const XTensor &b, DTYPE beta = (DTYPE)1.0);
/* tensor subtraction c = a - b * \beta */
void Sub(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta = (DTYPE)1.0, bool requireLink = false);
} // namespace nts(NiuTrans.Tensor)
#endif // __SUB_H__
......@@ -171,4 +171,35 @@ XTensor SubDim(const XTensor &a, const XTensor &b, int n, DTYPE beta)
return c;
}
/*
tensor subtraction
c = a - b * \beta
where the size of b is equal to the n-th dimension of a,
i.e., a is subtracted with b by broadcasting
>> a - a tensor
>> b - another tensor whose size is equal to that of dimension n of a
>> c - where we put a-b*\beta
>> n - the dimension index
>> beta - the scaling factor
>> requireLink - whether to add the operation to the network
*/
void SubDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE beta, bool requireLink)
{
if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
InitTensor(&c, &a);
}
/* call _Sub function */
_SubDim(&a, &b, &c, n, beta);
if (requireLink) {
/* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_SUBDIM);
XLink::AddParamToHeadInt(&c, n);
XLink::AddParamToHead(&c, beta);
}
}
}
......@@ -38,6 +38,10 @@ void _SubDim(XTensor * a, const XTensor * b, int n, DTYPE beta = (DTYPE)1.0);
i.e., a is subtracted with b by broadcasting. We make a new tensor c to keep the result and return it */
XTensor SubDim(const XTensor &a, const XTensor &b, int n, DTYPE beta = (DTYPE)1.0);
/* tensor subtraction c = a - b * \beta where the size of b is equal to the n-th dimension of a,
i.e., a is subtracted with b by broadcasting*/
void SubDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE beta = (DTYPE)1.0, bool requireLink = false);
} // namespace nts(NiuTrans.Tensor)
#endif // __SUBDIM_H__
......@@ -201,4 +201,46 @@ XTensor Sum(const XTensor &a, const XTensor &b, DTYPE beta)
return c;
}
/*
tensor summation c = a + b * \beta
>> a - a tensor
>> b - another tensor
>> c - where we put a+b*\beta
>> beta - the scaling factor
>> requireLink - whether to add the operation to the network
*/
void Sum(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta, bool requireLink)
{
if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
InitTensor(&c, &a);
}
int n = GetSumDimIndex(a, b);
if (n == -1) {
/* call _Sum function */
_Sum(&a, &b, &c, beta);
if (requireLink) {
/* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_SUM);
XLink::AddParamToHead(&c, beta);
}
}
else if (n >= 0 && n < a.order) {
/* call _SumDim function */
_SumDim(&a, &b, &c, n, beta);
if (requireLink) {
/* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_SUMDIM);
XLink::AddParamToHeadInt(&c, n);
XLink::AddParamToHead(&c, beta);
}
}
else {
ShowNTErrors("Something is wrong!");
}
}
} // namespace nts(NiuTrans.Tensor)
......@@ -41,6 +41,9 @@ make a new tensor c to keep the result and return it
*/
XTensor Sum(const XTensor &a, const XTensor &b, DTYPE beta = (DTYPE)1.0);
/* tensor summation c = a + b * \beta */
void Sum(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta = (DTYPE)1.0, bool requireLink = false);
} // namespace nts(NiuTrans.Tensor)
#endif // __SUM_H__
......@@ -189,6 +189,37 @@ XTensor SumDim(const XTensor &a, const XTensor &b, int n, DTYPE beta)
}
/*
tensor summation
c = a + b * \beta
where the size of b is equal to the n-th dimension of a,
i.e., a is summed with b by broadcasting
>> a - a tensor
>> b - another tensor whose size is equal to that of dimension n of a
>> c - where we put a+b*\beta
>> n - the dimension index
>> beta - the scaling factor
>> requireLink - whether to add the operation to the network
*/
void SumDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE beta, bool requireLink)
{
if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
InitTensor(&c, &a);
}
/* call _SumDim function */
_SumDim(&a, &b, &c, n, beta);
if (requireLink) {
/* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_SUMDIM);
XLink::AddParamToHeadInt(&c, n);
XLink::AddParamToHead(&c, beta);
}
}
/*
tensor broadcast summation c = a + b * \beta where some of dimensions of b can be of size 1
c = a + b * \beta
......@@ -329,4 +360,30 @@ XTensor SumBroadcast(const XTensor &a, const XTensor &b, DTYPE beta)
return c;
}
/*
tensor broadcast summation c = a + b * \beta where some of dimensions of b can be of size 1
c = a + b * \beta
>> a - a tensor
>> b - another tensor that would be broadcasted
>> c - the resulting tensor
>> beta - the scaling factor
>> requireLink - whether to add the operation to the network
*/
void SumBroadcast(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta, bool requireLink)
{
if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
InitTensor(&c, &a);
}
/* call _SumBroadcast function */
_SumBroadcast(&a, &b, &c, beta);
if (requireLink) {
/* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_SUMBROADCAST);
XLink::AddParamToHead(&c, beta);
}
}
}
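A sketch contrasting the two broadcast-style summations here (shapes are assumptions for illustration): SumDim expects b to match exactly the n-th dimension of a, while SumBroadcast accepts a b whose size-1 dimensions are broadcast:
void SumBroadcastExample(const XTensor &a /* e.g. 4 x 8 */, const XTensor &bias /* e.g. 8 */,
                         const XTensor &mask /* e.g. 4 x 1 */, XTensor &c)
{
    SumDim(a, bias, c, 1);       /* add bias to every row of a along dimension 1 */
    SumBroadcast(a, mask, c);    /* mask's size-1 dimension is broadcast over a */
}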
......@@ -42,6 +42,10 @@ void _SumDim(XTensor * a, const XTensor * b, int n, DTYPE beta = (DTYPE)1.0);
i.e., a is summed with b by broadcasting. We make a new tensor c to keep the result and return it */
XTensor SumDim(const XTensor &a, const XTensor &b, int n, DTYPE beta = (DTYPE)1.0);
/* tensor summation c = a + b * \beta where the size of b is equal to the n-th dimension of a,
i.e., a is summed with b by broadcasting */
void SumDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE beta = (DTYPE)1.0, bool requireLink = false);
/* tensor broadcast summation c = a + b * \beta where some of dimensions of b can be of size 1 */
void _SumBroadcast(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta = (DTYPE)1.0);
......@@ -49,6 +53,9 @@ void _SumBroadcast(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta
we return the resulting tensor here */
XTensor SumBroadcast(const XTensor &a, const XTensor &b, DTYPE beta = (DTYPE)1.0);
/* tensor broadcast summation c = a + b * \beta where some of dimensions of b can be of size 1 */
void SumBroadcast(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta = (DTYPE)1.0, bool requireLink = false);
} // namespace nts(NiuTrans.Tensor)
#endif // __SUMDIM_H__
......@@ -201,9 +201,9 @@ void _CudaBLASMatrixMULBatchedStrided(cublasHandle_t * handle,
matrix multiplication via cuda version BLAS
*/
void _CudaBLASMatrixMULList(cublasHandle_t * handle,
const XList * a, MATRIX_TRANS_TYPE transposedA,
const XList * b, MATRIX_TRANS_TYPE transposedB,
XList * c,
const TensorList * a, MATRIX_TRANS_TYPE transposedA,
const TensorList * b, MATRIX_TRANS_TYPE transposedB,
TensorList * c,
int count, DTYPE alpha, DTYPE beta)
{
CheckNTErrors((a && b && c), "Empty input lists!");
......
......@@ -56,8 +56,8 @@ void _CudaBLASMatrixMULBatchedStrided(cublasHandle_t * handle,
DTYPE alpha = (DTYPE)1.0, DTYPE beta = 1.0);
/* matrix multiplication in batch mode via cuda version BLAS */
void _CudaBLASMatrixMULList(cublasHandle_t * handle, const XList * a, MATRIX_TRANS_TYPE transposedA,
const XList * b, MATRIX_TRANS_TYPE transposedB, XList * c,
void _CudaBLASMatrixMULList(cublasHandle_t * handle, const TensorList * a, MATRIX_TRANS_TYPE transposedA,
const TensorList * b, MATRIX_TRANS_TYPE transposedB, TensorList * c,
int count, DTYPE alpha = (DTYPE)1.0, DTYPE beta = 1.0);
#endif
......
......@@ -111,9 +111,10 @@ void _IndexToOnehot(XTensor * index, XTensor * onehot, int size, float labelSmoo
onehot->SetZeroAll();
#ifdef USE_CUDA
float confidence = 1 - labelSmoothingP;
float lowconfidence = labelSmoothingP / size;
#ifdef USE_CUDA
if(onehot->devID >= 0 && index->devID >= 0) {
_CudaIndexToOnehot(index, onehot, size, confidence, lowconfidence);
return;
......@@ -129,8 +130,7 @@ void _IndexToOnehot(XTensor * index, XTensor * onehot, int size, float labelSmoo
for (int i = 0; i < blockNum; i++) {
int id = indexData[i];
DTYPE * od = onehotData + i * stride;
od[id] = 2;
//onehotData[i * stride + id] = 1;
od[id] = 1;
}
}
......
......@@ -31,16 +31,31 @@ int scale(int x, int scale)
return x * scale;
}
float scale(float x, float scale)
{
return x * scale;
}
int descale(int x, int descale)
{
return x / descale;
}
float descale(float x, float descale)
{
return x / descale;
}
int shift(int x, int shift)
{
return x + shift;
}
float shift(float x, float shift)
{
return x + shift;
}
int mod(int x, int mod)
{
return x % mod;
......@@ -48,7 +63,7 @@ int mod(int x, int mod)
#ifdef USE_CUDA
/* define three macros separately, specifying the respective function names (GPU mode) */
#define _SIMPLE_BINARY_FUNCTION(_funcName, _cudaFuncName, origFunc) \
#define _SIMPLE_BINARY_FUNCTION_INT(_funcName, _cudaFuncName, origFunc) \
void _funcName(const XTensor * a, XTensor * b, int num) \
{ \
/* run it on GPUs */ \
......@@ -58,82 +73,188 @@ void _funcName(const XTensor * a, XTensor * b, int num) \
} \
CheckNTErrors((XTensor::IsSameShaped(a, b)), \
"Input tensors should have the same data type!"); \
CheckNTErrors((a->dataType == X_INT), "TODO!"); \
CheckNTErrors((a->dataType == X_INT&&b->dataType == X_INT), "TODO!"); \
int * d = (int*)a->data; \
int * db = (int*)b->data; \
for (int i = 0; i < a->unitNum; i++) \
db[i] = (int)origFunc(d[i], num); \
} \
#define _SIMPLE_BINARY_FUNCTION(_funcName, _cudaFuncName, origFunc) \
void _funcName(const XTensor * a, XTensor * b, float num) \
{ \
/* run it on GPUs */ \
if (a->devID >= 0) { \
_cudaFuncName(a, b, num); \
return; \
} \
CheckNTErrors((XTensor::IsSameShaped(a, b)), \
"Input tensors should have the same data type!"); \
CheckNTErrors((a->dataType == X_FLOAT&&b->dataType == X_FLOAT), "TODO!");\
float * d = (float*)a->data; \
float * db = (float*)b->data; \
for (int i = 0; i < a->unitNum; i++) \
db[i] = (float)origFunc(d[i], num); \
}
#define SIMPLE_BINARY_FUNCTION_ME(funcName, _funcName) \
#define SIMPLE_BINARY_FUNCTION_ME_INT(funcName, _funcName) \
void funcName(XTensor &a, int num) \
{ \
_funcName(&a, &a, num); \
}
} \
#define SIMPLE_BINARY_FUNCTION(funcName, _funcName) \
#define SIMPLE_BINARY_FUNCTION_ME(funcName, _funcName) \
void funcName(XTensor &a, float num) \
{ \
_funcName(&a, &a, num); \
} \
#define SIMPLE_BINARY_FUNCTION_INT(funcName, _funcName) \
void funcName(const XTensor &a, XTensor &b, int num) \
{ \
_funcName(&a, &b, num); \
}
} \
_SIMPLE_BINARY_FUNCTION(_Scale, _CudaScale, scale)
SIMPLE_BINARY_FUNCTION_ME(Scale, _Scale)
SIMPLE_BINARY_FUNCTION(Scale, _Scale)
#define SIMPLE_BINARY_FUNCTION(funcName, _funcName, operationId) \
XTensor funcName(const XTensor &a, float num) \
{ \
XTensor b(&a); \
b.SetTMPFlag(); \
_funcName(&a, &b, num); \
XLink::MakeLink(&a, NULL, &b, operationId); \
return b; \
} \
_SIMPLE_BINARY_FUNCTION(_Descale, _CudaDescale, descale)
SIMPLE_BINARY_FUNCTION_ME(Descale, _Descale)
SIMPLE_BINARY_FUNCTION(Descale, _Descale)
#define SIMPLE_BINARY_FUNCTION_VOID(funcName, _funcName, operationId) \
void funcName(const XTensor &a, XTensor &b, float num, bool requireLink) \
{ \
if (!b.isInit || !XTensor::IsSameShaped(&a, &b)) { \
InitTensor(&b, &a); \
} \
_funcName(&a, &b, num); \
if (requireLink) { \
XLink::MakeLink(&a, NULL, &b, operationId); \
} \
} \
_SIMPLE_BINARY_FUNCTION(_Shift, _CudaShift, shift)
SIMPLE_BINARY_FUNCTION_ME(Shift, _Shift)
SIMPLE_BINARY_FUNCTION(Shift, _Shift)
_SIMPLE_BINARY_FUNCTION_INT(_Scale, _CudaScale, scale)
SIMPLE_BINARY_FUNCTION_ME_INT(_ScaleMe, _Scale)
SIMPLE_BINARY_FUNCTION_INT(Scale, _Scale)
_SIMPLE_BINARY_FUNCTION(_Scale, _CudaScaleFloat, scale)
SIMPLE_BINARY_FUNCTION_ME(_ScaleMe, _Scale)
SIMPLE_BINARY_FUNCTION(Scale, _Scale, MATH_SCALE)
SIMPLE_BINARY_FUNCTION_VOID(Scale, _Scale, MATH_SCALE)
_SIMPLE_BINARY_FUNCTION_INT(_Descale, _CudaDescale, descale)
SIMPLE_BINARY_FUNCTION_ME_INT(_DescaleMe, _Descale)
SIMPLE_BINARY_FUNCTION_INT(Descale, _Descale)
_SIMPLE_BINARY_FUNCTION(_Descale, _CudaDescaleFloat, descale)
SIMPLE_BINARY_FUNCTION_ME(_DescaleMe, _Descale)
SIMPLE_BINARY_FUNCTION(Descale, _Descale, MATH_DESCALE)
SIMPLE_BINARY_FUNCTION_VOID(Descale, _Descale, MATH_DESCALE)
_SIMPLE_BINARY_FUNCTION_INT(_Shift, _CudaShift, shift)
SIMPLE_BINARY_FUNCTION_ME_INT(_ShiftMe, _Shift)
SIMPLE_BINARY_FUNCTION_INT(Shift, _Shift)
_SIMPLE_BINARY_FUNCTION(_Shift, _CudaShiftFloat, shift)
SIMPLE_BINARY_FUNCTION_ME(_ShiftMe, _Shift)
SIMPLE_BINARY_FUNCTION(Shift, _Shift, MATH_SHIFT)
SIMPLE_BINARY_FUNCTION_VOID(Shift, _Shift, MATH_SHIFT)
_SIMPLE_BINARY_FUNCTION(_Mod, _CudaMod, mod)
SIMPLE_BINARY_FUNCTION_ME(Mod, _Mod)
SIMPLE_BINARY_FUNCTION(Mod, _Mod)
_SIMPLE_BINARY_FUNCTION_INT(_Mod, _CudaMod, mod)
SIMPLE_BINARY_FUNCTION_ME_INT(_ModMe, _Mod)
SIMPLE_BINARY_FUNCTION_INT(Mod, _Mod)
#else
/* define three macros separately, specifying the respective function names (CPU mode) */
#define _SIMPLE_BINARY_FUNCTION(_funcName, origFunc) \
#define _SIMPLE_BINARY_FUNCTION_INT(_funcName, _cudaFuncName, origFunc) \
void _funcName(const XTensor * a, XTensor * b, int num) \
{ \
/* run it on GPUs */ \
if (a->devID >= 0) { \
_cudaFuncName(a, b, num); \
return; \
} \
CheckNTErrors((XTensor::IsSameShaped(a, b)), \
"Input tensors should have the same data type!"); \
CheckNTErrors((a->dataType == X_INT), "TODO!"); \
CheckNTErrors((a->dataType == X_INT&&b->dataType == X_INT), "TODO!"); \
int * d = (int*)a->data; \
int * db = (int*)b->data; \
for (int i = 0; i < a->unitNum; i++) \
db[i] = (int)origFunc(d[i], num); \
} \
#define _SIMPLE_BINARY_FUNCTION(_funcName, _cudaFuncName, origFunc) \
void _funcName(const XTensor * a, XTensor * b, float num) \
{ \
/* run it on GPUs */ \
if (a->devID >= 0) { \
_cudaFuncName(a, b, num); \
return; \
} \
CheckNTErrors((XTensor::IsSameShaped(a, b)), \
"Input tensors should have the same data type!"); \
CheckNTErrors((a->dataType == X_FLOAT&&b->dataType == X_FLOAT), "TODO!");\
float * d = (float*)a->data; \
float * db = (float*)b->data; \
for (int i = 0; i < a->unitNum; i++) \
db[i] = (float)origFunc(d[i], num); \
}
#define SIMPLE_BINARY_FUNCTION_ME_INT(funcName, _funcName) \
void funcName(XTensor &a, int num) \
{ \
_funcName(&a, &a, num); \
} \
#define SIMPLE_BINARY_FUNCTION_ME(funcName, _funcName) \
void funcName(XTensor & a, int num) \
void funcName(XTensor &a, float num) \
{ \
_funcName(&a, &a, num); \
}
} \
#define SIMPLE_BINARY_FUNCTION_INT(funcName, _funcName) \
void funcName(const XTensor &a, XTensor &b, int num) \
{ \
_funcName(&a, &b, num); \
} \
#define SIMPLE_BINARY_FUNCTION(funcName, _funcName) \
void funcName(const XTensor & a, XTensor &b, int num) \
void funcName(const XTensor &a, XTensor &b, float num) \
{ \
_funcName(&a, &b, num); \
}
} \
_SIMPLE_BINARY_FUNCTION_INT(_Scale, _CudaScale, scale)
SIMPLE_BINARY_FUNCTION_ME_INT(Scale, _Scale)
SIMPLE_BINARY_FUNCTION_INT(Scale, _Scale)
_SIMPLE_BINARY_FUNCTION(_Scale, scale)
_SIMPLE_BINARY_FUNCTION(_Scale, _CudaScaleFloat, scale)
SIMPLE_BINARY_FUNCTION_ME(Scale, _Scale)
SIMPLE_BINARY_FUNCTION(Scale, _Scale)
_SIMPLE_BINARY_FUNCTION(_Descale, descale)
_SIMPLE_BINARY_FUNCTION_INT(_Descale, _CudaDescale, descale)
SIMPLE_BINARY_FUNCTION_ME_INT(Descale, _Descale)
SIMPLE_BINARY_FUNCTION_INT(Descale, _Descale)
_SIMPLE_BINARY_FUNCTION(_Descale, _CudaDescaleFloat, descale)
SIMPLE_BINARY_FUNCTION_ME(Descale, _Descale)
SIMPLE_BINARY_FUNCTION(Descale, _Descale)
_SIMPLE_BINARY_FUNCTION(_Shift, shift)
_SIMPLE_BINARY_FUNCTION_INT(_Shift, _CudaShift, shift)
SIMPLE_BINARY_FUNCTION_ME_INT(Shift, _Shift)
SIMPLE_BINARY_FUNCTION_INT(Shift, _Shift)
_SIMPLE_BINARY_FUNCTION(_Shift, _CudaShiftFloat, shift)
SIMPLE_BINARY_FUNCTION_ME(Shift, _Shift)
SIMPLE_BINARY_FUNCTION(Shift, _Shift)
_SIMPLE_BINARY_FUNCTION(_Mod, mod)
SIMPLE_BINARY_FUNCTION_ME(Mod, _Mod)
SIMPLE_BINARY_FUNCTION(Mod, _Mod)
_SIMPLE_BINARY_FUNCTION_INT(_Mod, _CudaMod, mod)
SIMPLE_BINARY_FUNCTION_ME_INT(Mod, _Mod)
SIMPLE_BINARY_FUNCTION_INT(Mod, _Mod)
#endif
......
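A sketch of the split int/float entry points defined by these macros (tensor data types are assumptions for illustration); the integer overloads keep the original behaviour, the float overloads are the new ones, and the _...Me forms work in place:
void ScaleFamilyExample(const XTensor &xInt /* X_INT */, XTensor &xFloat /* X_FLOAT */, XTensor &y)
{
    Scale(xInt, y, 2);         /* integer path: y_i = x_i * 2 */
    Scale(xFloat, y, 2.0F);    /* float path: y_i = x_i * 2.0 */
    _ScaleMe(xFloat, 0.5F);    /* in place: x_i = x_i * 0.5 */
    Mod(xInt, y, 3);           /* mod is defined on the integer path only */
}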
......@@ -36,18 +36,36 @@ int cudascale(int x, int scale)
}
__device__
float cudascale(float x, float scale)
{
return x * scale;
}
__device__
int cudadescale(int x, int descale)
{
return x / descale;
}
__device__
float cudadescale(float x, float descale)
{
return x / descale;
}
__device__
int cudashift(int x, int shift)
{
return x + shift;
}
__device__
float cudashift(float x, float shift)
{
return x + shift;
}
__device__
int cudamod(int x, int mod)
{
return x % mod;
......@@ -92,9 +110,51 @@ void _Cuda##funcName(const XTensor * a, XTensor * b, int num) \
BacktoCudaDev(a->devID, devIDBackup); \
} \
#define SIMPLE_BINARY_FUNCTION_FLOAT_GPU(funcName, origFunc) \
__global__ \
void Kernel##funcName(float * a, float * b, int size, float num) \
{ \
int i = blockDim.x * blockIdx.x + threadIdx.x; \
\
if (i < size) \
b[i] = (float)origFunc(a[i], num); \
} \
\
\
void _Cuda##funcName(const XTensor * a, XTensor * b, float num) \
{ \
CheckNTErrors((XTensor::IsSameShaped(a, b)), \
"Input tensors should have the same type!"); \
CheckNTErrors((a->isSparse == false), "TODO!"); \
\
int gridSize[3]; \
int blockSize[3]; \
\
GDevs.GetCudaThread(a->devID, a->unitNum, gridSize, blockSize); \
\
dim3 blocks(gridSize[0]); \
dim3 threads(blockSize[0]); \
\
int devIDBackup; \
ProtectCudaDev(a->devID, devIDBackup); \
\
if (a->dataType == X_FLOAT) { \
Kernel##funcName<<<blocks, threads>>> \
((float*)a->data, (float*)b->data, a->unitNum, num);\
} \
else { \
ShowNTErrors("TODO!"); \
} \
\
BacktoCudaDev(a->devID, devIDBackup); \
}
SIMPLE_BINARY_FUNCTION_GPU(Scale, cudascale)
SIMPLE_BINARY_FUNCTION_FLOAT_GPU(ScaleFloat, cudascale)
SIMPLE_BINARY_FUNCTION_GPU(Descale, cudadescale)
SIMPLE_BINARY_FUNCTION_FLOAT_GPU(DescaleFloat, cudadescale)
SIMPLE_BINARY_FUNCTION_GPU(Shift, cudashift)
SIMPLE_BINARY_FUNCTION_FLOAT_GPU(ShiftFloat, cudashift)
SIMPLE_BINARY_FUNCTION_GPU(Mod, cudamod)
#endif // USE_CUDA
......
......@@ -32,20 +32,29 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* scale each entry (CUDA Kernel) */
__global__
void KernelScale(int * a, int * b, int size, int scale);
__global__
void KernelScale(int * a, int * b, int size, float scale);
/* scale each entry */
void _CudaScale(const XTensor * a, XTensor * b, int scale);
void _CudaScaleFloat(const XTensor * a, XTensor * b, float scale);
/* descale each entry (CUDA Kernel) */
__global__
void KernelDescale(int * a, int * b, int size, int scale);
__global__
void KernelDescale(int * a, int * b, int size, float scale);
/* descale each entry */
void _CudaDescale(const XTensor * a, XTensor * b, int scale);
void _CudaDescaleFloat(const XTensor * a, XTensor * b, float scale);
/* shift each entry (CUDA Kernel) */
__global__
void KernelShift(int * a, int * b, int size, int shift);
__global__
void KernelShift(int * a, int * b, int size, float shift);
/* shift each entry */
void _CudaShift(const XTensor * a, XTensor * b, int shift);
void _CudaShiftFloat(const XTensor * a, XTensor * b, float shift);
/* mod each entry (CUDA Kernel) */
__global__
......
......@@ -37,51 +37,76 @@ void _Scale(const XTensor * a, XTensor * b, float scale);
scale up tensor entries (on site)
b = a * scale
*/
void Scale(XTensor & a, int scale);
void Scale(XTensor & a, float scale);
void _ScaleMe(XTensor & a, int scale);
void _ScaleMe(XTensor & a, float scale);
/*
scale up tensor entries
b = a * scale
*/
void Scale(const XTensor & a, XTensor &b, int scale);
void Scale(const XTensor & a, XTensor &b, float scale);
void Scale(const XTensor & a, XTensor &b, float scale, bool requireLink = false);
/*
scale up tensor entries (return an XTensor structure)
b = a * scale
*/
XTensor Scale(const XTensor & a, float scale);
/*
descale tensor entries
b = a / scale
*/
void _Descale(const XTensor * a, XTensor * b, int scale);
void _Descale(const XTensor * a, XTensor * b, float scale);
/*
descale tensor entries (on site)
b = a / scale
*/
void Descale(XTensor & a, int scale);
void _DescaleMe(XTensor & a, int scale);
void _DescaleMe(XTensor & a, float scale);
/*
descale tensor entries
b = a / scale
*/
void Descale(const XTensor & a, XTensor & b, int scale);
void Descale(const XTensor & a, XTensor & b, float scale, bool requireLink = false);
/*
descale tensor entries (return an XTensor structure)
b = a / scale
*/
XTensor Descale(const XTensor & a, float scale);
/*
shift tensor entries
b = a + shift
*/
void _Shift(const XTensor * a, XTensor * b, int shift);
void _Shift(const XTensor * a, XTensor * b, float shift);
/*
shift tensor entries (on site)
b = a + shift
*/
void Shift(XTensor & a, int shift);
void _ShiftMe(XTensor & a, int shift);
void _ShiftMe(XTensor & a, float shift);
/*
shift tensor entries
b = a + shift
*/
void Shift(const XTensor & a, XTensor & b, int shift);
void Shift(const XTensor & a, XTensor & b, float shift, bool requireLink = false);
/*
shift tensor entries (return an XTensor structure)
b = a + shift
*/
XTensor Shift(const XTensor & a, float shift);
/*
mod tensor entries
......@@ -93,7 +118,7 @@ void _Mod(const XTensor * a, XTensor * b, int base);
mod tensor entries (on site)
b = a % mod
*/
void Mod(XTensor & a, int base);
void _ModMe(XTensor & a, int base);
/*
mod tensor entries
......
......@@ -94,6 +94,23 @@ XTensor Clip(const XTensor & a, DTYPE lower, DTYPE upper)
return b;
}
void Clip(const XTensor & a, XTensor & b, DTYPE lower, DTYPE upper, bool requireLink)
{
if (!b.isInit || !XTensor::IsSameShaped(&a, &b)) {
InitTensor(&b, &a);
}
/* call _Clip function */
_Clip(&a, &b, lower, upper);
if (requireLink) {
/* tensor connections */
XLink::MakeLink(&a, NULL, &b, MATH_CLIP);
XLink::AddParamToHead(&b, lower);
XLink::AddParamToHead(&b, upper);
}
}
/*
backward computation
......
......@@ -37,6 +37,8 @@ void _ClipMe(XTensor * a, DTYPE lower, DTYPE upper);
make a new tensor to keep the result and return it */
XTensor Clip(const XTensor & a, DTYPE lower, DTYPE upper);
void Clip(const XTensor & a, XTensor & b, DTYPE lower, DTYPE upper, bool requireLink = false);
/*
backward of Clip function
*/
......
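A usage sketch of the new Clip overload (an illustration only), clamping every entry into [lower, upper]:
void ClipExample(const XTensor &a, XTensor &b)
{
    Clip(a, b, (DTYPE)-1.0, (DTYPE)1.0);        /* b_i = min(max(a_i, -1), 1) */
    Clip(a, b, (DTYPE)-1.0, (DTYPE)1.0, true);  /* the same, with the MATH_CLIP link recorded */
}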
......@@ -138,12 +138,12 @@ XTensor Normalize(const XTensor &input, int dim, const XTensor &mean, const XTen
_Normalize(&input, &output, dim, &mean, &var, &a, &b, epsilon);
/* tensor connections */
XList list(5);
list.Add(&input);
list.Add(&mean);
list.Add(&var);
list.Add(&a);
list.Add(&b);
TensorList list(5);
list.Add((XTensor*)&input);
list.Add((XTensor*)&mean);
list.Add((XTensor*)&var);
list.Add((XTensor*)&a);
list.Add((XTensor*)&b);
XLink::MakeLink(&list, &output, MATH_NORMALIZE);
XLink::AddParamToHeadInt(&output, dim);
XLink::AddParamToHead(&output, epsilon);
......
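For reference, a sketch of building the renamed TensorList used above (an illustration; items are stored as XTensor*, which is why const arguments are cast in the code):
void ListExample(const XTensor &input, const XTensor &mean, const XTensor &var)
{
    TensorList list(3);
    list.Add((XTensor*)&input);
    list.Add((XTensor*)&mean);
    list.Add((XTensor*)&var);
}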
......@@ -102,4 +102,27 @@ XTensor Power(const XTensor & a, DTYPE p)
return b;
}
/*
get the power(a, p)
>> a - input tensor
>> b - output tensor
>> p - the exponent
>> requireLink - whether to add the operation to the network
*/
void Power(const XTensor & a, XTensor & b, DTYPE p, bool requireLink)
{
if (!b.isInit || !XTensor::IsSameShaped(&a, &b)) {
InitTensor(&b, &a);
}
/* call _Power function */
_Power(&a, &b, p);
if (requireLink) {
/* tensor connections */
XLink::MakeLink(&a, NULL, &b, MATH_POWER);
XLink::AddParamToHead(&b, p);
}
}
} // namespace nts(NiuTrans.Tensor)
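A usage sketch of the new Power overload (an illustration only):
void PowerExample(const XTensor &a, XTensor &b)
{
    Power(a, b, (DTYPE)2.0);         /* b_i = a_i ^ 2 */
    Power(a, b, (DTYPE)0.5, true);   /* b_i = sqrt(a_i), recorded as MATH_POWER */
}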
......@@ -41,6 +41,9 @@ make a new tensor to keep the result and return it
*/
XTensor Power(const XTensor & a, DTYPE p);
/* get the power(x, y) */
void Power(const XTensor & a, XTensor & b, DTYPE p, bool requireLink = false);
} // namespace nts(NiuTrans.Tensor)
#endif // __POWER_H__
......@@ -118,4 +118,33 @@ XTensor ScaleAndShift(const XTensor &a, DTYPE scale, DTYPE shift)
return b;
}
/*
scale and shift all tensor entries
b = a * scale + shift
>> a - the input tensor
>> b - the output tensor
>> scale - the scaling factor
>> shift - the shift factor
>> requireLink - whether to add the operation to the network
*/
void ScaleAndShift(const XTensor & a, XTensor & b, DTYPE scale, DTYPE shift, bool requireLink)
{
if (!b.isInit || !XTensor::IsSameShaped(&a, &b)) {
InitTensor(&b, &a);
}
/* call _ScaleAndShift function */
_ScaleAndShift(&a, &b, scale, shift);
if (requireLink) {
/* tensor connections */
XLink::MakeLink(&a, NULL, &b, MATH_SCALEANDSHIFT);
XLink::AddParamToHead(&b, scale);
XLink::AddParamToHead(&b, shift);
}
}
} // namespace nts(NiuTrans.Tensor)
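A usage sketch of the new ScaleAndShift overload (an illustration only); shift defaults to 0 when omitted:
void ScaleAndShiftExample(const XTensor &a, XTensor &b)
{
    ScaleAndShift(a, b, (DTYPE)0.1, (DTYPE)1.0);   /* b = a * 0.1 + 1.0 */
    ScaleAndShift(a, b, (DTYPE)2.0);               /* b = a * 2.0 */
}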
......@@ -50,6 +50,12 @@ b = a * scale + shift
*/
XTensor ScaleAndShift(const XTensor &a, DTYPE scale, DTYPE shift = 0);
/*
scale and shift all tensor entries
b = a * scale + shift
*/
void ScaleAndShift(const XTensor &a, XTensor &b, DTYPE scale, DTYPE shift = 0, bool requireLink = false);
} // namespace nts(NiuTrans.Tensor)
#endif // __SCALEANDSHIFT_H__
\ No newline at end of file
......@@ -82,58 +82,82 @@ XTensor funcName(const XTensor &a) \
return b; \
}
#define SIMPLE_UNARY_FUNCTION_VOID(funcName, _funcName, operationId) \
void funcName(const XTensor &a, XTensor &b, bool requireLink) \
{ \
if (!b.isInit || !XTensor::IsSameShaped(&a, &b)) { \
InitTensor(&b, &a); \
} \
_funcName(&a, &b); \
if (requireLink) { \
XLink::MakeLink(&a, NULL, &b, operationId); \
} \
}
_SIMPLE_UNARY_FUNCTION(_Absolute, _CudaAbsolute, fabs)
_SIMPLE_UNARY_FUNCTION_ME(_AbsoluteMe, _Absolute)
SIMPLE_UNARY_FUNCTION(Absolute, _Absolute, MATH_ABSOLUTE)
SIMPLE_UNARY_FUNCTION_VOID(Absolute, _Absolute, MATH_ABSOLUTE)
_SIMPLE_UNARY_FUNCTION(_Ceil, _CudaCeil, ceil)
_SIMPLE_UNARY_FUNCTION_ME(_CeilMe, _Ceil)
SIMPLE_UNARY_FUNCTION(Ceil, _Ceil, MATH_CEIL)
SIMPLE_UNARY_FUNCTION_VOID(Ceil, _Ceil, MATH_CEIL)
_SIMPLE_UNARY_FUNCTION(_Exp, _CudaExp, exp)
_SIMPLE_UNARY_FUNCTION_ME(_ExpMe, _Exp)
SIMPLE_UNARY_FUNCTION(Exp, _Exp, MATH_EXP)
SIMPLE_UNARY_FUNCTION_VOID(Exp, _Exp, MATH_EXP)
_SIMPLE_UNARY_FUNCTION(_Floor, _CudaFloor, floor)
_SIMPLE_UNARY_FUNCTION_ME(_FloorMe, _Floor)
SIMPLE_UNARY_FUNCTION(Floor, _Floor, MATH_FLOOR)
SIMPLE_UNARY_FUNCTION_VOID(Floor, _Floor, MATH_FLOOR)
_SIMPLE_UNARY_FUNCTION(_IsNonZero, _CudaIsNonZero, isnonzero)
_SIMPLE_UNARY_FUNCTION_ME(_IsNonZeroMe, _IsNonZero)
SIMPLE_UNARY_FUNCTION(IsNonZero, _IsNonZero, MATH_ISNONZERO)
SIMPLE_UNARY_FUNCTION_VOID(IsNonZero, _IsNonZero, MATH_ISNONZERO)
_SIMPLE_UNARY_FUNCTION(_IsZero, _CudaIsZero, iszero)
_SIMPLE_UNARY_FUNCTION_ME(_IsZeroMe, _IsZero)
SIMPLE_UNARY_FUNCTION(IsZero, _IsZero, MATH_ISZERO)
SIMPLE_UNARY_FUNCTION_VOID(IsZero, _IsZero, MATH_ISZERO)
_SIMPLE_UNARY_FUNCTION(_Log, _CudaLog, log)
_SIMPLE_UNARY_FUNCTION_ME(_LogMe, _Log)
SIMPLE_UNARY_FUNCTION(Log, _Log, MATH_LOG)
SIMPLE_UNARY_FUNCTION_VOID(Log, _Log, MATH_LOG)
_SIMPLE_UNARY_FUNCTION(_Round, _CudaRound, round)
_SIMPLE_UNARY_FUNCTION_ME(_RoundMe, _Round)
SIMPLE_UNARY_FUNCTION(Round, _Round, MATH_ROUND)
SIMPLE_UNARY_FUNCTION_VOID(Round, _Round, MATH_ROUND)
_SIMPLE_UNARY_FUNCTION(_Sqrt, _CudaSqrt, sqrt)
_SIMPLE_UNARY_FUNCTION_ME(_SqrtMe, _Sqrt)
SIMPLE_UNARY_FUNCTION(Sqrt, _Sqrt, MATH_SQRT)
SIMPLE_UNARY_FUNCTION_VOID(Sqrt, _Sqrt, MATH_SQRT)
_SIMPLE_UNARY_FUNCTION(_Square, _CudaSquare, square)
_SIMPLE_UNARY_FUNCTION_ME(_SquareMe, _Square)
SIMPLE_UNARY_FUNCTION(Square, _Square, MATH_SQUARE)
SIMPLE_UNARY_FUNCTION_VOID(Square, _Square, MATH_SQUARE)
_SIMPLE_UNARY_FUNCTION(_Sin, _CudaSin, sin)
_SIMPLE_UNARY_FUNCTION_ME(_SinMe, _Sin)
SIMPLE_UNARY_FUNCTION(Sin, _Sin, MATH_SIN)
SIMPLE_UNARY_FUNCTION_VOID(Sin, _Sin, MATH_SIN)
_SIMPLE_UNARY_FUNCTION(_Cos, _CudaCos, cos)
_SIMPLE_UNARY_FUNCTION_ME(_CosMe, _Cos)
SIMPLE_UNARY_FUNCTION(Cos, _Cos, MATH_COS)
SIMPLE_UNARY_FUNCTION_VOID(Cos, _Cos, MATH_COS)
_SIMPLE_UNARY_FUNCTION(_Tan, _CudaTan, tan)
_SIMPLE_UNARY_FUNCTION_ME(_TanMe, _Tan)
SIMPLE_UNARY_FUNCTION(Tan, _Tan, MATH_TAN)
SIMPLE_UNARY_FUNCTION_VOID(Tan, _Tan, MATH_TAN)
#else
/* define three macros separately, specifying the respective function names (CPU mode) */
......@@ -164,59 +188,82 @@ XTensor funcName(const XTensor &a) \
XLink::MakeLink(&a, NULL, &b, operationId); \
return b; \
}
#define SIMPLE_UNARY_FUNCTION_VOID(funcName, _funcName, operationId) \
void funcName(const XTensor &a, XTensor &b, bool requireLink) \
{ \
if (!b.isInit || !XTensor::IsSameShaped(&a, &b)) { \
InitTensor(&b, &a); \
} \
_funcName(&a, &b); \
if (requireLink) { \
XLink::MakeLink(&a, NULL, &b, operationId); \
} \
}
_SIMPLE_UNARY_FUNCTION(_Absolute, fabs)
_SIMPLE_UNARY_FUNCTION_ME(_AbsoluteMe, _Absolute)
SIMPLE_UNARY_FUNCTION(Absolute, _Absolute, MATH_ABSOLUTE)
SIMPLE_UNARY_FUNCTION_VOID(Absolute, _Absolute, MATH_ABSOLUTE)
_SIMPLE_UNARY_FUNCTION(_Ceil, ceil)
_SIMPLE_UNARY_FUNCTION_ME(_CeilMe, _Ceil)
SIMPLE_UNARY_FUNCTION(Ceil, _Ceil, MATH_CEIL)
SIMPLE_UNARY_FUNCTION_VOID(Ceil, _Ceil, MATH_CEIL)
_SIMPLE_UNARY_FUNCTION(_Exp, exp)
_SIMPLE_UNARY_FUNCTION_ME(_ExpMe, _Exp)
SIMPLE_UNARY_FUNCTION(Exp, _Exp, MATH_EXP)
SIMPLE_UNARY_FUNCTION_VOID(Exp, _Exp, MATH_EXP)
_SIMPLE_UNARY_FUNCTION(_Floor, floor)
_SIMPLE_UNARY_FUNCTION_ME(_FloorMe, _Floor)
SIMPLE_UNARY_FUNCTION(Floor, _Floor, MATH_FLOOR)
SIMPLE_UNARY_FUNCTION_VOID(Floor, _Floor, MATH_FLOOR)
_SIMPLE_UNARY_FUNCTION(_IsNonZero, isnonzero)
_SIMPLE_UNARY_FUNCTION_ME(_IsNonZeroMe, _IsNonZero)
SIMPLE_UNARY_FUNCTION(IsNonZero, _IsNonZero, MATH_ISNONZERO)
SIMPLE_UNARY_FUNCTION_VOID(IsNonZero, _IsNonZero, MATH_ISNONZERO)
_SIMPLE_UNARY_FUNCTION(_IsZero, iszero)
_SIMPLE_UNARY_FUNCTION_ME(_IsZeroMe, _IsZero)
SIMPLE_UNARY_FUNCTION(IsZero, _IsZero, MATH_ISZERO)
SIMPLE_UNARY_FUNCTION_VOID(IsZero, _IsZero, MATH_ISZERO)
_SIMPLE_UNARY_FUNCTION(_Log, log)
_SIMPLE_UNARY_FUNCTION_ME(_LogMe, _Log)
SIMPLE_UNARY_FUNCTION(Log, _Log, MATH_LOG)
SIMPLE_UNARY_FUNCTION_VOID(Log, _Log, MATH_LOG)
_SIMPLE_UNARY_FUNCTION(_Round, round)
_SIMPLE_UNARY_FUNCTION_ME(_RoundMe, _Round)
SIMPLE_UNARY_FUNCTION(Round, _Round, MATH_ROUND)
SIMPLE_UNARY_FUNCTION_VOID(Round, _Round, MATH_ROUND)
_SIMPLE_UNARY_FUNCTION(_Sqrt, sqrt)
_SIMPLE_UNARY_FUNCTION_ME(_SqrtMe, _Sqrt)
SIMPLE_UNARY_FUNCTION(Sqrt, _Sqrt, MATH_SQRT)
SIMPLE_UNARY_FUNCTION_VOID(Sqrt, _Sqrt, MATH_SQRT)
_SIMPLE_UNARY_FUNCTION(_Square, square)
_SIMPLE_UNARY_FUNCTION_ME(_SquareMe, _Square)
SIMPLE_UNARY_FUNCTION(Square, _Square, MATH_SQUARE)
SIMPLE_UNARY_FUNCTION_VOID(Square, _Square, MATH_SQUARE)
_SIMPLE_UNARY_FUNCTION(_Sin, sin)
_SIMPLE_UNARY_FUNCTION_ME(_SinMe, _Sin)
SIMPLE_UNARY_FUNCTION(Sin, _Sin, MATH_SIN)
SIMPLE_UNARY_FUNCTION_VOID(Sin, _Sin, MATH_SIN)
_SIMPLE_UNARY_FUNCTION(_Cos, cos)
_SIMPLE_UNARY_FUNCTION_ME(_CosMe, _Cos)
SIMPLE_UNARY_FUNCTION(Cos, _Cos, MATH_COS)
SIMPLE_UNARY_FUNCTION_VOID(Cos, _Cos, MATH_COS)
_SIMPLE_UNARY_FUNCTION(_Tan, tan)
_SIMPLE_UNARY_FUNCTION_ME(_TanMe, _Tan)
SIMPLE_UNARY_FUNCTION(Tan, _Tan, MATH_TAN)
SIMPLE_UNARY_FUNCTION_VOID(Tan, _Tan, MATH_TAN)
/*_SIMPLE_UNARY_FUNCTION(_Round, round)
_SIMPLE_UNARY_FUNCTION_ME(_RoundMe, _Round)
......
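The same pattern applies to every unary operator listed above; a brief sketch (an illustration only):
void UnaryVoidExample(const XTensor &a, XTensor &b)
{
    Exp(a, b);          /* b_i = exp(a_i), no link */
    Log(a, b, true);    /* b_i = log(a_i), MATH_LOG is linked into the network */
    _SquareMe(&b);      /* in place: b_i = b_i * b_i */
}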
......@@ -34,6 +34,8 @@ void _AbsoluteMe(XTensor * a);
/* set every entry to its absolute value (return an XTensor structure)
make a new tensor to keep the result and return it */
XTensor Absolute(const XTensor & a);
/* set every entry to its absolute value */
void Absolute(const XTensor & a, XTensor & b, bool requireLink = false);
/* set every entry to its ceil value */
void _Ceil(const XTensor * a, XTensor * b);
......@@ -43,6 +45,8 @@ void _CeilMe(XTensor * a);
/* set every entry to its ceil value (return an XTensor structure)
make a new tensor to keep the result and return it */
XTensor Ceil(const XTensor & a);
/* set every entry to its ceil value */
void Ceil(const XTensor & a, XTensor & b, bool requireLink = false);
/* set every entry to its exponent value */
void _Exp(const XTensor * a, XTensor * b);
......@@ -52,6 +56,8 @@ void _ExpMe(XTensor * a);
/* set every entry to its exponent value (return an XTensor structure)
make a new tensor to keep the result and return it */
XTensor Exp(const XTensor & a);
/* set every entry to its exponent value */
void Exp(const XTensor & a, XTensor & b, bool requireLink = false);
/* set every entry to its floor value */
void _Floor(const XTensor * a, XTensor * b);
......@@ -61,6 +67,8 @@ void _FloorMe(XTensor * a);
/* set every entry to its floor value (return an XTensor structure)
make a new tensor to keep the result and return it */
XTensor Floor(const XTensor & a);
/* set every entry to its floor value */
void Floor(const XTensor & a, XTensor & b, bool requireLink = false);
/* if source entry is non-zero, set target entry to be one, otherwise zero */
void _IsNonZero(const XTensor *a, XTensor *b);
......@@ -70,6 +78,8 @@ void _IsNonZeroMe(XTensor *a);
/* if source entry is non-zero, set target entry to be one, otherwise zero (return an XTensor structure)
make a new tensor to keep the result and return it */
XTensor IsNonZero(const XTensor &a);
/* if source entry is non-zero, set target entry to be one, otherwise zero */
void IsNonZero(const XTensor &a, XTensor & b, bool requireLink = false);
/* if source entry is zero, set target entry to be one, otherwise zero */
void _IsZero(const XTensor *a, XTensor *b);
......@@ -79,6 +89,8 @@ void _IsZeroMe(XTensor *a);
/* if source entry is zero, set target entry to be one, otherwise zero (return an XTensor structure)
make a new tensor to keep the result and return it */
XTensor IsZero(const XTensor &a);
/* if source entry is zero, set target entry to be one, otherwise zero */
void IsZero(const XTensor &a, XTensor & b, bool requireLink = false);
/* set every entry to its logarithm value */
void _Log(const XTensor * a, XTensor * b);
......@@ -88,6 +100,8 @@ void _LogMe(XTensor * a);
/* set every entry to its logarithm value (return an XTensor structure)
make a new tensor to keep the result and return it */
XTensor Log(const XTensor & a);
/* set every entry to its logarithm value */
void Log(const XTensor & a, XTensor & b, bool requireLink = false);
/* set every entry to its round value */
void _Round(const XTensor * a, XTensor * b);
......@@ -97,6 +111,8 @@ void _RoundMe(XTensor * a);
/* set every entry to its round value (return an XTensor structure)
make a new tensor to keep the result and return it */
XTensor Round(const XTensor & a);
/* set every entry to its round value */
void Round(const XTensor & a, XTensor & b, bool requireLink = false);
/* set every entry to its sqrt value */
void _Sqrt(const XTensor * a, XTensor * b);
......@@ -106,6 +122,8 @@ void _SqrtMe(XTensor * a);
/* set every entry to its sqrt value (return an XTensor structure)
make a new tensor to keep the result and return it */
XTensor Sqrt(const XTensor & a);
/* set every entry to its sqrt value */
void Sqrt(const XTensor & a, XTensor & b, bool requireLink = false);
/* set every entry to its square value */
void _Square(const XTensor * a, XTensor * b);
......@@ -115,6 +133,8 @@ void _SquareMe(XTensor * a);
/* set every entry to its square value (return an XTensor structure)
make a new tensor to keep the result and return it */
XTensor Square(const XTensor & a);
/* set every entry to its square value */
void Square(const XTensor & a, XTensor & b, bool requireLink = false);
/* set every entry to its sine value */
......@@ -125,6 +145,8 @@ void _SinMe(XTensor * a);
/* set every entry to its sine value (return an XTensor structure)
make a new tensor to keep the result and return it */
XTensor Sin(const XTensor & a);
/* set every entry to its sine value */
void Sin(const XTensor & a, XTensor & b, bool requireLink = false);
/* set every entry to its cosine value */
void _Cos(const XTensor * a, XTensor * b);
......@@ -134,6 +156,8 @@ void _CosMe(XTensor * a);
/* set every entry to its cosine value (return an XTensor structure)
make a new tensor to keep the result and return it */
XTensor Cos(const XTensor & a);
/* set every entry to its cosine value */
void Cos(const XTensor & a, XTensor & b, bool requireLink = false);
/* set every entry to its tangent value */
void _Tan(const XTensor * a, XTensor * b);
......@@ -143,6 +167,8 @@ void _TanMe(XTensor * a);
/* set every entry to its tangent value (return an XTensor structure)
make a new tensor to keep the result and return it */
XTensor Tan(const XTensor & a);
/* set every entry to its tangent value */
void Tan(const XTensor & a, XTensor & b, bool requireLink = false);
} // namespace nts(NiuTrans.Tensor)
......
......@@ -229,10 +229,10 @@ XTensor CopyIndexed(const XTensor & s, int dim,
/* call _CopyIndexed function */
_CopyIndexed(&s, &t, dim, &srcIndex, &tgtIndex, copyNum);
XList list(3);
list.Add(&s);
list.Add(&srcIndex);
list.Add(&tgtIndex);
TensorList list(3);
list.Add((XTensor*)&s);
list.Add((XTensor*)&srcIndex);
list.Add((XTensor*)&tgtIndex);
/* tensor connection */
XLink::MakeLink(&list, &t, MOVEMENT_COPYINDEXED);
......
......@@ -131,4 +131,43 @@ XTensor ReduceMax(const XTensor &input, int dim)
return output;
}
/*
get the max value of the items along a dimension of the tensor
>> input - the input tensor
>> output - the output tensor
>> dim - the dimension along which the reduction is performed
>> requireLink - whether to add the operation to the network
*/
void ReduceMax(const XTensor &input, XTensor &output, int dim, bool requireLink)
{
CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!");
if (!output.isInit || !XTensor::IsReduceShaped(&input, &output, dim)) {
int order = input.order - 1;
int * dimSize = new int[order];
for (int i = 0; i < order; i++) {
if (i < dim)
dimSize[i] = input.dimSize[i];
else if (i >= dim)
dimSize[i] = input.dimSize[i + 1];
}
float dr = (!input.isSparse) ? 1.0F : input.denseRatio;
InitTensor(&output, order, dimSize, input.dataType, dr, input.devID, input.mem);
/* destroy variables */
delete[] dimSize;
}
/* call _ReduceMax function */
_ReduceMax(&input, &output, dim);
if (requireLink) {
/* tensor connections */
XLink::MakeLink(&input, NULL, &output, REDUCE_REDUCEMAX);
XLink::AddParamToHeadInt(&output, dim);
}
}
} // namespace nts(NiuTrans.Tensor)
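A usage sketch of the new ReduceMax overload (the shape is an assumption for illustration); output is (re)initialized to the reduced shape when it does not already match:
void ReduceMaxExample(const XTensor &input /* e.g. 16 x 32 */, XTensor &maxima)
{
    ReduceMax(input, maxima, 1);         /* maxima becomes a tensor of size 16 */
    ReduceMax(input, maxima, 1, true);   /* the same, with REDUCE_REDUCEMAX linked */
}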
......@@ -35,6 +35,9 @@ make a new tensor to keep the result and return it
*/
XTensor ReduceMax(const XTensor &input, int dim);
/* get the max value of the items along a dimension of the tensor. */
void ReduceMax(const XTensor &input, XTensor &output, int dim, bool requireLink = false);
} // namespace nts(NiuTrans.Tensor)
#endif // __REDUCEMAX_H__
......@@ -86,4 +86,45 @@ XTensor ReduceMean(const XTensor &input, int dim)
return output;
}
/*
get the mean value along a dimension of the tensor
For a 1-dimensional data array a, mean = (1/n) * sum_i input_i
>> input - the input tensor
>> output - the output tensor
>> dim - the dimension along which the reduction is performed
>> requireLink - whether to add the operation to the network
*/
void ReduceMean(const XTensor &input, XTensor &output, int dim, bool requireLink)
{
CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!");
if (!output.isInit || !XTensor::IsReduceShaped(&input, &output, dim)) {
int order = input.order - 1;
int * dimSize = new int[order];
for (int i = 0; i < order; i++) {
if (i < dim)
dimSize[i] = input.dimSize[i];
else if (i >= dim)
dimSize[i] = input.dimSize[i + 1];
}
float dr = (!input.isSparse) ? 1.0F : input.denseRatio;
InitTensor(&output, order, dimSize, input.dataType, dr, input.devID, input.mem);
/* destroy variables */
delete[] dimSize;
}
/* call _ReduceMean function */
_ReduceMean(&input, &output, dim);
if (requireLink) {
/* tensor connections */
XLink::MakeLink(&input, NULL, &output, REDUCE_REDUCEMEAN);
XLink::AddParamToHeadInt(&output, dim);
}
}
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
......@@ -39,6 +39,12 @@ For a 1-dimensional data array a, mean = (1/n) * sum_i input_i
*/
XTensor ReduceMean(const XTensor &input, int dim);
/*
get the mean value along a dimension of the tensor
For a 1-dimensional data array a, mean = (1/n) * sum_i input_i
*/
void ReduceMean(const XTensor &input, XTensor &output, int dim, bool requireLink = false);
} // namespace nts(NiuTrans.Tensor)
#endif // __REDUCEMEAN_H__
......@@ -244,6 +244,39 @@ XTensor ReduceSum(const XTensor &input, int dim, const XTensor &shift, DTYPE pow
return output;
}
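/*
sum the items along a dimension of the tensor
For a 1-dimensional data array a,
sum = \sum_i (a_i - shift)^power if isExp == false
sum = \sum_i exp((a_i - shift)^power) if isExp == true
>> input - the input tensor
>> output - the output tensor
>> dim - the dimension along which the reduction is performed
>> shift - shift the input
>> power - we perform pow(item_i, power) on each item in the array
>> isExp - specify whether exp() is performed
>> requireLink - whether to add the operation to the network
*/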
void ReduceSum(const XTensor &input, XTensor &output, int dim, const XTensor &shift, DTYPE power, bool isExp, bool requireLink)
{
CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!");
if (!output.isInit || !XTensor::IsReduceShaped(&input, &output, dim)) {
int order = input.order - 1;
int * dimSize = new int[order];
for (int i = 0; i < order; i++) {
if (i < dim)
dimSize[i] = input.dimSize[i];
else if (i >= dim)
dimSize[i] = input.dimSize[i + 1];
}
float dr = (!input.isSparse) ? 1.0F : input.denseRatio;
InitTensor(&output, order, dimSize, input.dataType, dr, input.devID, input.mem);
/* destroy variables */
delete[] dimSize;
}
/* call _ReduceSum function */
_ReduceSum(&input, &output, dim, &shift, power, isExp);
if (requireLink) {
/* tensor connections */
XLink::MakeLink(&input, &shift, &output, REDUCE_REDUCESUM);
XLink::AddParamToHeadInt(&output, dim);
XLink::AddParamToHead(&output, power);
XLink::AddParamToHeadBool(&output, isExp);
}
}
/*
sum the items along a dimension of the tensor (return an XTensor structure)
make a new tensor to keep the result and return it
......@@ -290,4 +323,52 @@ XTensor ReduceSum(const XTensor &input, int dim, DTYPE power, bool isExp)
return output;
}
/*
sum the items along a dimension of the tensor
For a 1-dimensional data array a,
sum = \sum_i (a_i)^power if isExp == false
sum = \sum_i exp((a_i)^power) if isExp == true
>> input - the input tensor
>> output - the output tensor
>> dim - the dimension along which the reduction is performed
>> power - we perform pow(item_i, power) on each item in the array
>> isExp - specify whether exp() is performed
>> requireLink - whether to add the operation to the network
*/
void ReduceSum(const XTensor &input, XTensor &output, int dim, DTYPE power, bool isExp, bool requireLink)
{
CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!");
if (!output.isInit || !XTensor::IsReduceShaped(&input, &output, dim)) {
int order = input.order - 1;
int * dimSize = new int[order];
for (int i = 0; i < order; i++) {
if (i < dim)
dimSize[i] = input.dimSize[i];
else if (i >= dim)
dimSize[i] = input.dimSize[i + 1];
}
float dr = (!input.isSparse) ? 1.0F : input.denseRatio;
InitTensor(&output, order, dimSize, input.dataType, dr, input.devID, input.mem);
/* destroy variables */
delete[] dimSize;
}
/* call _ReduceSum function */
_ReduceSum(&input, &output, dim, NULL, power, isExp);
if (requireLink) {
/* tensor connections */
XLink::MakeLink(&input, NULL, &output, REDUCE_REDUCESUM);
XLink::AddParamToHeadInt(&output, dim);
XLink::AddParamToHead(&output, power);
XLink::AddParamToHeadBool(&output, isExp);
}
}
} // namespace nts(NiuTrans.Tensor)
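A usage sketch of the new ReduceSum overloads (the shape is an assumption for illustration):
void ReduceSumExample(const XTensor &input /* e.g. 16 x 32 */, XTensor &sums)
{
    ReduceSum(input, sums, 1);                            /* plain sum along dimension 1 */
    ReduceSum(input, sums, 1, (DTYPE)2.0, false, true);   /* sum of squares, with the link recorded */
}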
......@@ -44,6 +44,8 @@ sum = \sum_i exp(a_i - shift) if isExp == true
*/
XTensor ReduceSum(const XTensor &input, int dim, const XTensor &shift, DTYPE power = (DTYPE)1.0F, bool isExp = false);
void ReduceSum(const XTensor &input, XTensor &output, int dim, const XTensor &shift, DTYPE power = (DTYPE)1.0F, bool isExp = false, bool requireLink = false);
/*
sum the items along a dimension of the tensor (return an XTensor structure)
make a new tensor to keep the result and return it
......@@ -53,6 +55,14 @@ sum = \sum_i exp(a_i) if isExp == true
*/
XTensor ReduceSum(const XTensor &input, int dim, DTYPE power = (DTYPE)1.0F, bool isExp = false);
/*
sum the items along a dimension of the tensor
For a 1-dimensional data array a,
sum = \sum_i (a_i) if isExp == false
sum = \sum_i exp(a_i) if isExp == true
*/
void ReduceSum(const XTensor &input, XTensor &output, int dim, DTYPE power = (DTYPE)1.0F, bool isExp = false, bool requireLink = false);
} // namespace nts(NiuTrans.Tensor)
#endif // __REDUCESUM_H__
......@@ -82,4 +82,46 @@ XTensor ReduceSumSquared(const XTensor &input, int dim, const XTensor &shift)
return output;
}
/*
squared sum of the items along a dimension of the tensor
For a 1-dimensional data array a, sum = \sum_i (a_i - shift)^2
>> input - the input tensor
>> output - the output tensor
>> dim - the dimension along which the reduction is performed
>> shift - bias on the input
>> requireLink - whether to add the operation to the network
*/
void ReduceSumSquared(const XTensor &input, XTensor &output, int dim, const XTensor &shift, bool requireLink)
{
CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!");
if (!output.isInit || !XTensor::IsReduceShaped(&input, &output, dim)) {
int order = input.order - 1;
int * dimSize = new int[order];
for (int i = 0; i < order; i++) {
if (i < dim)
dimSize[i] = input.dimSize[i];
else if (i >= dim)
dimSize[i] = input.dimSize[i + 1];
}
float dr = (!input.isSparse) ? 1.0F : input.denseRatio;
InitTensor(&output, order, dimSize, input.dataType, dr, input.devID, input.mem);
/* destroy variables */
delete[] dimSize;
}
/* call _ReduceSumSquared function */
_ReduceSumSquared(&input, &output, dim, &shift);
if (requireLink) {
/* tensor connections */
XLink::MakeLink(&input, &shift, &output, REDUCE_REDUCESUMSQUARED);
XLink::AddParamToHeadInt(&output, dim);
}
}
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
......@@ -40,6 +40,13 @@ For a 1-dimensional data array a, sum = \sum_i (a_i - shift)^2
*/
XTensor ReduceSumSquared(const XTensor &input, int dim, const XTensor &shift);
/*
squared sum of the items along a dimension of the tensor
For a 1-dimensional data array a,
sum = \sum_i (a_i - shift)^2
*/
void ReduceSumSquared(const XTensor &input, XTensor &output, int dim, const XTensor &shift, bool requireLink = false);
} // namespace nts(NiuTrans.Tensor)
#endif // __REDUCESUMSQUARED_H__
......
......@@ -84,4 +84,47 @@ XTensor ReduceVariance(const XTensor &input, int dim, const XTensor &mean)
return output;
}
/*
variance of the items along a dimension of the tensor
For a 1-dimensional data array a, variance = 1/n * \sum_i (a_i - mean)^2
>> input - the input tensor
>> output - the output tensor
>> dim - the dimension along which the reduction is performed
>> mean - the mean value
>> requireLink - whether to add the operation to the network
*/
void ReduceVariance(const XTensor &input, XTensor &output, int dim, const XTensor &mean, bool requireLink)
{
CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!");
if (!output.isInit || !XTensor::IsReduceShaped(&input, &output, dim)) {
int order = input.order - 1;
int * dimSize = new int[order];
for (int i = 0; i < order; i++) {
if (i < dim)
dimSize[i] = input.dimSize[i];
else if (i >= dim)
dimSize[i] = input.dimSize[i + 1];
}
float dr = (!input.isSparse) ? 1.0F : input.denseRatio;
InitTensor(&output, order, dimSize, input.dataType, dr, input.devID, input.mem);
/* destroy variables */
delete[] dimSize;
}
/* call _ReduceVariance function */
_ReduceVariance(&input, &output, dim, &mean);
if (requireLink) {
/* tensor connection */
XLink::MakeLink(&input, &mean, &output, REDUCE_REDUCEVARIANCE);
XLink::AddParamToHeadInt(&output, dim);
}
}
} // namespace nts(NiuTrans.Tensor)
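A usage sketch chaining the new reduction overloads (the shape is an assumption for illustration):
void MomentsExample(const XTensor &input /* e.g. 16 x 32 */, XTensor &mean, XTensor &var)
{
    ReduceMean(input, mean, 1);            /* per-row mean, a tensor of size 16 */
    ReduceVariance(input, var, 1, mean);   /* per-row variance around that mean */
}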
......@@ -39,6 +39,12 @@ For a 1-dimensional data array a, variance = 1/n * \sum_i (a_i - mean)^2
*/
XTensor ReduceVariance(const XTensor &input, int dim, const XTensor &mean);
/*
variance of the items along a dimension of the tensor
For a 1-dimensional data array a, variance = 1/n * \sum_i (a_i - mean)^2
*/
void ReduceVariance(const XTensor &input, XTensor &output, int dim, const XTensor &mean, bool requireLink = false);
} // namespace nts(NiuTrans.Tensor)
#endif // __REDUCEVARIANCE_H__
......@@ -37,7 +37,7 @@ or "Merge" by means of the tensor shapes
>> big - the resulting tensor
>> dim - which dimension we perform the concatenation
*/
void _Concatenate(const XList * smalls, XTensor * big, int dim)
void _Concatenate(const TensorList * smalls, XTensor * big, int dim)
{
bool uniform = true;
for (int i = 1; i < smalls->count; i++) {
......@@ -66,7 +66,7 @@ or "Merge" by means of the tensor shapes
>> dim - which dimension we perform the concatenation
<< return - the tensor of concatenating a list of tensors along a given dimension
*/
XTensor Concatenate(const XList &smalls, int dim)
XTensor Concatenate(const TensorList &smalls, int dim)
{
CheckNTErrors(smalls.count > 0, "Empty list!");
CheckNTErrors(dim >= 0, "Illegal dimension to concatenate!");
......@@ -147,9 +147,9 @@ concatenate two tensors along a given dimension
*/
void _Concatenate(const XTensor * smallA, const XTensor * smallB, XTensor * big, int dim)
{
XList smalls(2);
smalls.Add(smallA);
smalls.Add(smallB);
TensorList smalls(2);
smalls.Add((XTensor*)smallA);
smalls.Add((XTensor*)smallB);
_Concatenate(&smalls, big, dim);
}
......@@ -168,9 +168,9 @@ XTensor Concatenate(const XTensor &smallA, const XTensor &smallB, int dim)
{
CheckNTErrors(dim >= 0, "Illegal dimension to concatenate!");
XList smalls(2);
smalls.Add(&smallA);
smalls.Add(&smallB);
TensorList smalls(2);
smalls.Add((XTensor*)&smallA);
smalls.Add((XTensor*)&smallB);
bool uniform = true;
for (int i = 1; i < smalls.count; i++) {
......
......@@ -31,7 +31,7 @@ concatenate a list of tensors along a given dimension
Note that this is actually a wrapper that selects
"ConcatenateSolely" or "Merge" by means of the tensor shapes
*/
void _Concatenate(const XList * smalls, XTensor * big, int dim);
void _Concatenate(const TensorList * smalls, XTensor * big, int dim);
/*
concatenate a list of tensors along a given dimension (return an XTensor structure)
......@@ -39,7 +39,7 @@ make a new tensor to keep the result and return it
Note that this is actually a wrapper that selects
"ConcatenateSolely" or "Merge" by means of the tensor shapes
*/
XTensor Concatenate(const XList &smalls, int dim);
XTensor Concatenate(const TensorList &smalls, int dim);
/* concatenate two tensors along a given dimension */
void _Concatenate(const XTensor * smallA, const XTensor * smallB, XTensor * big, int dim);
......
......@@ -34,7 +34,7 @@ concatenate a list of tensors along a given dimension
>> big - the resulting tensor
>> dim - which dimension we perform the concatenation
*/
void _ConcatenateSolely(const XList * smalls, XTensor * big, int dim)
void _ConcatenateSolely(const TensorList * smalls, XTensor * big, int dim)
{
CheckNTErrors(big->order > dim && dim >= 0, "Illegal dimension to concatenate!");
......@@ -85,12 +85,12 @@ void _ConcatenateSolely(const XList * smalls, XTensor * big, int dim)
}
}
else {
XList * sourceArrays = new XList(smalls->count);
StrList* sourceArrays = new StrList(smalls->count);
int * blockSizes = new int[smalls->count];
for (int i = 0; i < smalls->count; i++) {
XTensor * tensor = (XTensor*)smalls->GetItem(i);
blockSizes[i] = stride * tensor->dimSizeRDI[dimRDI] * tensor->unitSize;
sourceArrays->Add(tensor->data);
sourceArrays->Add((char*)tensor->data);
}
_MergeBlockLists(sourceArrays, blockSizes, blockNum, big->data, big->mem);
......
......@@ -27,7 +27,7 @@
namespace nts { // namespace nts(NiuTrans.Tensor)
/* concatenate a list of tensors along a given dimension */
void _ConcatenateSolely(const XList * smalls, XTensor * big, int dim);
void _ConcatenateSolely(const TensorList * smalls, XTensor * big, int dim);
} // namespace nts(NiuTrans.Tensor)
......
......@@ -148,6 +148,39 @@ void _Merge(const XTensor * s, XTensor * t, int whereToMerge, int leadingDim)
}
}
bool CheckMergeSize(const XTensor * s, const XTensor * t, int whereToMerge, int leadingDim)
{
if (!(s && t))
return false;
if (!(s->dataType == t->dataType))
return false;
if (leadingDim < 0)
leadingDim = 0;
int order = s->order - 1;
int * dimSize = new int[order];
for (int i = 0; i < s->order; i++) {
if (i < leadingDim)
dimSize[i] = s->dimSize[i];
else if (i > leadingDim) {
if (i != whereToMerge)
dimSize[i - 1] = s->dimSize[i];
else
dimSize[i - 1] = s->dimSize[i] * s->dimSize[leadingDim];
}
}
for (int i = 0; i < order; i++) {
if (dimSize[i] != t->dimSize[i])
return false;
}
return true;
}
/*
transform a tensor by merging it along with a dimension (return an XTensor structure)
make a new tensor to keep the result and return it
......@@ -199,6 +232,43 @@ XTensor Merge(const XTensor &s, int whereToMerge, int leadingDim)
return t;
}
void Merge(const XTensor &s, XTensor &t, int whereToMerge, int leadingDim, bool requireLink)
{
if (!t.isInit || !CheckMergeSize(&s, &t, whereToMerge, leadingDim)) {
if (leadingDim < 0)
leadingDim = 0;
int order = s.order - 1;
int * dimSize = new int[order];
for (int i = 0; i < s.order; i++) {
if (i < leadingDim)
dimSize[i] = s.dimSize[i];
else if (i > leadingDim) {
if (i != whereToMerge)
dimSize[i - 1] = s.dimSize[i];
else
dimSize[i - 1] = s.dimSize[i] * s.dimSize[leadingDim];
}
}
float dr = (!s.isSparse) ? 1.0F : s.denseRatio;
InitTensor(&t, order, dimSize, s.dataType, dr, s.devID, s.mem);
/* destroy variables */
delete[] dimSize;
}
/* call _Merge function */
_Merge(&s, &t, whereToMerge, leadingDim);
if (requireLink) {
/* tensor connections */
XLink::MakeLink(&s, NULL, &t, SHAPE_MERGE);
XLink::AddParamToHeadInt(&t, whereToMerge);
XLink::AddParamToHeadInt(&t, leadingDim);
}
}
/*
merge small tensors into a big tensor
......@@ -206,7 +276,7 @@ merge small tensors into a big tensor
>> big - the merged tensor (for return)
>> whereToMerge - the merging operation is along with which dimension
*/
void _Merge(const XList * smalls, XTensor * big, int whereToMerge)
void _Merge(const TensorList * smalls, XTensor * big, int whereToMerge)
{
whereToMerge = (whereToMerge < 0 ? big->order - 1 : whereToMerge);
......@@ -217,15 +287,15 @@ void _Merge(const XList * smalls, XTensor * big, int whereToMerge)
bool uniform = true;
int mergeNum = smalls->count;
XTensor* smallsItem0 = (XTensor*)(smalls->GetItem(0));
XTensor* smallsItem0 = smalls->GetItem(0);
int itemSize = smallsItem0->unitNum * smallsItem0->unitSize;
for (int i = 0; i < smalls->count; i++) {
XTensor* smallsItem = (XTensor*)smalls->GetItem(i);
XTensor* smallsItem = smalls->GetItem(i);
CheckNTErrors((big->unitNum == smallsItem->unitNum * mergeNum), "Unmatched tensors!");
if (i > 0) {
XTensor * preItem = (XTensor*)smalls->GetItem(i - 1);
XTensor * preItem = smalls->GetItem(i - 1);
if (smallsItem->unitNum * smallsItem->unitSize != (char*)smallsItem->data - (char*)preItem->data)
uniform = false;
}
......@@ -237,7 +307,7 @@ void _Merge(const XList * smalls, XTensor * big, int whereToMerge)
int gridNum = 1;
int mergedNum = smalls->count;
XTensor * s0 = (XTensor*)smalls->GetItem(0);
XTensor * s0 = smalls->GetItem(0);
int whereToMergeRDI = s0->order - whereToMerge - 1;
for (int i = 0; i < s0->order; i++) {
if (i <= whereToMergeRDI)
......@@ -263,7 +333,7 @@ void _Merge(const XList * smalls, XTensor * big, int whereToMerge)
for (int g = 0; g < gridNum; g++) {
char * tData = (char*)big->data + g * blockSize * blockNum * big->unitSize;
for (int k = 0; k < mergedNum; k++) {
XTensor * s = (XTensor*)smalls->GetItem(k);
XTensor * s = smalls->GetItem(k);
char * sData = (char*)s->data + g * blockSize * blockNum * s->unitSize;
XMemCopy2D(tData + k * tStep, tPtich, big->devID,
sData + k * sStep, sPitch, s->devID,
......@@ -295,7 +365,7 @@ void _Merge(const XList * smalls, XTensor * big, int whereToMerge)
/* copy from source to tmp */
if (!uniform) {
for (int i = 0; i < mergeNum; i++) {
XTensor* smallsItem = (XTensor*)smalls->GetItem(i);
XTensor* smallsItem = smalls->GetItem(i);
XMemCopy((char*)(tensorTMP->data) + (itemSize * i), tensorTMP->devID, smallsItem->data, smallsItem->devID, itemSize);
}
}
......@@ -322,9 +392,9 @@ make a new tensor to keep the result and return it
>> whereToMerge - the merging operation is along with which dimension
<< return - the big tensor merged by small tensors
*/
XTensor Merge(const XList &smalls, int whereToMerge)
XTensor Merge(const TensorList &smalls, int whereToMerge)
{
XTensor * tensor = (XTensor*)smalls.GetItem(0);
XTensor * tensor = smalls.GetItem(0);
int order = tensor->order;
int * dimSize = new int[order];
for (int i = 0; i < tensor->order; i++) {
......@@ -375,9 +445,9 @@ XTensor Merge(const XTensor &smallA, const XTensor &smallB, int whereToMerge)
XTensor big(order, dimSize, smallA.dataType, dr, smallA.devID, smallA.mem);
big.SetTMPFlag();
XList smalls(2);
smalls.Add(&smallA);
smalls.Add(&smallB);
TensorList smalls(2);
smalls.Add((XTensor*)&smallA);
smalls.Add((XTensor*)&smallB);
/* call _Merge function */
_Merge(&smalls, &big, whereToMerge);
......
......@@ -33,15 +33,21 @@ void _Merge(const XTensor * s, XTensor * t, int whereToMerge, int leadingDim = -
e.g., (M, N/3, 3) -> (M, N) */
XTensor Merge(const XTensor &s, int whereToMerge, int leadingDim = -1);
void Merge(const XTensor &s, XTensor &t, int whereToMerge, int leadingDim = -1, bool requireLink = false);
/* merge small tensors into a big tensor */
void _Merge(const XList * smalls, XTensor * big, int whereToMerge);
void _Merge(const TensorList * smalls, XTensor * big, int whereToMerge);
/* merge small tensors into a big tensor (return an XTensor structure) */
XTensor Merge(const XList &smalls, int whereToMerge);
XTensor Merge(const TensorList &smalls, int whereToMerge);
void Merge(const TensorList &smalls, XTensor &t, int whereToMerge);
/* merge two tensors into a big tensor (return an XTensor structure) */
XTensor Merge(const XTensor &smallA, const XTensor &smallB, int whereToMerge);
void Merge(const XTensor &smallA, const XTensor &smallB, XTensor &t, int whereToMerge);
} // namespace nts(NiuTrans.Tensor)
#endif // __MERGE_H__
\ No newline at end of file
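A sketch of the new Split/Merge overloads round-tripping a tensor (the shapes are assumptions for illustration, taken from the shape checks added above): Split moves the split axis to a new leading dimension, and Merge folds a leading dimension back into another one:
void SplitMergeExample(const XTensor &s /* e.g. 4 x 6 */, XTensor &pieces, XTensor &restored)
{
    Split(s, pieces, 1, 3);          /* (4, 6) -> (3, 4, 2): three slices along dimension 1 */
    Merge(pieces, restored, 2, 0);   /* (3, 4, 2) -> (4, 6): the inverse of the split above */
}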
......@@ -34,7 +34,7 @@ merge data by blocks
>> target - target data array
>> myMem - memory pool
*/
void _MergeBlockLists(const XList * sourceList, int * blockSizes, int blockNum, void * target, XMem * myMem)
void _MergeBlockLists(const StrList* sourceList, int * blockSizes, int blockNum, void * target, XMem * myMem)
{
if (myMem != NULL && myMem->devID >= 0) {
#ifdef USE_CUDA
......
......@@ -71,7 +71,7 @@ merge data by blocks (cuda version)
>> target - target data array
>> myMem - the memory pool
*/
void _CudaMergeBlockLists(const XList * sourceList, int * blockSizes, int blockNum, void * target, XMem * myMem)
void _CudaMergeBlockLists(const StrList* sourceList, int * blockSizes, int blockNum, void * target, XMem * myMem)
{
CheckNTErrors((myMem != NULL), "No memory pool!");
CheckNTErrors((myMem->devID >= 0), "Wrong device to run!");
......
......@@ -33,7 +33,7 @@ __global__
void KernelCopyBlockLists(DTYPE ** sourceList, int * sourceBlockSizes, int sourceBlockNum, DTYPE ** targetList);
/* merge data by blocks (cuda version) */
void _CudaMergeBlockLists(const XList * sourceList, int * blockSizes, int blockNum, void * target, XMem * myMem);
void _CudaMergeBlockLists(const StrList* sourceList, int * blockSizes, int blockNum, void * target, XMem * myMem);
#endif // USE_CUDA
......
......@@ -27,7 +27,7 @@
namespace nts { // namespace nts(NiuTrans.Tensor)
/* merge data by blocks */
void _MergeBlockLists(const XList * sourceList, int * blockSizes, int blockNum, void * target, XMem * myMem);
void _MergeBlockLists(const StrList* sourceList, int * blockSizes, int blockNum, void * target, XMem * myMem);
} // namespace nts(NiuTrans.Tensor)
......
......@@ -48,4 +48,19 @@ XTensor Reshape(XTensor &s, int order, int * dimSize)
return t;
}
void Reshape(XTensor &s, XTensor &t, int order, int * dimSize, bool requireLink)
{
if (!t.isInit || !XTensor::IsSameShaped(&t, &s)) {
InitTensor(&t, &s);
}
/* call Reshape function */
t.Reshape(order, dimSize);
if (requireLink) {
/* tensor connections */
XLink::MakeLink(&s, NULL, &t, SHAPE_RESHAPE);
}
}
} // namespace nts(NiuTrans.Tensor)
......@@ -29,5 +29,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* reshape the tensor */
XTensor Reshape(XTensor &s, int order, int * dimSize);
void Reshape(XTensor &s, XTensor &t, int order, int * dimSize, bool requireLink = false);
} // namespace nts(NiuTrans.Tensor)
#endif // __RESHAPE_H__
......@@ -156,6 +156,33 @@ void _Split(const XTensor * s, XTensor * t, int whereToSplit, int splitNum)
}
}
/* check whether the target tensor matches the shape produced by splitting the source */
bool CheckSplitSize(const XTensor * s, const XTensor * t, int whereToSplit, int splitNum)
{
if (!(s && t))
return false;
if (!(s->dataType == t->dataType))
return false;
int order = s->order + 1;
int * dimSize = new int[order];
dimSize[0] = splitNum;
for (int i = 0; i < s->order; i++) {
if (i == whereToSplit)
dimSize[i + 1] = s->dimSize[i] / splitNum;
else
dimSize[i + 1] = s->dimSize[i];
}
for (int i = 0; i < order; i++) {
if (dimSize[i] != t->dimSize[i]) {
delete[] dimSize;
return false;
}
}
delete[] dimSize;
return true;
}
/*
transform a tensor by splitting it, e.g., (N, M) -> (N/3, M, 3) (return an XTensor structure)
make a new tensor to keep the result and return it
......@@ -200,6 +227,38 @@ XTensor Split(const XTensor &s, int whereToSplit, int splitNum)
return t;
}
void Split(const XTensor &s, XTensor &t, int whereToSplit, int splitNum, bool requireLink)
{
if (!t.isInit || !CheckSplitSize(&s, &t, whereToSplit, splitNum)) {
int order = s.order + 1;
int * dimSize = new int[order];
dimSize[0] = splitNum;
for (int i = 0; i < s.order; i++) {
if (i == whereToSplit)
dimSize[i + 1] = s.dimSize[i] / splitNum;
else
dimSize[i + 1] = s.dimSize[i];
}
float dr = (!s.isSparse) ? 1.0F : s.denseRatio;
InitTensor(&t, order, dimSize, s.dataType, dr, s.devID, s.mem);
/* destroy variables */
delete[] dimSize;
}
/* call _Split function */
_Split(&s, &t, whereToSplit, splitNum);
if (requireLink) {
/* tensor connections */
XLink::MakeLink(&s, NULL, &t, SHAPE_SPLIT);
XLink::AddParamToHeadInt(&t, whereToSplit);
XLink::AddParamToHeadInt(&t, splitNum);
}
}
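As a quick reference, a hedged usage sketch of the new in-place Split wrapper (illustrative only, not part of the patch; it assumes a CPU tensor and the usual InitTensor2D / SetDataRand helpers):

/* illustrative sketch: (6, 8) -> (2, 3, 8); t is (re)initialized only if CheckSplitSize fails */
XTensor s;
XTensor t;
InitTensor2D(&s, 6, 8, X_FLOAT);
s.SetDataRand(-1.0F, 1.0F);
Split(s, t, 0, 2);
Split(s, t, 0, 2, true);   /* the same call, but it also records the SHAPE_SPLIT link */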
/*
split a big tensor into small tensors
......@@ -209,7 +268,7 @@ split a big tensor into small tensors
>> whereToSplit - which dimension of the tensor is to split
>> splitNum - how many splits
*/
void _Split(const XTensor * big, XList * smalls, int whereToSplit, int splitNum)
void _Split(const XTensor * big, TensorList * smalls, int whereToSplit, int splitNum)
{
CheckNTErrors((smalls != NULL), "Invalid list!");
CheckNTErrors((smalls->count == splitNum), "Unmatched tensors!");
......@@ -340,7 +399,7 @@ split a big tensor into small tensors
>> whereToSplit - which dimension of the tensor is to split
>> splitNum - how many splits
*/
void Split(const XTensor &big, XList &smalls, int whereToSplit, int splitNum)
void Split(const XTensor &big, TensorList &smalls, int whereToSplit, int splitNum)
{
CheckNTErrors(big.GetDim(whereToSplit) % splitNum == 0, "Wrong splitNum!");
......
......@@ -41,14 +41,16 @@ e.g., (M, N) -> (M, N/3, 3)
*/
XTensor Split(const XTensor &s, int whereToSplit, int splitNum);
void Split(const XTensor &s, XTensor &t, int whereToSplit, int splitNum, bool requireLink = false);
/* split a big tensor into small tensors */
void _Split(const XTensor * big, XList * smalls, int whereToSplit, int splitNum);
void _Split(const XTensor * big, TensorList * smalls, int whereToSplit, int splitNum);
/*
split a big tensor into small tensors (return a XList structure)
split a big tensor into small tensors (return a TensorList structure)
make a new list to keep the result and return it
*/
void Split(const XTensor &big, XList &smalls, int whereToSplit, int splitNum);
void Split(const XTensor &big, TensorList &smalls, int whereToSplit, int splitNum);
} // namespace nts(NiuTrans.Tensor)
......
......@@ -112,4 +112,19 @@ XTensor Squeeze(XTensor & source, int leadingDim)
return target;
}
void Squeeze(XTensor & source, XTensor & target, int leadingDim, bool requireLink)
{
if (!target.isInit || !XTensor::IsSameShaped(&source, &target)) {
InitTensor(&target, &source);
}
/* call _Squeeze function */
_Squeeze(&source, &target, leadingDim);
if (requireLink) {
/* tensor connections */
XLink::MakeLink(&source, NULL, &target, SHAPE_SQUEEZE);
}
}
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
......@@ -37,6 +37,8 @@ void _SqueezeMe(XTensor * source, int leadingDim = -1);
make a new tensor to keep the result and return it */
XTensor Squeeze(XTensor & source, int leadingDim = -1);
void Squeeze(XTensor & source, XTensor & target, int leadingDim = -1, bool requireLink = false);
} // namespace nts(NiuTrans.Tensor)
#endif // __SQUEEZE_H__
\ No newline at end of file
......@@ -78,7 +78,7 @@ void _Unsqueeze(const XTensor * a, XTensor * b, int dim, int dSize)
#endif
}
else {
XList * sourceArrays = new XList(blockNumB);
StrList * sourceArrays = new StrList(blockNumB);
int * blockSizes = new int[blockNumB];
for (int i = 0; i < blockNumA; i++) {
......@@ -96,6 +96,34 @@ void _Unsqueeze(const XTensor * a, XTensor * b, int dim, int dSize)
}
}
/* check whether the target tensor matches the shape produced by unsqueezing the source */
bool CheckUnsqueezeSize(const XTensor * a, const XTensor * b, int dim, int dSize)
{
if (!(a && b))
return false;
if (!(a->dataType == b->dataType))
return false;
int order = a->order + 1;
int * dimSize = new int[order];
for (int i = 0; i < order; i++) {
if (i < dim)
dimSize[i] = a->dimSize[i];
else if (i == dim)
dimSize[i] = dSize;
else
dimSize[i] = a->dimSize[i - 1];
}
for (int i = 0; i < order; i++) {
if (dimSize[i] != b->dimSize[i]) {
delete[] dimSize;
return false;
}
}
delete[] dimSize;
return true;
}
/*
insert a dimension by copying the blocks for x times
(where x is the size of the inserted dimension) (return an XTensor structure)
......@@ -138,4 +166,37 @@ XTensor Unsqueeze(const XTensor &a, int dim, int dSize)
return b;
}
void Unsqueeze(const XTensor &a, XTensor &b, int dim, int dSize, bool requireLink)
{
if (!b.isInit || !CheckUnsqueezeSize(&a, &b, dim, dSize)) {
int order = a.order + 1;
int * dimSize = new int[order];
for (int i = 0; i < order; i++) {
if (i < dim)
dimSize[i] = a.dimSize[i];
else if (i == dim)
dimSize[i] = dSize;
else
dimSize[i] = a.dimSize[i - 1];
}
float dr = (!a.isSparse) ? 1.0F : a.denseRatio;
InitTensor(&b, order, dimSize, a.dataType, dr, a.devID, a.mem);
/* destroy variables */
delete[] dimSize;
}
/* call _Unsqueeze function */
_Unsqueeze(&a, &b, dim, dSize);
if (requireLink) {
/* tensor connections */
XLink::MakeLink(&a, NULL, &b, SHAPE_UNSQUEEZE);
XLink::AddParamToHeadInt(&b, dim);
XLink::AddParamToHeadInt(&b, dSize);
}
}
} // namespace nts(NiuTrans.Tensor)
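Similarly, a minimal sketch of the in-place Unsqueeze wrapper (illustrative only, under the same assumptions as the Split sketch above):

/* illustrative sketch: (3, 5) -> (3, 4, 5); b is reshaped only when CheckUnsqueezeSize fails */
XTensor a;
XTensor b;
InitTensor2D(&a, 3, 5, X_FLOAT);
a.SetDataRand(-1.0F, 1.0F);
Unsqueeze(a, b, 1, 4);
Unsqueeze(a, b, 1, 4, true);   /* also records the SHAPE_UNSQUEEZE link and its parameters */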
......@@ -35,6 +35,8 @@ void _Unsqueeze(const XTensor * a, XTensor * b, int dim, int dSize);
make a new tensor to keep the result and return it */
XTensor Unsqueeze(const XTensor &a, int dim, int dSize);
void Unsqueeze(const XTensor &a, XTensor &b, int dim, int dSize, bool requireLink = false);
} // namespace nts(NiuTrans.Tensor)
#endif // __UNSQUEEZE_H__
......@@ -114,12 +114,12 @@ void Sort(XTensor & a, XTensor & b, XTensor & index, int dim)
_Sort(&a, &b, &index, dim);
/* tensor connections */
XList list(2);
list.Add(&b);
list.Add(&index);
XLink::MakeLink(&a, &list, SORT_SORT);
XLink::AddParamToHeadInt(&b, dim);
XLink::AddParamToHeadInt(&index, dim);
//TensorList list(2);
//list.Add(&b);
//list.Add(&index);
// XLink::MakeLink(&a, &list, SORT_SORT);
// XLink::AddParamToHeadInt(&b, dim);
// XLink::AddParamToHeadInt(&index, dim);
}
} // namespace nts(NiuTrans.Tensor)
......@@ -128,14 +128,14 @@ void TopK(XTensor &a, XTensor &b, XTensor &index, int dim, int k)
_TopK(&a, &b, &index, dim, k);
/* tensor connection */
XList list(2);
list.Add(&b);
list.Add(&index);
XLink::MakeLink(&a, &list, SORT_TOPK);
XLink::AddParamToHeadInt(&b, dim);
XLink::AddParamToHeadInt(&index, k);
XLink::AddParamToHeadInt(&b, dim);
XLink::AddParamToHeadInt(&index, k);
//TensorList list(2);
//list.Add(&b);
//list.Add(&index);
//XLink::MakeLink(&a, &list, SORT_TOPK);
//XLink::AddParamToHeadInt(&b, dim);
//XLink::AddParamToHeadInt(&index, k);
//XLink::AddParamToHeadInt(&b, dim);
//XLink::AddParamToHeadInt(&index, k);
}
......
......@@ -31,7 +31,7 @@ flush a list of XTensor to GPU memory
>> devID - target GPU id
>> GPUMem - memory pool for the GPU
*/
void CPUToGPUFlush(XList * mList, int devID, XMem * GPUMem)
void CPUToGPUFlush(TensorList * mList, int devID, XMem * GPUMem)
{
#ifdef USE_CUDA
CudaCPUToGPUFlush(mList, devID, GPUMem);
......
......@@ -32,7 +32,7 @@ flush a list of XTensor to GPU memory
>> devID - target GPU id
>> GPUMem - memory pool for the GPU
*/
void CudaCPUToGPUFlush(XList * mList, int devID, XMem * GPUMem)
void CudaCPUToGPUFlush(TensorList * mList, int devID, XMem * GPUMem)
{
if (mList == NULL || mList->count == 0)
return;
......
......@@ -29,7 +29,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/* flush a list of XTensor to GPU memory */
void CudaCPUToGPUFlush(XList * mList, int devID, XMem * GPUMem);
void CudaCPUToGPUFlush(TensorList * mList, int devID, XMem * GPUMem);
/* copy the data from GPU memory to CPU memory */
void CudaGPUToCPUFlush(XTensor * tensor);
......
......@@ -27,7 +27,7 @@
namespace nts { // namespace nts(NiuTrans.Tensor)
/* flush a list of XTensor to GPU memory */
void CPUToGPUFlush(XList * mList, int devID, XMem * GPUMem);
void CPUToGPUFlush(TensorList * mList, int devID, XMem * GPUMem);
/* copy the data from GPU memory to CPU memory */
void GPUToCPUFlush(XTensor * tensor);
......
......@@ -51,19 +51,19 @@ void RunParallel2D(XPRunner * parallelRunner, void * job,
CheckNTErrors(jobNum != 0, "TODO!");
/* argument list of the jobs */
XList * jobArgList = new XList(4);
TensorList * jobArgList = new TensorList(argNum);
va_list ap;
va_start(ap, argNum);
for (int i = 0; i < argNum; i++) {
void * p = va_arg(ap, void*);
XTensor* p = va_arg(ap, XTensor*);
jobArgList->Add(p);
}
va_end(ap);
/* prepare the neccesary argument list for parallel processing */
XList * jobs = new XList(jobNum);
XList * args = new XList(jobNum);
TensorList * jobs = new TensorList(jobNum);
TensorList * args = new TensorList(jobNum);
int * indexList = new int[jobNum * 4 * 4];
......@@ -77,27 +77,30 @@ void RunParallel2D(XPRunner * parallelRunner, void * job,
2. other arguments
*/
for (int i = 0; i < jobNum; i++) {
XList * blockArgs = new XList(argNum + 4);
IntList* indexArgs = new IntList(4);
TensorList * blockArgs = new TensorList(argNum);
int * blockIndex = indexList + i * 4;
blockArgs->Add(blockIndex);
blockArgs->Add(blockIndex + 1);
blockArgs->Add(blockIndex + 2);
blockArgs->Add(blockIndex + 3);
indexArgs->Add(blockIndex[0]);
indexArgs->Add(blockIndex[1]);
indexArgs->Add(blockIndex[2]);
indexArgs->Add(blockIndex[3]);
for (int j = 0; j < argNum; j++)
blockArgs->Add(jobArgList->GetItem(j));
args->Add(blockArgs);
jobs->Add((void*)job);
args->Add((XTensor*)indexArgs);
args->Add((XTensor*)blockArgs);
jobs->Add((XTensor*)job);
}
args->count = nblock;
args->count = jobNum * 2;
jobs->count = nblock;
/* single job */
if (jobNum == 1)
((TFunction)job)((XList*)args->GetItem(0));
((TFunction)job)(args);
/* multiple jobs */
else
parallelRunner->Run(jobs, args);
......@@ -105,7 +108,7 @@ void RunParallel2D(XPRunner * parallelRunner, void * job,
/* free the memory */
delete[] indexList;
for (int i = 0; i < args->count; i++) {
XList * blockArgs = (XList*)args->GetItem(i);
TensorList * blockArgs = (TensorList*)args->GetItem(i);
delete blockArgs;
}
delete args;
......
......@@ -21,12 +21,14 @@
#include "../XName.h"
#include <time.h>
#include <math.h>
#include "Dropout.h"
#include "Dropout.cuh"
#include "../core/arithmetic/Multiply.h"
#include "../core/arithmetic/MultiplyDim.h"
#include "../core/math/ScaleAndShift.h"
#include "../core/getandset/SetData.h"
#include "DropoutWithIndex.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
......@@ -147,6 +149,7 @@ XTensor Dropout(const XTensor &x, DTYPE dropProb, int leadingDim, int leadingDim
CheckNTErrors(dropProb >= 0.0 && dropProb <= 1.0, "The probability must be 0-1!");
XTensor mask;
int * maskArrayInt = NULL;
DTYPE * maskArray = NULL;
DTYPE scaleFactor = (DTYPE)1.0 / ((DTYPE)1.0 - dropProb);
......@@ -157,6 +160,23 @@ XTensor Dropout(const XTensor &x, DTYPE dropProb, int leadingDim, int leadingDim
_SetDataRandP(&mask, 0, 1.0F, dropProb, scaleFactor);
return Multiply(x, mask);
/* dropout with index */
/*int unitNum = floor(x.unitNum*dropProb);
maskArrayInt = new int[unitNum];
for (int i = 0; i < unitNum; i++)
maskArrayInt[i] = rand() % x.unitNum;
XTensor maskindex;
InitTensor1D(&maskindex, unitNum, X_INT, x.devID, x.mem);
maskindex.SetData(maskArrayInt, unitNum);
delete[] maskArrayInt;
return DropoutWithIndex(x, maskindex, scaleFactor);*/
}
else if(leadingDim2 < 0){
int n = leadingDim;
......@@ -209,7 +229,6 @@ XTensor Dropout(const XTensor &x, DTYPE dropProb, int leadingDim, int leadingDim
return MultiplyBroadcast(x, mask);
}
}
/*
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Jiang Yufan (email: jiangyufan2018@outlook.com) 2019-03-20
*/
#include "DropoutWithIndex.h"
#include "DropoutWithIndex.cuh"
#include "../core/CHeader.h"
#include "../XName.h"
#include "Identity.h"
namespace nts {
/*
This is a special implementation of "dropout" to reduce memory with maskIndex.
>> x - input tensor
>> maskIndex - mask index tensor
>> c - output tensor
*/
void _DropoutWithIndex(const XTensor * x, XTensor * maskIndex, XTensor * c)
{
CheckNTErrors(maskIndex->order == 1, "Illegal tensor order!");
#ifdef USE_CUDA
if (maskIndex->devID >= 0 || x->devID >= 0 || c->devID >= 0) {
_CudaDropoutWithIndex(x, maskIndex, c);
return;
}
#endif
// TODO!!
ShowNTErrors("TODO!");
}
/*
This is a special implementation of "dropout" to reduce memory with maskIndex.
>> x - input tensor
>> maskIndex - mask index tensor
>> scale - scale factor
<< return - the output tensor
*/
XTensor DropoutWithIndex(const XTensor &x, XTensor &maskIndex, DTYPE scale)
{
XTensor c;
int order = x.order;
int * dimSize = new int[order];
for (int i = 0; i < order; i++) {
dimSize[i] = x.dimSize[i];
}
InitTensor1D(&c, x.unitNum, x.dataType, x.devID, x.mem);
_SetDataFixedFloat(&c, 1.0F);
_DropoutWithIndex(&x, &maskIndex, &c);
c.Reshape(order, dimSize);
delete[] dimSize;
_MultiplyMe(&c, &x);
_ScaleAndShiftMe(&c, scale);
/* tensor connections */
XLink::MakeLink(&x, &maskIndex, &c, MOVEMENT_DROPOUTWITHINDEX);
XLink::AddParamToHead(&c, scale);
return c;
}
}// namespace nts(NiuTrans.Tensor)
\ No newline at end of file
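A rough caller-side sketch of DropoutWithIndex (not part of the patch; it mirrors the commented-out index-based path in Dropout above and assumes a float input tensor x):

/* illustrative sketch: zero out about dropProb of the units of x by index */
DTYPE dropProb = 0.1F;
int dropNum = (int)(x.unitNum * dropProb);
int * indexArray = new int[dropNum];
for (int i = 0; i < dropNum; i++)
indexArray[i] = rand() % x.unitNum;
XTensor maskIndex;
InitTensor1D(&maskIndex, dropNum, X_INT, x.devID, x.mem);
maskIndex.SetData(indexArray, dropNum);
delete[] indexArray;
XTensor y;
y = DropoutWithIndex(x, maskIndex, (DTYPE)1.0 / ((DTYPE)1.0 - dropProb));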
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Jiang Yufan (email: jiangyufan2018@outlook.com) 2019-03-20
*/
#include "DropoutWithIndex.cuh"
#include "../XDevice.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/*
This is a special implementation of "dropout" to reduce memory with maskIndex.
>> tData - the data pointer of the target tensor
>> sIndex - mask index
>> size - the size of sIndex
*/
__global__
void KernelDropoutWithIndex1D(DTYPE * tData, int * sIndex, int size)
{
/* block id */
int i = blockDim.x * blockIdx.x + threadIdx.x;
DTYPE * t = tData;
if (i < size) {
int id = sIndex[i];
t[id] = DTYPE(0.0F);
}
}
/*
This is a special implementation of "dropout" to reduce memory with maskIndex.
>> x - input tensor
>> maskIndex - mask index tensor
>> c - output tensor
*/
void _CudaDropoutWithIndex(const XTensor * x, XTensor * maskIndex, XTensor * c)
{
int devID = c->devID;
int blockNum = maskIndex->unitNum;
int cudaGrids[3];
int cudaBlocks[3];
int devIDBackup;
ProtectCudaDev(devID, devIDBackup);
GDevs.GetCudaThread(devID, blockNum, cudaGrids, cudaBlocks);
dim3 blocks(cudaGrids[0]);
dim3 threads(cudaBlocks[0]);
DTYPE * tData = (DTYPE*)c->data;
int * sIndex = NULL;
sIndex = (int *)maskIndex->data;
KernelDropoutWithIndex1D <<<blocks, threads >>>(tData, sIndex, blockNum);
BacktoCudaDev(devID, devIDBackup);
}
#endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Jiang Yufan (email: jiangyufan2018@outlook.com) 2019-03-20
*/
#ifndef __DROPOUTWITHINDEX_CUH__
#define __DROPOUTWITHINDEX_CUH__
#include "../XTensor.h"
#include "DropoutWithIndex.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/* dropout with index (cuda version) */
void _CudaDropoutWithIndex(const XTensor * x, XTensor * maskIndex, XTensor * c);
#endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor)
#endif // __DROPOUTWITHINDEX_CUH__
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Jiang Yufan (email: jiangyufan2018@outlook.com) 2019-03-20
*/
#ifndef __DROPOUTWITHINDEX_H__
#define __DROPOUTWITHINDEX_H__
#include "../XTensor.h"
namespace nts {
void _DropoutWithIndex(const XTensor * x, XTensor * maskIndex, XTensor * c);
XTensor DropoutWithIndex(const XTensor &x, XTensor &mask, DTYPE scale);
} // namespace nts(NiuTrans.Tensor)
#endif // !__DROPOUTWITHINDEX_H__
......@@ -26,7 +26,6 @@
#include "../XTensor.h"
#include "CrossEntropy.h"
#include "Dropout.h"
#include "HardTanH.h"
#include "Identity.h"
......
......@@ -23,7 +23,7 @@
#include "../XName.h"
#include "HardTanH.h"
#include "HardTanH.cuh"
#include "CrossEntropy.h"
#include "../loss/LHeader.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
......@@ -84,6 +84,21 @@ XTensor HardTanH(const XTensor &x)
return y;
}
void HardTanH(const XTensor &x, XTensor &y, bool requireLink)
{
if (!y.isInit || !XTensor::IsSameShaped(&y, &x)) {
InitTensor(&y, &x);
}
/* call _HardTanH function */
_HardTanH(&x, &y);
if (requireLink) {
/* tensor connection */
XLink::MakeLink(&x, NULL, &y, FUNC_HARDTANH);
}
}
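The same calling pattern applies to the other new activation wrappers (Identity, Rectify, Sigmoid); a minimal sketch, assuming a CPU tensor and the usual InitTensor2D / SetDataRand helpers:

/* illustrative sketch: in-place activation with an optional autodiff link */
XTensor x;
XTensor y;
InitTensor2D(&x, 2, 3, X_FLOAT);
x.SetDataRand(-2.0F, 2.0F);
HardTanH(x, y);          /* forward only */
HardTanH(x, y, true);    /* forward, plus the FUNC_HARDTANH link for backward */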
/*
backward computation
......
......@@ -22,7 +22,7 @@
#include "HardTanH.h"
#include "HardTanH.cuh"
#include "Loss.cuh"
#include "CrossEntropy.cuh"
#include "../loss/CrossEntropy.cuh"
#include "../XDevice.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
......
......@@ -40,6 +40,8 @@ void _HardTanH(const XTensor * x, XTensor * y);
/* hard tanh function (return an XTensor structure) */
XTensor HardTanH(const XTensor &x);
void HardTanH(const XTensor &x, XTensor &y, bool requireLink = false);
/* de/dx */
void _HardTanHBackward(XTensor * gold, XTensor * y, XTensor * x,
XTensor * dedy, XTensor * dedx,
......
......@@ -21,7 +21,7 @@
#include "../XName.h"
#include "Identity.h"
#include "CrossEntropy.h"
#include "../loss/LHeader.h"
#include "../XUtility.h"
#include "../core/movement/CopyValues.h"
......@@ -57,6 +57,22 @@ XTensor Identity(const XTensor &x)
return y;
}
void Identity(const XTensor &x, XTensor &y, bool requireLink)
{
if (!y.isInit || !XTensor::IsSameShaped(&y, &x)) {
InitTensor(&y, &x);
}
/* call _Identity function */
_Identity(&x, &y);
if (requireLink) {
/* tensor connection */
XLink::MakeLink(&x, NULL, &y, FUNC_IDENTITY);
}
}
/*
backward computation for identity function y = x
......
......@@ -33,6 +33,8 @@ void _Identity(const XTensor * x, XTensor * y);
/* identity function y = x (return an XTensor structure) */
XTensor Identity(const XTensor &x);
void Identity(const XTensor &x, XTensor &y, bool requireLink = false);
/* de/dx */
void _IdentityBackward(XTensor * gold, XTensor * y, XTensor * x,
XTensor * dedy, XTensor * dedx,
......
......@@ -194,6 +194,25 @@ XTensor LogSoftmax(const XTensor &x, int leadDim)
return y;
}
void LogSoftmax(const XTensor &x, XTensor &y, int leadDim, bool requireLink)
{
int ld = leadDim;
if (ld < 0)
ld = x.order - 1;
if (!y.isInit || !XTensor::IsSameShaped(&y, &x)) {
InitTensor(&y, &x);
}
/* call _LogSoftmax function */
_LogSoftmax(&x, &y, ld);
if (requireLink) {
/* tensor connection */
XLink::MakeLink(&x, NULL, &y, FUNC_LOGSOFTMAX);
XLink::AddParamToHeadInt(&y, ld);
}
}
/*
log scale softmax y = log(e^x / \sum_{i} e^{x_i})
make a new tensor to keep the result and return it
......
......@@ -33,6 +33,8 @@ void _LogSoftmax(const XTensor * x, XTensor * y, int leadDim);
/* log scale softmax y = log(e^x / \sum_{i} e^{x_i}) (return an XTensor structure) */
XTensor LogSoftmax(const XTensor &x, int leadDim);
void LogSoftmax(const XTensor &x, XTensor &y, int leadDim, bool requireLink = false);
/* log scale softmax y = log(e^x / \sum_{i} e^{x_i}) (with both argument of x and y) */
void LogSoftmax(const XTensor &x, XTensor &y, int leadDim);
......
......@@ -22,7 +22,7 @@
#include "../XName.h"
#include "Rectify.h"
#include "Rectify.cuh"
#include "CrossEntropy.h"
#include "../loss/LHeader.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
......@@ -77,6 +77,20 @@ XTensor Rectify(const XTensor &x)
return y;
}
void Rectify(const XTensor &x, XTensor &y, bool requireLink)
{
if (!y.isInit || !XTensor::IsSameShaped(&y, &x)) {
InitTensor(&y, &x);
}
/* call _Rectify function */
_Rectify(&x, &y);
if (requireLink) {
/* tensor connection */
XLink::MakeLink(&x, NULL, &y, FUNC_RECTIFY);
}
}
/*
backward computation
......
......@@ -22,7 +22,7 @@
#include "Rectify.h"
#include "Rectify.cuh"
#include "Loss.cuh"
#include "CrossEntropy.cuh"
#include "../loss/CrossEntropy.cuh"
#include "../XDevice.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
......
......@@ -33,6 +33,8 @@ void _Rectify(const XTensor * x, XTensor * y);
/* rectify function y = max(0, x) (return an XTensor structure) */
XTensor Rectify(const XTensor &x);
void Rectify(const XTensor &x, XTensor &y, bool requireLink = false);
/* de/dx */
void _RectifyBackward(XTensor * gold, XTensor * y, XTensor * x,
XTensor * dedy, XTensor * dedx,
......
......@@ -23,7 +23,7 @@
#include <math.h>
#include "Sigmoid.h"
#include "Sigmoid.cuh"
#include "CrossEntropy.h"
#include "../loss/LHeader.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
......@@ -75,6 +75,21 @@ XTensor Sigmoid(const XTensor &x)
return y;
}
void Sigmoid(const XTensor &x, XTensor &y, bool requireLink)
{
if (!y.isInit || !XTensor::IsSameShaped(&y, &x)) {
InitTensor(&y, &x);
}
/* call _Sigmoid function */
_Sigmoid(&x, &y);
if (requireLink) {
/* tensor connection */
XLink::MakeLink(&x, NULL, &y, FUNC_SIGMOID);
}
}
/*
backward computation
......
......@@ -22,7 +22,7 @@
#include "Sigmoid.h"
#include "Sigmoid.cuh"
#include "Loss.cuh"
#include "CrossEntropy.cuh"
#include "../loss/CrossEntropy.cuh"
#include "../XDevice.h"
#ifdef USE_CUDA
......
......@@ -33,6 +33,8 @@ void _Sigmoid(const XTensor * x, XTensor * y);
/* sigmoid function y = 1/(1+exp(-x)) (return an XTensor structure) */
XTensor Sigmoid(const XTensor &x);
void Sigmoid(const XTensor &x, XTensor &y, bool requireLink = false);
/* de/dx */
void _SigmoidBackward(XTensor * gold, XTensor * y, XTensor * x,
XTensor * dedy, XTensor * dedx,
......
......@@ -148,6 +148,26 @@ XTensor Softmax(const XTensor &x, int leadDim)
return y;
}
void Softmax(const XTensor &x, XTensor &y, int leadDim, bool requireLink)
{
int ld = leadDim;
if (ld < 0)
ld = x.order - 1;
if (!y.isInit || !XTensor::IsSameShaped(&y, &x)) {
InitTensor(&y, &x);
}
/* call _Softmax function */
_Softmax(&x, &y, ld);
if (requireLink) {
/* tensor connection */
XLink::MakeLink(&x, NULL, &y, FUNC_SOFTMAX);
XLink::AddParamToHeadInt(&y, ld);
}
}
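Softmax and LogSoftmax follow the same pattern but take a leading dimension; a hedged sketch under the same assumptions:

/* illustrative sketch: softmax over the last dimension of a (2, 5) tensor */
XTensor x;
XTensor y;
InitTensor2D(&x, 2, 5, X_FLOAT);
x.SetDataRand(-1.0F, 1.0F);
Softmax(x, y, -1);          /* -1 selects the last dimension */
Softmax(x, y, -1, true);    /* also records the FUNC_SOFTMAX link and the dimension */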
/*
backward computation for dense tensors
......
......@@ -372,27 +372,16 @@ void _CudaSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
int * dimSize = new int[y->order];
for(int i = 0; i < y->order; i++){
if(i < leadDim)
dimSize[i] = -y->dimSize[i];
dimSize[i] = y->dimSize[i];
else if(i > leadDim)
dimSize[i - 1] = -y->dimSize[i];
dimSize[i - 1] = y->dimSize[i];
}
XMem * mem = y->mem;
/* make a matrix of the same size as the y (i.e., y) */
XTensor * ytmp = NewTensor(y, false);
XTensor * ytmp = NewTensor(y);
/* make a matrix to keep \beta */
XTensor * beta = new XTensor(y->order - 1, dimSize, y->dataType, y->denseRatio, y->devID, mem);
if(mem != NULL){
ytmp->data = mem->AllocBuf(mem->devID, y->unitNum * y->unitSize);
beta->data = mem->AllocBuf(mem->devID, beta->unitNum * beta->unitSize);
}
else{
ytmp->data = XMemAlloc(y->devID, y->unitNum * y->unitSize);
beta->data = XMemAlloc(y->devID, beta->unitNum * beta->unitSize);
}
XTensor * beta = NewTensor(y->order - 1, dimSize, y->dataType, y->denseRatio, y->devID, y->mem);
/* \beta = \sum_i (dE/dy_i * y_i) */
_Multiply(dedy, y, ytmp, 0, 0);
......@@ -405,19 +394,6 @@ void _CudaSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
/* dE/ds_j = y_j * ytmp = y_j * (dE/dy_j - \beta) */
_Multiply(y, ytmp, dedx, 0, 0);
if(mem != NULL){
mem->ReleaseBuf(mem->devID, y->unitNum * y->unitSize);
mem->ReleaseBuf(mem->devID, beta->unitNum * beta->unitSize);
}
else{
XMemFree(y->devID, ytmp->data);
XMemFree(y->devID, beta->data);
}
ytmp->data = NULL;
beta->data = NULL;
delete[] dimSize;
delete ytmp;
delete beta;
......
......@@ -33,6 +33,8 @@ void _Softmax(const XTensor * x, XTensor * y, int leadDim);
/* softmax y = e^x / \sum_{i} e^{x_i} (return an XTensor structure) */
XTensor Softmax(const XTensor &x, int leadDim);
void Softmax(const XTensor &x, XTensor &y, int leadDim, bool requireLink = false);
/* de/dx */
void _SoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
XTensor * dedy, XTensor * dedx,
......
......@@ -22,6 +22,8 @@
#include <math.h>
#include "CrossEntropy.h"
#include "CrossEntropy.cuh"
#include "../XTensor.h"
#include "../XName.h"
#include "../core/arithmetic/MultiplyDim.h"
#include "../core/arithmetic/Multiply.h"
#include "../core/math/Unary.h"
......@@ -61,7 +63,7 @@ void _CrossEntropy(const XTensor * output, const XTensor * gold,
CheckNTErrors(loss->order == output->order - 1, "Wrong loss dimension!");
CheckNTErrors(gold->dataType == DEFAULT_DTYPE && output->dataType == DEFAULT_DTYPE, "TODO!");
XTensor * interBuf1 = NewTensorBuf(output, output->devID, output->mem);
/*XTensor * interBuf1 = NewTensorBuf(output, output->devID, output->mem);
XTensor * interBuf2 = NewTensorBuf(output, output->devID, output->mem);
_Log(output, interBuf1);
......@@ -76,7 +78,23 @@ void _CrossEntropy(const XTensor * output, const XTensor * gold,
_MultiplyMe(loss, padding);
DelTensorBuf(interBuf2);
DelTensorBuf(interBuf1);
DelTensorBuf(interBuf1);*/
XTensor * inter = NewTensor(output);
_Log(output, inter);
_MultiplyMe(inter, gold);
if(weight != NULL)
_MultiplyDimMe(inter, weight, n);
_NegateMe(inter);
_ReduceSum(inter, loss, n);
if(padding != NULL)
_MultiplyMe(loss, padding);
DelTensor(inter);
}
/*
......@@ -223,6 +241,93 @@ void _CrossEntropyFast(const XTensor * output, const XTensor * gold,
}
/*
make a tensor whose shape is that of the input with the given dimension removed
(it keeps the reduced result, e.g., the per-sample loss, and is marked as a temporary tensor)
>> input - the input tensor
>> dim - the dimension to drop
<< return - the reduced tensor
*/
XTensor GetReduceTensor(const XTensor & input, int dim)
{
CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!");
int order = input.order - 1;
int * dimSize = new int[order];
for(int i = 0; i < order; i++){
if(i < dim)
dimSize[i] = input.dimSize[i];
else
dimSize[i] = input.dimSize[i + 1];
}
float dr = (!input.isSparse) ? 1.0F : input.denseRatio;
XTensor output(order, dimSize, input.dataType, dr, input.devID, input.mem);
output.SetTMPFlag();
delete[] dimSize;
return output;
}
/*
compute the cross entropy loss (return an XTensor structure)
make a new tensor to keep the result and return it
loss = sum_{i} (-gold_i * log(output_i))
where gold and output are distributions
>> output - model prediction
>> gold - gold standard
>> leadingDim - the leading dimension for the output
<< return - the loss tensor
*/
XTensor CrossEntropy(const XTensor & output, const XTensor & gold,
int leadingDim)
{
int dim = leadingDim < 0 ? output.order - 1 : leadingDim;
XTensor loss;
loss = GetReduceTensor(output, dim);
XTensor * weight = NULL;
XTensor * padding = NULL;
/* call _CrossEntropy function */
_CrossEntropy(&output, &gold, &loss, weight, padding, dim);
/* tensor connection */
TensorList tails(4);
tails.Add((XTensor*)&output);
tails.Add((XTensor*)&gold);
tails.Add(weight);
tails.Add(padding);
XLink::MakeLink(&tails, &loss, LOSS_CROSSENTROPY);
XLink::AddParamToHeadInt(&loss, dim);
return loss;
}
XTensor CrossEntropy(const XTensor & output, const XTensor & gold,
const XTensor & padding,
int leadingDim)
{
int dim = leadingDim < 0 ? output.order - 1 : leadingDim;
XTensor loss;
loss = GetReduceTensor(output, dim);
XTensor * weight = NULL;
/* call _CrossEntropy function */
_CrossEntropy(&output, &gold, &loss, weight, &padding, dim);
/* tensor connection */
TensorList tails(4);
tails.Add((XTensor*)&output);
tails.Add((XTensor*)&gold);
tails.Add(weight);
tails.Add((XTensor*)&padding);
XLink::MakeLink(&tails, &loss, LOSS_CROSSENTROPY);
XLink::AddParamToHeadInt(&loss, dim);
return loss;
}
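A hedged usage sketch of the XTensor-level cross entropy (illustrative only; output and gold are assumed to hold probability distributions over the last dimension, and the InitTensor2D helper is used for setup):

/* illustrative sketch: per-row cross entropy of a (4, 10) prediction */
XTensor output;
XTensor gold;
InitTensor2D(&output, 4, 10, X_FLOAT);
InitTensor2D(&gold, 4, 10, X_FLOAT);
/* ... fill output with predicted distributions and gold with one-hot rows ... */
XTensor loss;
loss = CrossEntropy(output, gold);   /* loss is a length-4 vector, one value per row */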
/*
compute the cross entropy loss
loss = sum_{i} (-gold_i * log(output_i))
where gold and output are distributions
......@@ -579,16 +684,16 @@ void _CrossEntropyBackward(XTensor * dedy, const XTensor * output,
}
}
//if(padding != NULL) {
// XTensor * tmp = NewTensor(padding);
// _IsNonZero(padding, tmp);
// int nonZeroNum = (int)_ReduceSumAll(tmp);
// _ScaleAndShiftMe(dedy, (DTYPE)1.0/(DTYPE)nonZeroNum);
// delete tmp;
//}
//else {
// _ScaleAndShiftMe(dedy, (DTYPE)1.0/(DTYPE)blockNum);
//}
if(padding != NULL) {
XTensor * tmp = NewTensor(padding);
_IsNonZero(padding, tmp);
int nonZeroNum = (int)_ReduceSumAll(tmp);
_ScaleAndShiftMe(dedy, (DTYPE)1.0/(DTYPE)nonZeroNum);
delete tmp;
}
else {
_ScaleAndShiftMe(dedy, (DTYPE)1.0/(DTYPE)blockNum);
}
}
} // namespace nts(NiuTrans.Tensor)
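In effect, this normalization scales the loss gradient dE/dy by 1/N, where N is the number of non-zero padding entries when a padding tensor is given, and the block count otherwise, so the gradient is averaged over the effective positions rather than summed.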
......@@ -196,16 +196,17 @@ void _CudaCrossEntropyBackward(XTensor * dedy, const XTensor * output,
delete[] dims;
}
//if(padding != NULL) {
// XTensor * tmp = NewTensor(padding);
// _IsNonZero(padding, tmp);
// int nonZeroNum = (int)_ReduceSumAll(tmp);
// _ScaleAndShiftMe(dedy, (DTYPE)1.0/(DTYPE)nonZeroNum);
// delete tmp;
//}
//else {
// _ScaleAndShiftMe(dedy, (DTYPE)1.0/(DTYPE)blockNum);
//}
if(padding != NULL) {
XTensor * tmp = NewTensor(padding);
_IsNonZero(padding, tmp);
int nonZeroNum = (int)_ReduceSumAll(tmp);
_ScaleAndShiftMe(dedy, (DTYPE)1.0/(DTYPE)nonZeroNum);
delete tmp;
}
else {
int num = dedy->unitNum / dedy->GetDim(n);
_ScaleAndShiftMe(dedy, (DTYPE)1.0/(DTYPE)num);
}
}
......
......@@ -41,6 +41,25 @@ void _CrossEntropyFast(const XTensor * output, const XTensor * gold,
XTensor * loss, const XTensor * weight = NULL,
const XTensor * padding = NULL, int leadingDim = -1);
/* compute the cross entropy loss */
XTensor CrossEntropy(const XTensor & output, const XTensor & gold,
int leadingDim = -1);
/* compute the cross entropy loss with padding */
XTensor CrossEntropy(const XTensor & output, const XTensor & gold,
const XTensor & padding,
int leadingDim = -1);
/* compute the cross entropy loss with weight */
XTensor CrossEntropyWeight(const XTensor & output, const XTensor & gold,
const XTensor & weight,
int leadingDim = -1);
/* compute the cross entropy loss with weight and padding */
XTensor CrossEntropyWeight(const XTensor & output, const XTensor & gold,
const XTensor & padding, const XTensor & weight,
int leadingDim = -1);
/* compute the cross entropy loss (return the loss) */
DTYPE _CrossEntropy(const XTensor * output, const XTensor * gold,
LOSS_COMPUTE_WAY reduceWay, const XTensor * weight = NULL,
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2019-4-22
*/
/* this is a header to include all loss computations in the "loss" workspace */
#ifndef __LHEADER_H__
#define __LHEADER_H__
#include "CrossEntropy.h"
#endif // __LHEADER_H__
\ No newline at end of file
......@@ -30,7 +30,7 @@ In this case, 2 * (2, 1) -> (2, 2), dim=1.
bool TestConcatenate1()
{
/* create list */
XList * sList = new XList();
TensorList * sList = new TensorList();
/* a source tensor of size (2, 1) */
int sOrder1 = 2;
......@@ -157,7 +157,7 @@ In this case, 2 * (2, 1) -> (4, 1), dim=0.
bool TestConcatenate2()
{
/* create list */
XList * sList = new XList();
TensorList * sList = new TensorList();
/* a source tensor of size (2, 1) */
int sOrder1 = 2;
......@@ -286,7 +286,7 @@ In this case, (2, 1) + (2, 2) -> (2, 3), dim=1.
bool TestConcatenate3()
{
/* create list */
XList * sList = new XList();
TensorList * sList = new TensorList();
/* a source tensor of size (2, 1) */
int sOrder1 = 2;
......
......@@ -31,7 +31,7 @@ In this case, 2 * (2, 1) -> (2, 2), dim=1.
bool TestConcatenateSolely1()
{
/* create list */
XList * sList = new XList();
TensorList * sList = new TensorList();
/* a source tensor of size (2, 1) */
int sOrder1 = 2;
......@@ -154,7 +154,7 @@ In this case, 2 * (2, 1) -> (4, 1), dim=0.
bool TestConcatenateSolely2()
{
/* create list */
XList * sList = new XList();
TensorList * sList = new TensorList();
/* a source tensor of size (2, 1) */
int sOrder1 = 2;
......@@ -279,7 +279,7 @@ In this case, (2, 1) + (2, 2) -> (2, 3), dim=1.
bool TestConcatenateSolely3()
{
/* create list */
XList * sList = new XList();
TensorList * sList = new TensorList();
/* a source tensor of size (2, 1) */
int sOrder1 = 2;
......
......@@ -22,7 +22,7 @@
#ifndef __TEST_CROSSENTROPY_H__
#define __TEST_CROSSENTROPY_H__
#include "../function/CrossEntropy.h"
#include "../loss/CrossEntropy.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -246,7 +246,7 @@ In this case, 2 * (2, 4) -> (4, 4), whereToMerge=0.
bool TestMerge3()
{
/* create list */
XList * smallList = new XList();
TensorList * smallList = new TensorList();
/* a small tensor of size (2, 4) */
int sOrder = 2;
......@@ -364,7 +364,7 @@ In this case, 2 * (2, 4) -> (2, 8), whereToMerge=1.
bool TestMerge4()
{
/* create list */
XList * smallList = new XList();
TensorList * smallList = new TensorList();
/* a small tensor of size (2, 4) */
int sOrder = 2;
......
......@@ -125,10 +125,10 @@ bool TestRectify2()
{1.0F, 1.0F, 1.0F} };
DTYPE yAnswer[2][3] = { {1.0F, 1.0F, 2.0F},
{2.0F, 4.0F, 5.0F} };
DTYPE dedyAnswer[2][3] = { {-1.0F, -1.0F, -0.5F},
{-0.5F, -0.25F, -0.2F} };
DTYPE dedxAnswer[2][3] = { {-1.0F, -1.0F, -0.5F},
{-0.5F, -0.25F, -0.2F} };
DTYPE dedyAnswer[2][3] = { {-0.5F, -0.5F, -0.25F},
{-0.25F, -0.125F, -0.1F} };
DTYPE dedxAnswer[2][3] = { {-0.5F, -0.5F, -0.25F},
{-0.25F, -0.125F, -0.1F} };
/* CPU test */
bool cpuTest = true;
......
......@@ -222,8 +222,8 @@ In this case, (3, 4) -> 2 * (3, 2) , whereToSplit=1, splitNum=2.
bool TestSplit3()
{
/* create list */
XList * tList = new XList();
XList tUserList;
TensorList * tList = new TensorList();
TensorList tUserList;
/* a source tensor of size (3, 4) */
int sOrder = 2;
......
......@@ -35,7 +35,7 @@ bool Test()
wrong = !TestConcatenate() || wrong;
wrong = !TestConcatenateSolely() || wrong;
wrong = !TestCos() || wrong;
wrong = !TestConvertDataType() || wrong;
//wrong = !TestConvertDataType() || wrong;
wrong = !TestCopyIndexed() || wrong;
wrong = !TestCopyValues() || wrong;
wrong = !TestDiv() || wrong;
......