Commit a0aa3d49 by xuchen

Merge with the huchi branch; fix the binary-operation bug (missing link creation) and modify the FNNLM interface.

parent b30fad5f
...@@ -530,7 +530,7 @@ void XMathGrad::GradMatrixMul(XTensor * node, bool isEfficient) ...@@ -530,7 +530,7 @@ void XMathGrad::GradMatrixMul(XTensor * node, bool isEfficient)
XTensor * dedc = node->grad; XTensor * dedc = node->grad;
XTensor * deda = a->grad; XTensor * deda = a->grad;
XTensor * dedb = b->grad; XTensor * dedb = b->grad;
if(a->order == 2 && b->order == 2) if(a->order == 2 && b->order == 2)
GradMatrixMul(a, deda, transA, b, dedb, transB, dedc, alpha, isEfficient); GradMatrixMul(a, deda, transA, b, dedb, transB, dedc, alpha, isEfficient);
else if(transA == X_NOTRANS && a->order > 2 && b->order == 2){ else if(transA == X_NOTRANS && a->order > 2 && b->order == 2){
...@@ -584,9 +584,19 @@ void XMathGrad::GradMatrixMul(XTensor * a, XTensor * deda, MATRIX_TRANS_TYPE tra ...@@ -584,9 +584,19 @@ void XMathGrad::GradMatrixMul(XTensor * a, XTensor * deda, MATRIX_TRANS_TYPE tra
if(!isEfficient || a->isGrad) if(!isEfficient || a->isGrad)
_MatrixMul(dedc, X_NOTRANS, b, X_TRANS, deda, alpha, 1.0F); _MatrixMul(dedc, X_NOTRANS, b, X_TRANS, deda, alpha, 1.0F);
//if (b->id == 15)
// dedb->Dump(stdout, "dedb", 100);
/* dE/db = a^T * dE/dc * \alpha */ /* dE/db = a^T * dE/dc * \alpha */
if(!isEfficient || b->isGrad) if(!isEfficient || b->isGrad)
_MatrixMul(a, X_TRANS, dedc, X_NOTRANS, dedb, alpha, 1.0F); _MatrixMul(a, X_TRANS, dedc, X_NOTRANS, dedb, alpha, 1.0F);
//
//if (b->id == 15) {
// a->Dump(stdout, "a", 100);
// dedc->Dump(stdout, "dedc", 100);
// dedb->Dump(stdout, "dedb", 100);
// exit(1);
//}
} }
/* c = a^T * b * \alpha */ /* c = a^T * b * \alpha */
...@@ -1578,10 +1588,8 @@ void XMathGrad::GradMulAndShift(XTensor * node, bool isEfficient) ...@@ -1578,10 +1588,8 @@ void XMathGrad::GradMulAndShift(XTensor * node, bool isEfficient)
node->grad->Reshape(order, dimSize); node->grad->Reshape(order, dimSize);
DelTensorBuf(interGrad); DelTensorBuf(interGrad);
} }
/* compute dE/dx, dE/dw */ /* compute dE/dx, dE/dw */
XTensor * c = node; XTensor * c = node;
XTensor * dedc = node->grad; XTensor * dedc = node->grad;
......
...@@ -271,6 +271,11 @@ void XNet::BackwardNode(XTensor * node, bool isEfficent) ...@@ -271,6 +271,11 @@ void XNet::BackwardNode(XTensor * node, bool isEfficent)
else{ else{
ShowNTErrors("Wrong node type!"); ShowNTErrors("Wrong node type!");
} }
//FILE *f = fopen("debug", "a");
//node->Dump(f, "node", 10);
//if (node->grad != NULL)
// node->grad->Dump(f, "node->grad", 10);
} }
else{ else{
node->visitMark = NODE_FINISHED; node->visitMark = NODE_FINISHED;
......
...@@ -586,9 +586,6 @@ void Update(FNNModel &model, FNNModel &grad, float epsilon, bool isNodeGrad) ...@@ -586,9 +586,6 @@ void Update(FNNModel &model, FNNModel &grad, float epsilon, bool isNodeGrad)
XTensor * para = (XTensor*)paraList.GetItem(i); XTensor * para = (XTensor*)paraList.GetItem(i);
XTensor * paraGrad = (XTensor*)gradList.GetItem(i); XTensor * paraGrad = (XTensor*)gradList.GetItem(i);
//fprintf(stderr, "%d\n", i);
//paraGrad->Dump(stderr, "grad:", 10);
/* the delta rule */ /* the delta rule */
_Sum(para, paraGrad, para, -epsilon); _Sum(para, paraGrad, para, -epsilon);
} }
...@@ -607,14 +604,14 @@ float GetProb(XTensor &output, XTensor &gold, XTensor * wordProbs) ...@@ -607,14 +604,14 @@ float GetProb(XTensor &output, XTensor &gold, XTensor * wordProbs)
InitTensorV2(&probs, &output); InitTensorV2(&probs, &output);
/* probs[i,j] = output[i,j] * gold[i,j] */ /* probs[i,j] = output[i,j] * gold[i,j] */
_Multiply(&output, &gold, &probs); Multiply(output, gold, probs);
/* probability of each word */ /* probability of each word */
XTensor wprobs; XTensor wprobs;
InitTensor1DV2(&wprobs, output.GetDim(0), output.dataType, output.devID); InitTensor1DV2(&wprobs, output.GetDim(0), output.dataType, output.devID);
_ReduceSum(&probs, &wprobs, 1); ReduceSum(probs, wprobs, 1);
if(wordProbs != NULL) if(wordProbs != NULL)
_CopyValues(&wprobs, wordProbs); CopyValues(wprobs, *wordProbs);
/* reshape the tensor to fit it into the reduce procedure /* reshape the tensor to fit it into the reduce procedure
TODO: XTensor supports scalars */ TODO: XTensor supports scalars */
...@@ -626,7 +623,7 @@ float GetProb(XTensor &output, XTensor &gold, XTensor * wordProbs) ...@@ -626,7 +623,7 @@ float GetProb(XTensor &output, XTensor &gold, XTensor * wordProbs)
/* probability for the batch */ /* probability for the batch */
XTensor result; XTensor result;
InitTensor1DV2(&result, 1, X_FLOAT, output.devID); InitTensor1DV2(&result, 1, X_FLOAT, output.devID);
_ReduceSum(&probs, &result, 1); ReduceSum(probs, result, 1);
return result.Get1D(0); return result.Get1D(0);
} }
...@@ -793,7 +790,7 @@ void Forward(XTensor inputs[], XTensor &output, FNNModel &model, FNNNet &net) ...@@ -793,7 +790,7 @@ void Forward(XTensor inputs[], XTensor &output, FNNModel &model, FNNNet &net)
/* generate word embedding of position i: /* generate word embedding of position i:
embedding = input * w */ embedding = input * w */
_MatrixMul(&input, X_NOTRANS, &w, X_NOTRANS, &embedding); MatrixMul(input, X_NOTRANS, w, X_NOTRANS, embedding);
eList.Add(&net.embeddings[i]); eList.Add(&net.embeddings[i]);
} }
...@@ -801,7 +798,7 @@ void Forward(XTensor inputs[], XTensor &output, FNNModel &model, FNNNet &net) ...@@ -801,7 +798,7 @@ void Forward(XTensor inputs[], XTensor &output, FNNModel &model, FNNNet &net)
/* concatenate word embeddings /* concatenate word embeddings
embeddingcat = cat(embedding_0...embedding_{n-1}) */ embeddingcat = cat(embedding_0...embedding_{n-1}) */
InitModelTensor2D(net.embeddingCat, batchSize, (n - 1) * model.eSize, model); InitModelTensor2D(net.embeddingCat, batchSize, (n - 1) * model.eSize, model);
_Concatenate(&eList, &net.embeddingCat, 1); Concatenate(eList, net.embeddingCat, 1);
/* go over each hidden layer */ /* go over each hidden layer */
for(int i = 0; i < depth; i++){ for(int i = 0; i < depth; i++){
...@@ -816,22 +813,22 @@ void Forward(XTensor inputs[], XTensor &output, FNNModel &model, FNNNet &net) ...@@ -816,22 +813,22 @@ void Forward(XTensor inputs[], XTensor &output, FNNModel &model, FNNNet &net)
/* generate hidden states of layer i: /* generate hidden states of layer i:
s = h_pre * w */ s = h_pre * w */
_MatrixMul(&h_pre, X_NOTRANS, &w, X_NOTRANS, &s); MatrixMul(h_pre, X_NOTRANS, w, X_NOTRANS, s);
/* make a 2d tensor for the bias term */ /* make a 2d tensor for the bias term */
XTensor b2D; XTensor b2D;
InitTensorV2(&b2D, &s); InitTensorV2(&b2D, &s);
_Unsqueeze(&b, &b2D, 0, batchSize); Unsqueeze(b, b2D, 0, batchSize);
/* introduce bias term: /* introduce bias term:
s = s + b s = s + b
NOTE: the trick here is to extend b to a 2d tensor NOTE: the trick here is to extend b to a 2d tensor
to fit into the 2d representation in tensor summation */ to fit into the 2d representation in tensor summation */
_Sum(&s, &b2D, &s); Sum(s, b2D, s);
/* pass the state through the hard tanh function: /* pass the state through the hard tanh function:
h = tanh(s) */ h = tanh(s) */
_HardTanH(&s, &h); HardTanH(s, h);
} }
/* generate the output Pr(w_{n-1}|w_0...w_{n-2}): /* generate the output Pr(w_{n-1}|w_0...w_{n-2}):
...@@ -849,16 +846,16 @@ void Forward(XTensor inputs[], XTensor &output, FNNModel &model, FNNNet &net) ...@@ -849,16 +846,16 @@ void Forward(XTensor inputs[], XTensor &output, FNNModel &model, FNNNet &net)
InitModelTensor2D(y, batchSize, model.vSize, model); InitModelTensor2D(y, batchSize, model.vSize, model);
/* s = h_last * w */ /* s = h_last * w */
_MatrixMul(&h_last, X_NOTRANS, &w, X_NOTRANS, &s); MatrixMul(h_last, X_NOTRANS, w, X_NOTRANS, s);
XTensor b2D; XTensor b2D;
InitTensorV2(&b2D, &s); InitTensorV2(&b2D, &s);
_Unsqueeze(&b, &b2D, 0, batchSize); Unsqueeze(b, b2D, 0, batchSize);
_Sum(&s, &b2D, &s); Sum(s, b2D, s);
/* y = softmax(s) */ /* y = softmax(s) */
_LogSoftmax(&s, &y, 1); LogSoftmax(s, y, 1);
} }
} }
...@@ -900,18 +897,18 @@ void Backward(XTensor inputs[], XTensor &output, XTensor &gold, LOSS_FUNCTION_NA ...@@ -900,18 +897,18 @@ void Backward(XTensor inputs[], XTensor &output, XTensor &gold, LOSS_FUNCTION_NA
x is the top most hidden layer) x is the top most hidden layer)
so we know so we know
dE/dw = x^T * dE/ds */ dE/dw = x^T * dE/ds */
_MatrixMul(&x, X_TRANS, &deds, X_NOTRANS, &dedw); MatrixMul(x, X_TRANS, deds, X_NOTRANS, dedw);
/* gradient of the bias: dE/db = dE/ds * 1 = dE/ds /* gradient of the bias: dE/db = dE/ds * 1 = dE/ds
specifically dE/db_{j} = \sum_{i} dE/ds_{i,j} */ specifically dE/db_{j} = \sum_{i} dE/ds_{i,j} */
_ReduceSum(&deds, &dedb, 0); ReduceSum(deds, dedb, 0);
/* then, we compute /* then, we compute
dE/dx_{j} = \sum_j' (dE/ds_{j'} * ds_{j'}/dx_j) dE/dx_{j} = \sum_j' (dE/ds_{j'} * ds_{j'}/dx_j)
= \sum_j' (dE/ds_{j'} * w_{j, j'}) = \sum_j' (dE/ds_{j'} * w_{j, j'})
i.e., i.e.,
dE/dx = dE/ds * w^T */ dE/dx = dE/ds * w^T */
_MatrixMul(&deds, X_NOTRANS, &w, X_TRANS, &dedx); MatrixMul(deds, X_NOTRANS, w, X_TRANS, dedx);
XTensor &gradPassed = dedx; XTensor &gradPassed = dedx;
XTensor dedsHidden; XTensor dedsHidden;
...@@ -939,17 +936,17 @@ void Backward(XTensor inputs[], XTensor &output, XTensor &gold, LOSS_FUNCTION_NA ...@@ -939,17 +936,17 @@ void Backward(XTensor inputs[], XTensor &output, XTensor &gold, LOSS_FUNCTION_NA
_HardTanHBackward(&h, &s, &dedh, &deds); _HardTanHBackward(&h, &s, &dedh, &deds);
/* gradient of the weight: dE/dw = x^T * dE/ds */ /* gradient of the weight: dE/dw = x^T * dE/ds */
_MatrixMul(&x, X_TRANS, &deds, X_NOTRANS, &dedw); MatrixMul(x, X_TRANS, deds, X_NOTRANS, dedw);
/* gradient of the bias: dE/db = dE/ds * 1 = dE/ds /* gradient of the bias: dE/db = dE/ds * 1 = dE/ds
specifically dE/db_{j} = \sum_{i} dE/ds_{i,j} */ specifically dE/db_{j} = \sum_{i} dE/ds_{i,j} */
_ReduceSum(&deds, &dedb, 0); ReduceSum(deds, dedb, 0);
/* gradient of the input: dE/dx = dE/ds * w^T */ /* gradient of the input: dE/dx = dE/ds * w^T */
_MatrixMul(&deds, X_NOTRANS, &w, X_TRANS, &dedx); MatrixMul(deds, X_NOTRANS, w, X_TRANS, dedx);
if (i > 0) if (i > 0)
_CopyValues(&dedx, &gradPassed); CopyValues(dedx, gradPassed);
} }
TensorList eList(n - 1); TensorList eList(n - 1);
...@@ -964,7 +961,7 @@ void Backward(XTensor inputs[], XTensor &output, XTensor &gold, LOSS_FUNCTION_NA ...@@ -964,7 +961,7 @@ void Backward(XTensor inputs[], XTensor &output, XTensor &gold, LOSS_FUNCTION_NA
XTensor &dedyCat = depth > 0 ? dedxBottom : dedx; XTensor &dedyCat = depth > 0 ? dedxBottom : dedx;
/* split the concatenation of gradients of the embeddings */ /* split the concatenation of gradients of the embeddings */
_Split(&dedyCat, &eList, 1, n - 1); Split(dedyCat, eList, 1, n - 1);
/* go over for each word */ /* go over for each word */
for (int i = 0; i < n - 1; i++) { for (int i = 0; i < n - 1; i++) {
...@@ -975,7 +972,7 @@ void Backward(XTensor inputs[], XTensor &output, XTensor &gold, LOSS_FUNCTION_NA ...@@ -975,7 +972,7 @@ void Backward(XTensor inputs[], XTensor &output, XTensor &gold, LOSS_FUNCTION_NA
/* gradient of the embedding weight: dE/dw += x^T * dE/dy /* gradient of the embedding weight: dE/dw += x^T * dE/dy
NOTE that we accumulate dE/dw here because the matrix w NOTE that we accumulate dE/dw here because the matrix w
is shared by several layers (or words) */ is shared by several layers (or words) */
_MatrixMul(&x, X_TRANS, dedy, X_NOTRANS, &dedw, 1.0F, 1.0F); MatrixMul(x, X_TRANS, *dedy, X_NOTRANS, dedw, 1.0F, 1.0F);
delete dedy; delete dedy;
} }
......
...@@ -737,6 +737,11 @@ void XLink::ShowNode(FILE * file, XTensor * node) ...@@ -737,6 +737,11 @@ void XLink::ShowNode(FILE * file, XTensor * node)
} }
} }
fprintf(file, "shape[%d] ", node->order);
for (int i = 0; i < node->order; i++)
fprintf(file, "%d ", node->GetDim(i));
fprintf(stderr, "\n"); fprintf(stderr, "\n");
} }
......
...@@ -59,6 +59,8 @@ const char * GetOPName(int type) ...@@ -59,6 +59,8 @@ const char * GetOPName(int type)
return "M_DIV"; return "M_DIV";
else if (type == MATH_DIVDIM) else if (type == MATH_DIVDIM)
return "M_DIVDIM"; return "M_DIVDIM";
else if (type == MATH_MASK)
return "M_MASK";
else if (type == MATH_MATRIXMUL) else if (type == MATH_MATRIXMUL)
return "M_MATRIXMUL"; return "M_MATRIXMUL";
else if (type == MATH_MATRIXMULBATCHED) else if (type == MATH_MATRIXMULBATCHED)
......
...@@ -48,7 +48,8 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -48,7 +48,8 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#define MATH_CLIP MATH_ROUND + 1 #define MATH_CLIP MATH_ROUND + 1
#define MATH_DIV MATH_CLIP + 1 #define MATH_DIV MATH_CLIP + 1
#define MATH_DIVDIM MATH_DIV + 1 #define MATH_DIVDIM MATH_DIV + 1
#define MATH_MATRIXMUL MATH_DIVDIM + 1 #define MATH_MASK MATH_DIVDIM + 1
#define MATH_MATRIXMUL MATH_MASK + 1
#define MATH_MATRIXMULBATCHED MATH_MATRIXMUL + 1 #define MATH_MATRIXMULBATCHED MATH_MATRIXMUL + 1
#define MATH_MULTIPLY MATH_MATRIXMULBATCHED + 1 #define MATH_MULTIPLY MATH_MATRIXMULBATCHED + 1
#define MATH_MULTIPLYDIM MATH_MULTIPLY + 1 #define MATH_MULTIPLYDIM MATH_MULTIPLY + 1
......
...@@ -1826,6 +1826,7 @@ void XTensor::Dump(FILE * file, const char * label, const int n, const int beg, ...@@ -1826,6 +1826,7 @@ void XTensor::Dump(FILE * file, const char * label, const int n, const int beg,
fprintf(file, "%s ", label); fprintf(file, "%s ", label);
if(isInit){ if(isInit){
fprintf(file, "id=%d ", id);
fprintf(file, "order=%d dimsize=", order); fprintf(file, "order=%d dimsize=", order);
for (int i = 0; i < order; i++) { for (int i = 0; i < order; i++) {
fprintf(file, "%d", dimSize[i]); fprintf(file, "%d", dimSize[i]);
...@@ -1878,7 +1879,7 @@ void XTensor::Dump(FILE * file, const char * label, const int n, const int beg, ...@@ -1878,7 +1879,7 @@ void XTensor::Dump(FILE * file, const char * label, const int n, const int beg,
fprintf(file, "[%d]%e ", key, value); fprintf(file, "[%d]%e ", key, value);
} }
} }
fprintf(file, "\n"); fprintf(file, "\n\n");
if (isNewData) { if (isNewData) {
delete[](char*)d; delete[](char*)d;
......
...@@ -142,6 +142,23 @@ void _DivMe(XTensor * a, const XTensor * b, DTYPE alpha, int leadingDim) ...@@ -142,6 +142,23 @@ void _DivMe(XTensor * a, const XTensor * b, DTYPE alpha, int leadingDim)
_Div(a, b, a, alpha, leadingDim); _Div(a, b, a, alpha, leadingDim);
} }
/*
element-wise division of two tensors (do it on site)
keep the result in the input tensor a and return nothing
a(i) = a(i)/b(i) + \alpha * a(i)
where i is the index of the item
>> a - tensor a (where we keep the result)
>> b - tensor b
>> alpha - the coefficient applied to the original value of a
>> leadingDim - the dimension along which we perform broadcasting
*/
void DivMe(XTensor& a, const XTensor& b, DTYPE alpha, int leadingDim)
{
_Div(&a, &b, &a, alpha, leadingDim);
}
/* /*
return a dimension if the division is performed as DivDim (in more details in DivDim.h) return a dimension if the division is performed as DivDim (in more details in DivDim.h)
>> a - a tensor >> a - a tensor
...@@ -229,9 +246,8 @@ where i is the index of the item ...@@ -229,9 +246,8 @@ where i is the index of the item
>> c - result tensor >> c - result tensor
>> alpha - the coefficient >> alpha - the coefficient
>> leadingDim - the dimension along which we perform broadcasting >> leadingDim - the dimension along which we perform broadcasting
>> requireLink - if add operation to network
*/ */
void Div(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int leadingDim, bool requireLink) void Div(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int leadingDim)
{ {
if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) { if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
InitTensor(&c, &a); InitTensor(&c, &a);
...@@ -245,7 +261,7 @@ void Div(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int leadin ...@@ -245,7 +261,7 @@ void Div(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int leadin
/* call _Div function */ /* call _Div function */
_Div(&a, &b, &c, 0, leadingDim); _Div(&a, &b, &c, 0, leadingDim);
if (requireLink) { if (c.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_DIV); XLink::MakeLink(&a, &b, &c, MATH_DIV);
XLink::AddParamToHead(&c, alpha); XLink::AddParamToHead(&c, alpha);
...@@ -256,7 +272,7 @@ void Div(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int leadin ...@@ -256,7 +272,7 @@ void Div(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int leadin
/* call _DivDim function */ /* call _DivDim function */
_DivDim(&a, &b, &c, n, alpha); _DivDim(&a, &b, &c, n, alpha);
if (requireLink) { if (c.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_DIVDIM); XLink::MakeLink(&a, &b, &c, MATH_DIVDIM);
XLink::AddParamToHeadInt(&c, n); XLink::AddParamToHeadInt(&c, n);
......
...@@ -122,7 +122,7 @@ where i is the item index ...@@ -122,7 +122,7 @@ where i is the item index
*/ */
void _CudaDiv(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, int leadingDim) void _CudaDiv(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, int leadingDim)
{ {
int leadingDimRDI = a->order - leadingDim - 1; int leadingDimRDI = a->order - leadingDim - 1;
CheckNTErrors((a->unitNum <= c->unitNum && b->unitNum <= c->unitNum), CheckNTErrors((a->unitNum <= c->unitNum && b->unitNum <= c->unitNum),
"Unmatched tensors in multiplication!"); "Unmatched tensors in multiplication!");
CheckNTErrors((a->order == b->order && a->order == c->order), "Unmatched tensors!"); CheckNTErrors((a->order == b->order && a->order == c->order), "Unmatched tensors!");
......
...@@ -40,6 +40,7 @@ a(i) = a(i)/b(i) + \alpha * a(i) ...@@ -40,6 +40,7 @@ a(i) = a(i)/b(i) + \alpha * a(i)
where i is the index of the element where i is the index of the element
*/ */
void _DivMe(XTensor * a, const XTensor * b, DTYPE alpha = 0.0, int leadingDim = 0); void _DivMe(XTensor * a, const XTensor * b, DTYPE alpha = 0.0, int leadingDim = 0);
void DivMe(XTensor & a, const XTensor & b, DTYPE alpha = 0.0, int leadingDim = 0);
/* /*
element-wise division of two tensors (return an XTensor structure) element-wise division of two tensors (return an XTensor structure)
...@@ -54,7 +55,7 @@ element-wise division of two tensors: ...@@ -54,7 +55,7 @@ element-wise division of two tensors:
c(i) = a(i)/b(i) + \alpha * c(i) c(i) = a(i)/b(i) + \alpha * c(i)
where i is the index of the element where i is the index of the element
*/ */
void Div(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha = 0.0, int leadingDim = 0, bool requireLink = false); void Div(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha = 0.0, int leadingDim = 0);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -183,9 +183,8 @@ i.e., a is divided with b by broadcasting ...@@ -183,9 +183,8 @@ i.e., a is divided with b by broadcasting
>> c - where we put result. we save it in a if c is NULL >> c - where we put result. we save it in a if c is NULL
>> n - the dimension index >> n - the dimension index
>> alpha - the scaling factor >> alpha - the scaling factor
>> requireLink - if add operation to network
*/ */
void DivDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE alpha, bool requireLink) void DivDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE alpha)
{ {
if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) { if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
InitTensor(&c, &a); InitTensor(&c, &a);
...@@ -194,7 +193,7 @@ void DivDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE alpha, ...@@ -194,7 +193,7 @@ void DivDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE alpha,
/* call _Div function */ /* call _Div function */
_DivDim(&a, &b, &c, n, alpha); _DivDim(&a, &b, &c, n, alpha);
if (requireLink) { if (c.enableGrad == true) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_DIVDIM); XLink::MakeLink(&a, &b, &c, MATH_DIVDIM);
XLink::AddParamToHeadInt(&c, n); XLink::AddParamToHeadInt(&c, n);
......
...@@ -59,7 +59,7 @@ c(i) = a/b + \alpha * c ...@@ -59,7 +59,7 @@ c(i) = a/b + \alpha * c
where the size of b is equal to the n-th dimension of a, where the size of b is equal to the n-th dimension of a,
i.e., a is divided with b by broadcasting i.e., a is divided with b by broadcasting
*/ */
void DivDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE alpha = (DTYPE)0.0, bool requireLink = false); void DivDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE alpha = (DTYPE)0.0);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -130,6 +130,17 @@ void _MaskMe(XTensor * a, const XTensor * mask, DTYPE alpha) ...@@ -130,6 +130,17 @@ void _MaskMe(XTensor * a, const XTensor * mask, DTYPE alpha)
} }
/*
mask entries of a given tensor (on site):
a(i) = a(i) if mask(i) is non-zero
a(i) = alpha if mask(i) = 0
where i is the index of the element
>> a - the tensor to be masked (where we keep the result)
>> mask - the mask tensor; zero entries select positions to overwrite
>> alpha - the value written at masked (zero-mask) positions
*/
void MaskMe(XTensor& a, const XTensor& mask, DTYPE alpha)
{
_Mask(&a, &mask, &a, alpha);
}
/*
mask entries of a given tensor (return an XTensor structure): mask entries of a given tensor (return an XTensor structure):
a(i) = a(i) if mask(i) is non-zero a(i) = a(i) if mask(i) is non-zero
a(i) = alpha if mask(i) = 0 a(i) = alpha if mask(i) = 0
...@@ -140,16 +151,35 @@ XTensor Mask(const XTensor &a, const XTensor &mask, DTYPE alpha) ...@@ -140,16 +151,35 @@ XTensor Mask(const XTensor &a, const XTensor &mask, DTYPE alpha)
XTensor c(&a); XTensor c(&a);
c.SetTMPFlag(); c.SetTMPFlag();
/* call _Sum function */ /* call _Mask function */
_Mask(&a, &mask, &c, alpha); _Mask(&a, &mask, &c, alpha);
/* tensor connections */ /* tensor connections */
//XLink::MakeLink(&a, &mask, &c, MATH_SUM); XLink::MakeLink(&a, &mask, &c, MATH_MASK);
//XLink::AddParamToHead(&c, alpha); XLink::AddParamToHead(&c, alpha);
// TODO!!
ShowNTErrors("TODO!");
return c; return c;
} }
/*
mask entries of a given tensor (keep the result in the output tensor c):
c(i) = a(i) if mask(i) is non-zero
c(i) = alpha if mask(i) = 0
where i is the index of the element
>> a - the input tensor
>> mask - the mask tensor; zero entries select positions to overwrite
>> c - the output tensor (initialized from a if not yet initialized or mis-shaped)
>> alpha - the value written at masked (zero-mask) positions
*/
void Mask(const XTensor &a, const XTensor &mask, XTensor &c, DTYPE alpha)
{
if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
InitTensor(&c, &a);
}
/* call _Mask function */
_Mask(&a, &mask, &c, alpha);
/* record the operation in the network only when gradients are enabled for c */
if (c.enableGrad) {
XLink::MakeLink(&a, &mask, &c, MATH_MASK);
XLink::AddParamToHead(&c, alpha);
}
}
} }
\ No newline at end of file
...@@ -43,6 +43,7 @@ a(i) = alpha if mask(i) = 0 ...@@ -43,6 +43,7 @@ a(i) = alpha if mask(i) = 0
where i is the index of the element where i is the index of the element
*/ */
void _MaskMe(XTensor * a, const XTensor * mask, DTYPE alpha = 0.0); void _MaskMe(XTensor * a, const XTensor * mask, DTYPE alpha = 0.0);
void MaskMe(XTensor & a, const XTensor & mask, DTYPE alpha = 0.0);
/* /*
mask entries of a given tensor (return an XTensor structure): mask entries of a given tensor (return an XTensor structure):
...@@ -52,6 +53,14 @@ where i is the index of the element ...@@ -52,6 +53,14 @@ where i is the index of the element
*/ */
XTensor Mask(const XTensor &a, const XTensor &mask, DTYPE alpha = 0.0); XTensor Mask(const XTensor &a, const XTensor &mask, DTYPE alpha = 0.0);
/*
mask entries of a given tensor (keep the result in the output tensor c):
a(i) = a(i) if mask(i) is non-zero
a(i) = alpha if mask(i) = 0
where i is the index of the element
*/
void Mask(const XTensor &a, const XTensor &mask, XTensor &c, DTYPE alpha = 0.0);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
#endif // __MASK_H__ #endif // __MASK_H__
...@@ -202,7 +202,9 @@ void _MatrixMul(const XTensor * a, MATRIX_TRANS_TYPE transposedA, ...@@ -202,7 +202,9 @@ void _MatrixMul(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
delete cList; delete cList;
} }
bool CheckMMulShape(const XTensor * a, MATRIX_TRANS_TYPE transposedA, const XTensor * b, MATRIX_TRANS_TYPE transposedB, XTensor * c) bool CheckMMulShape(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
const XTensor * b, MATRIX_TRANS_TYPE transposedB,
XTensor * c)
{ {
if (!(a && b && c)) if (!(a && b && c))
return false; return false;
...@@ -231,10 +233,13 @@ bool CheckMMulShape(const XTensor * a, MATRIX_TRANS_TYPE transposedA, const XTen ...@@ -231,10 +233,13 @@ bool CheckMMulShape(const XTensor * a, MATRIX_TRANS_TYPE transposedA, const XTen
dimSize[sub++] = bm; dimSize[sub++] = bm;
for (int i = 0; i < order; i++) { for (int i = 0; i < order; i++) {
if (dimSize[i] != c->dimSize[i]) if (dimSize[i] != c->dimSize[i]) {
delete[] dimSize;
return false; return false;
}
} }
delete[] dimSize;
return true; return true;
} }
...@@ -303,8 +308,8 @@ XTensor MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA, ...@@ -303,8 +308,8 @@ XTensor MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA,
} }
void MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA, void MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA,
const XTensor &b, MATRIX_TRANS_TYPE transposedB, XTensor &c, const XTensor &b, MATRIX_TRANS_TYPE transposedB, XTensor &c,
DTYPE alpha, XPRunner * parallelRunner, bool requireLink) DTYPE alpha, DTYPE beta, XPRunner * parallelRunner)
{ {
CheckNTErrors(a.dataType == b.dataType, "Input tensors should have the same data type!"); CheckNTErrors(a.dataType == b.dataType, "Input tensors should have the same data type!");
CheckNTErrors(a.order >= 2 && b.order >= 2, "Input tensors must have a order >= 2!"); CheckNTErrors(a.order >= 2 && b.order >= 2, "Input tensors must have a order >= 2!");
...@@ -337,9 +342,9 @@ void MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA, ...@@ -337,9 +342,9 @@ void MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA,
} }
/* call _MatrixMul function */ /* call _MatrixMul function */
_MatrixMul(&a, transposedA, &b, transposedB, &c, alpha, 0, parallelRunner); _MatrixMul(&a, transposedA, &b, transposedB, &c, alpha, beta, parallelRunner);
if (requireLink) { if (c.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_MATRIXMUL); XLink::MakeLink(&a, &b, &c, MATH_MATRIXMUL);
XLink::AddParamToHeadTrans(&c, transposedA); XLink::AddParamToHeadTrans(&c, transposedA);
...@@ -400,7 +405,7 @@ XTensor MatrixMul(const XTensor &a, const XTensor &b, ...@@ -400,7 +405,7 @@ XTensor MatrixMul(const XTensor &a, const XTensor &b,
} }
void MatrixMul(const XTensor &a, const XTensor &b, XTensor &c, void MatrixMul(const XTensor &a, const XTensor &b, XTensor &c,
DTYPE alpha, XPRunner * parallelRunner, bool requireLink) DTYPE alpha, XPRunner * parallelRunner)
{ {
CheckNTErrors(a.dataType == b.dataType, "Input tensors should have the same data type!"); CheckNTErrors(a.dataType == b.dataType, "Input tensors should have the same data type!");
CheckNTErrors(a.order >= 2 && b.order >= 2, "Input tensors must have a order >= 2!"); CheckNTErrors(a.order >= 2 && b.order >= 2, "Input tensors must have a order >= 2!");
...@@ -435,7 +440,7 @@ void MatrixMul(const XTensor &a, const XTensor &b, XTensor &c, ...@@ -435,7 +440,7 @@ void MatrixMul(const XTensor &a, const XTensor &b, XTensor &c,
/* call _MatrixMul function */ /* call _MatrixMul function */
_MatrixMul(&a, X_NOTRANS, &b, X_NOTRANS, &c, alpha, 0, parallelRunner); _MatrixMul(&a, X_NOTRANS, &b, X_NOTRANS, &c, alpha, 0, parallelRunner);
if (requireLink) { if (c.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_MATRIXMUL); XLink::MakeLink(&a, &b, &c, MATH_MATRIXMUL);
XLink::AddParamToHeadTrans(&c, X_NOTRANS); XLink::AddParamToHeadTrans(&c, X_NOTRANS);
......
...@@ -40,8 +40,11 @@ bj is the j-th element tensor of B, and c_{i,j} is the (i,j) elementtensor of th ...@@ -40,8 +40,11 @@ bj is the j-th element tensor of B, and c_{i,j} is the (i,j) elementtensor of th
C should be a tensor of z * x * n * m. C should be a tensor of z * x * n * m.
Obviously C = A * B performs normal matrix multiplication if A = y * z and B = x * y. Obviously C = A * B performs normal matrix multiplication if A = y * z and B = x * y.
*/ */
void _MatrixMul(const XTensor * a, MATRIX_TRANS_TYPE transposedA, const XTensor * b, MATRIX_TRANS_TYPE transposedB, XTensor * c, void _MatrixMul(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0, XPRunner * parallelRunner = NULL); const XTensor * b, MATRIX_TRANS_TYPE transposedB,
XTensor * c,
DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0,
XPRunner * parallelRunner = NULL);
/* /*
matrix multiplication (return an XTensor structure) c = trans(a) * trans(b) * alpha matrix multiplication (return an XTensor structure) c = trans(a) * trans(b) * alpha
...@@ -56,19 +59,23 @@ bj is the j-th element tensor of B, and c_{i,j} is the (i,j) elementtensor of th ...@@ -56,19 +59,23 @@ bj is the j-th element tensor of B, and c_{i,j} is the (i,j) elementtensor of th
C should be a tensor of z * x * n * m. C should be a tensor of z * x * n * m.
Obviously C = A * B performs normal matrix multiplication if A = y * z and B = x * y. Obviously C = A * B performs normal matrix multiplication if A = y * z and B = x * y.
*/ */
XTensor MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA, const XTensor &b, MATRIX_TRANS_TYPE transposedB, XTensor MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA,
DTYPE alpha = (DTYPE)1.0, XPRunner * parallelRunner = NULL); const XTensor &b, MATRIX_TRANS_TYPE transposedB,
DTYPE alpha = (DTYPE)1.0,
XPRunner * parallelRunner = NULL);
void MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA, const XTensor &b, MATRIX_TRANS_TYPE transposedB, void MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA,
XTensor &c, DTYPE alpha = (DTYPE)1.0, XPRunner * parallelRunner = NULL, bool requireLink = false); const XTensor &b, MATRIX_TRANS_TYPE transposedB,
XTensor &c,
DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0,
XPRunner * parallelRunner = NULL);
/* matrix multiplication with no transposition c = a * b * alpha*/ /* matrix multiplication with no transposition c = a * b * alpha*/
XTensor MatrixMul(const XTensor &a, const XTensor &b, XTensor MatrixMul(const XTensor &a, const XTensor &b,
DTYPE alpha = (DTYPE)1.0, XPRunner * parallelRunner = NULL); DTYPE alpha = (DTYPE)1.0, XPRunner * parallelRunner = NULL);
void MatrixMul(const XTensor &a, const XTensor &b, XTensor &c, void MatrixMul(const XTensor &a, const XTensor &b, XTensor &c,
DTYPE alpha = (DTYPE)1.0, XPRunner * parallelRunner = NULL, bool requireLink = false); DTYPE alpha = (DTYPE)1.0, XPRunner * parallelRunner = NULL);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -143,6 +143,23 @@ void _MultiplyMe(XTensor * a, const XTensor * b, DTYPE alpha, int leadingDim) ...@@ -143,6 +143,23 @@ void _MultiplyMe(XTensor * a, const XTensor * b, DTYPE alpha, int leadingDim)
_Multiply(a, b, a, alpha, leadingDim); _Multiply(a, b, a, alpha, leadingDim);
} }
/*
element-wise product of two tensors (do it on site)
keep the result in the input tensor a and return nothing
a(i) = a(i)*b(i) + \alpha * a(i)
where i is the index of the item
>> a - tensor a (where we keep the result)
>> b - tensor b
>> alpha - the coefficient applied to the original value of a
>> leadingDim - the dimension along which we perform broadcasting
*/
void MultiplyMe(XTensor& a, const XTensor& b, DTYPE alpha, int leadingDim)
{
_Multiply(&a, &b, &a, alpha, leadingDim);
}
/* /*
return a dimension if the multiplication is performed as MultiplyDim (in more details in MultiplyDim.h) return a dimension if the multiplication is performed as MultiplyDim (in more details in MultiplyDim.h)
>> a - a tensor >> a - a tensor
...@@ -230,9 +247,8 @@ where i is the index of the item ...@@ -230,9 +247,8 @@ where i is the index of the item
>> c - result tensor >> c - result tensor
>> alpha - the coefficient >> alpha - the coefficient
>> leadingDim - the dimension along which we perform broadcasting >> leadingDim - the dimension along which we perform broadcasting
>> requireLink - if add operation to network
*/ */
void Multiply(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int leadingDim, bool requireLink) void Multiply(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int leadingDim)
{ {
if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) { if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
InitTensor(&c, &a); InitTensor(&c, &a);
...@@ -246,7 +262,7 @@ void Multiply(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int l ...@@ -246,7 +262,7 @@ void Multiply(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int l
/* call _Multiply function */ /* call _Multiply function */
_Multiply(&a, &b, &c, 0, leadingDim); _Multiply(&a, &b, &c, 0, leadingDim);
if (requireLink) { if (c.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_MULTIPLY); XLink::MakeLink(&a, &b, &c, MATH_MULTIPLY);
XLink::AddParamToHead(&c, alpha); XLink::AddParamToHead(&c, alpha);
...@@ -257,7 +273,7 @@ void Multiply(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int l ...@@ -257,7 +273,7 @@ void Multiply(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int l
/* call _MultiplyDim function */ /* call _MultiplyDim function */
_MultiplyDim(&a, &b, &c, n, alpha); _MultiplyDim(&a, &b, &c, n, alpha);
if (requireLink) { if (c.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_MULTIPLYDIM); XLink::MakeLink(&a, &b, &c, MATH_MULTIPLYDIM);
XLink::AddParamToHeadInt(&c, n); XLink::AddParamToHeadInt(&c, n);
......
...@@ -122,8 +122,8 @@ where i is the item index ...@@ -122,8 +122,8 @@ where i is the item index
*/ */
void _CudaMultiply(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, int leadingDim) void _CudaMultiply(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, int leadingDim)
{ {
int leadingDimRDI = a->order - leadingDim - 1; int leadingDimRDI = a->order - leadingDim - 1;
CheckNTErrors((a->unitNum <= c->unitNum && b->unitNum <= c->unitNum), CheckNTErrors(a->unitNum <= c->unitNum && b->unitNum <= c->unitNum,
"Unmatched tensors in multiplication!"); "Unmatched tensors in multiplication!");
CheckNTErrors((a->order == b->order && a->order == c->order), "Unmatched tensors!"); CheckNTErrors((a->order == b->order && a->order == c->order), "Unmatched tensors!");
......
...@@ -40,6 +40,7 @@ a(i) = a(i)*b(i) + \alpha * a(i) ...@@ -40,6 +40,7 @@ a(i) = a(i)*b(i) + \alpha * a(i)
where i is the index of the element where i is the index of the element
*/ */
void _MultiplyMe(XTensor * a, const XTensor * b, DTYPE alpha = 0.0, int leadingDim = 0); void _MultiplyMe(XTensor * a, const XTensor * b, DTYPE alpha = 0.0, int leadingDim = 0);
void MultiplyMe(XTensor & a, const XTensor & b, DTYPE alpha = 0.0, int leadingDim = 0);
/* /*
element-wise product of two tensors (return an XTensor structure) element-wise product of two tensors (return an XTensor structure)
...@@ -54,7 +55,7 @@ element-wise product of two tensors: ...@@ -54,7 +55,7 @@ element-wise product of two tensors:
c(i) = a(i)*b(i) + \alpha * c(i) c(i) = a(i)*b(i) + \alpha * c(i)
where i is the index of the element where i is the index of the element
*/ */
void Multiply(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha = 0.0, int leadingDim = 0, bool requireLink = false); void Multiply(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha = 0.0, int leadingDim = 0);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -139,6 +139,24 @@ void _MultiplyDimMe(XTensor * a, const XTensor * b, int n, DTYPE alpha) ...@@ -139,6 +139,24 @@ void _MultiplyDimMe(XTensor * a, const XTensor * b, int n, DTYPE alpha)
} }
/* /*
tensor multiplication(do it on site)
make a new tensor to keep the result and return it
c = a * b + \alpha * c
where the size of b is equal to the n-th dimension of a,
i.e., a is multiplied with b by broadcasting
>> a - a tensor
>> b - another tensor whose size is equal to that of dimension n of a
>> n - the dimension index
>> alpha - the scaling factor
*/
void MultiplyDimMe(XTensor& a, const XTensor& b, int n, DTYPE alpha)
{
_MultiplyDim(&a, &b, &a, n, alpha);
}
/*
tensor multiplication (return an XTensor structure and make tensor connections) tensor multiplication (return an XTensor structure and make tensor connections)
make a new tensor to keep the result and return it make a new tensor to keep the result and return it
...@@ -180,9 +198,8 @@ i.e., a is multiplied with b by broadcasting ...@@ -180,9 +198,8 @@ i.e., a is multiplied with b by broadcasting
>> b - another tensor whose size is equal to that of dimension n of a >> b - another tensor whose size is equal to that of dimension n of a
>> c - where we put a * b + \alpha * c. we save it in a if c is NULL >> c - where we put a * b + \alpha * c. we save it in a if c is NULL
>> n - the dimension index >> n - the dimension index
>> requireLink - if add operation to network
*/ */
void MultiplyDim(const XTensor &a, const XTensor &b, XTensor &c, int n, bool requireLink) void MultiplyDim(const XTensor &a, const XTensor &b, XTensor &c, int n)
{ {
if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) { if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
InitTensor(&c, &a); InitTensor(&c, &a);
...@@ -191,7 +208,7 @@ void MultiplyDim(const XTensor &a, const XTensor &b, XTensor &c, int n, bool req ...@@ -191,7 +208,7 @@ void MultiplyDim(const XTensor &a, const XTensor &b, XTensor &c, int n, bool req
/* call _Multiply function */ /* call _Multiply function */
_MultiplyDim(&a, &b, &c, n, 0); _MultiplyDim(&a, &b, &c, n, 0);
if (requireLink) { if (c.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_MULTIPLYDIM); XLink::MakeLink(&a, &b, &c, MATH_MULTIPLYDIM);
XLink::AddParamToHeadInt(&c, n); XLink::AddParamToHeadInt(&c, n);
...@@ -347,9 +364,8 @@ where some of dimensions of b can be of size 1 ...@@ -347,9 +364,8 @@ where some of dimensions of b can be of size 1
>> a - a tensor >> a - a tensor
>> b - another tensor that would be broadcasted >> b - another tensor that would be broadcasted
>> c - the resulting tensor >> c - the resulting tensor
>> requireLink - if add operation to network
*/ */
void MultiplyBroadcast(const XTensor &a, const XTensor &b, XTensor &c, bool requireLink) void MultiplyBroadcast(const XTensor &a, const XTensor &b, XTensor &c)
{ {
if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) { if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
InitTensor(&c, &a); InitTensor(&c, &a);
...@@ -358,7 +374,7 @@ void MultiplyBroadcast(const XTensor &a, const XTensor &b, XTensor &c, bool requ ...@@ -358,7 +374,7 @@ void MultiplyBroadcast(const XTensor &a, const XTensor &b, XTensor &c, bool requ
/* call _SumBroadcast function */ /* call _SumBroadcast function */
_MultiplyBroadcast(&a, &b, &c, 0); _MultiplyBroadcast(&a, &b, &c, 0);
if (requireLink) { if (c.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_MULTIPLYBROADCAST); XLink::MakeLink(&a, &b, &c, MATH_MULTIPLYBROADCAST);
XLink::AddParamToHead(&c, 0); XLink::AddParamToHead(&c, 0);
......
...@@ -126,6 +126,19 @@ void _SubMe(XTensor * a, const XTensor * b, DTYPE beta) ...@@ -126,6 +126,19 @@ void _SubMe(XTensor * a, const XTensor * b, DTYPE beta)
{ {
_Sub(a, b, a, beta); _Sub(a, b, a, beta);
} }
/*
tensor subtraction a = a - b * \beta (do it on site)
keep the result in the tensor a and return nothing
>> a - a tensor
>> b - another tensor
>> beta - the scaling factor
*/
void SubMe(XTensor& a, const XTensor& b, DTYPE beta)
{
_Sub(&a, &b, &a, beta);
}
/* /*
return a dimension if the subtraction is performed as SubDim (in more details in SubDim.h) return a dimension if the subtraction is performed as SubDim (in more details in SubDim.h)
...@@ -203,9 +216,8 @@ tensor subtraction c = a - b * \beta ...@@ -203,9 +216,8 @@ tensor subtraction c = a - b * \beta
>> b - another tensor >> b - another tensor
>> c - where we put a-b*\beta. we save it in a if c is NULL >> c - where we put a-b*\beta. we save it in a if c is NULL
>> beta - the scaling factor >> beta - the scaling factor
>> requireLink - if add operation to network
*/ */
void Sub(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta, bool requireLink) void Sub(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta)
{ {
if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) { if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
InitTensor(&c, &a); InitTensor(&c, &a);
...@@ -217,7 +229,7 @@ void Sub(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta, bool requir ...@@ -217,7 +229,7 @@ void Sub(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta, bool requir
/* call _Sub function */ /* call _Sub function */
_Sub(&a, &b, &c, beta); _Sub(&a, &b, &c, beta);
if (requireLink) { if (c.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_SUB); XLink::MakeLink(&a, &b, &c, MATH_SUB);
XLink::AddParamToHead(&c, beta); XLink::AddParamToHead(&c, beta);
...@@ -227,7 +239,7 @@ void Sub(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta, bool requir ...@@ -227,7 +239,7 @@ void Sub(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta, bool requir
/* call _SubDim function */ /* call _SubDim function */
_SubDim(&a, &b, &c, n, beta); _SubDim(&a, &b, &c, n, beta);
if (requireLink) { if (c.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_SUBDIM); XLink::MakeLink(&a, &b, &c, MATH_SUBDIM);
XLink::AddParamToHeadInt(&c, n); XLink::AddParamToHeadInt(&c, n);
......
...@@ -35,6 +35,7 @@ tensor subtraction a = a - b * \beta ...@@ -35,6 +35,7 @@ tensor subtraction a = a - b * \beta
keep the result in the input tensor a and return nothing keep the result in the input tensor a and return nothing
*/ */
void _SubMe(XTensor * a, const XTensor * b, DTYPE beta = (DTYPE)1.0); void _SubMe(XTensor * a, const XTensor * b, DTYPE beta = (DTYPE)1.0);
void SubMe(XTensor & a, const XTensor & b, DTYPE beta = (DTYPE)1.0);
/* /*
tensor subtraction c = a - b * \beta tensor subtraction c = a - b * \beta
...@@ -43,7 +44,7 @@ make a new tensor c to keep the result and return it ...@@ -43,7 +44,7 @@ make a new tensor c to keep the result and return it
XTensor Sub(const XTensor &a, const XTensor &b, DTYPE beta = (DTYPE)1.0); XTensor Sub(const XTensor &a, const XTensor &b, DTYPE beta = (DTYPE)1.0);
/* tensor subtraction c = a - b * \beta */ /* tensor subtraction c = a - b * \beta */
void Sub(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta = (DTYPE)1.0, bool requireLink = false); void Sub(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta = (DTYPE)1.0);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -40,7 +40,7 @@ XTensor SubDim(const XTensor &a, const XTensor &b, int n, DTYPE beta = (DTYPE)1. ...@@ -40,7 +40,7 @@ XTensor SubDim(const XTensor &a, const XTensor &b, int n, DTYPE beta = (DTYPE)1.
/* tensor subtraction c = a - b * \beta where the size of b is equal to the n-th dimension of a, /* tensor subtraction c = a - b * \beta where the size of b is equal to the n-th dimension of a,
i.e., a is subtracted with b by broadcasting*/ i.e., a is subtracted with b by broadcasting*/
void SubDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE beta = (DTYPE)1.0, bool requireLink = false); void SubDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE beta = (DTYPE)1.0);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -132,6 +132,19 @@ void _SumMe(XTensor * a, const XTensor * b, DTYPE beta) ...@@ -132,6 +132,19 @@ void _SumMe(XTensor * a, const XTensor * b, DTYPE beta)
_Sum(a, b, a, beta); _Sum(a, b, a, beta);
} }
/*
tensor summation a = a + b * \beta (do it on site)
keep the result in the tensor a and return nothing
>> a - a tensor
>> b - another tensor
>> beta - the scaling factor
*/
void SumMe(XTensor& a, const XTensor& b, DTYPE beta)
{
_Sum(&a, &b, &a, beta);
}
/* /*
return a dimension if the sum is performed as SumDim (in more details in SumDim.h) return a dimension if the sum is performed as SumDim (in more details in SumDim.h)
>> a - a tensor >> a - a tensor
...@@ -207,9 +220,8 @@ tensor summation c = a + b * \beta ...@@ -207,9 +220,8 @@ tensor summation c = a + b * \beta
>> a - a tensor >> a - a tensor
>> b - another tensor >> b - another tensor
>> beta - the scaling factor >> beta - the scaling factor
>> requireLink - if add operation to network
*/ */
void Sum(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta, bool requireLink) void Sum(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta)
{ {
if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) { if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
InitTensor(&c, &a); InitTensor(&c, &a);
...@@ -221,7 +233,7 @@ void Sum(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta, bool requir ...@@ -221,7 +233,7 @@ void Sum(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta, bool requir
/* call _Sum function */ /* call _Sum function */
_Sum(&a, &b, &c, beta); _Sum(&a, &b, &c, beta);
if (requireLink) { if (c.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_SUM); XLink::MakeLink(&a, &b, &c, MATH_SUM);
XLink::AddParamToHead(&c, beta); XLink::AddParamToHead(&c, beta);
...@@ -231,7 +243,7 @@ void Sum(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta, bool requir ...@@ -231,7 +243,7 @@ void Sum(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta, bool requir
/* call _SumDim function */ /* call _SumDim function */
_SumDim(&a, &b, &c, n, beta); _SumDim(&a, &b, &c, n, beta);
if (requireLink) { if (c.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_SUMDIM); XLink::MakeLink(&a, &b, &c, MATH_SUMDIM);
XLink::AddParamToHeadInt(&c, n); XLink::AddParamToHeadInt(&c, n);
......
...@@ -34,6 +34,7 @@ tensor summation a = a + b * \beta ...@@ -34,6 +34,7 @@ tensor summation a = a + b * \beta
keep the result in the input tensor a and return nothing keep the result in the input tensor a and return nothing
*/ */
void _SumMe(XTensor * a, const XTensor * b, DTYPE beta = (DTYPE)1.0); void _SumMe(XTensor * a, const XTensor * b, DTYPE beta = (DTYPE)1.0);
void SumMe(XTensor & a, const XTensor & b, DTYPE beta = (DTYPE)1.0);
/* /*
tensor summation c = a + b * \beta tensor summation c = a + b * \beta
...@@ -42,7 +43,7 @@ make a new tensor c to keep the result and return it ...@@ -42,7 +43,7 @@ make a new tensor c to keep the result and return it
XTensor Sum(const XTensor &a, const XTensor &b, DTYPE beta = (DTYPE)1.0); XTensor Sum(const XTensor &a, const XTensor &b, DTYPE beta = (DTYPE)1.0);
/* tensor summation c = a + b * \beta */ /* tensor summation c = a + b * \beta */
void Sum(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta = (DTYPE)1.0, bool requireLink = false); void Sum(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta = (DTYPE)1.0);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -200,9 +200,8 @@ i.e., a is summed with b by broadcasting ...@@ -200,9 +200,8 @@ i.e., a is summed with b by broadcasting
>> c - where we put a+b*\beta. we save it in a if c is NULL >> c - where we put a+b*\beta. we save it in a if c is NULL
>> n - the dimension index >> n - the dimension index
>> beta - the scaling factor >> beta - the scaling factor
>> requireLink - if add operation to network
*/ */
void SumDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE beta, bool requireLink) void SumDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE beta)
{ {
if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) { if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
InitTensor(&c, &a); InitTensor(&c, &a);
...@@ -211,7 +210,7 @@ void SumDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE beta, b ...@@ -211,7 +210,7 @@ void SumDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE beta, b
/* call _SumDim function */ /* call _SumDim function */
_SumDim(&a, &b, &c, n, beta); _SumDim(&a, &b, &c, n, beta);
if (requireLink) { if (c.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_SUMDIM); XLink::MakeLink(&a, &b, &c, MATH_SUMDIM);
XLink::AddParamToHeadInt(&c, n); XLink::AddParamToHeadInt(&c, n);
...@@ -368,9 +367,8 @@ c = a + b * \beta ...@@ -368,9 +367,8 @@ c = a + b * \beta
>> b - another tensor that would be broadcasted >> b - another tensor that would be broadcasted
>> c - the resulting tensor >> c - the resulting tensor
>> beta - the scaling factor >> beta - the scaling factor
>> requireLink - if add operation to network
*/ */
void SumBroadcast(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta, bool requireLink) void SumBroadcast(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta)
{ {
if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) { if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
InitTensor(&c, &a); InitTensor(&c, &a);
...@@ -379,7 +377,7 @@ void SumBroadcast(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta, bo ...@@ -379,7 +377,7 @@ void SumBroadcast(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta, bo
/* call _SumBroadcast function */ /* call _SumBroadcast function */
_SumBroadcast(&a, &b, &c, beta); _SumBroadcast(&a, &b, &c, beta);
if (requireLink) { if (c.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_SUMBROADCAST); XLink::MakeLink(&a, &b, &c, MATH_SUMBROADCAST);
XLink::AddParamToHead(&c, beta); XLink::AddParamToHead(&c, beta);
......
...@@ -44,7 +44,7 @@ XTensor SumDim(const XTensor &a, const XTensor &b, int n, DTYPE beta = (DTYPE)1. ...@@ -44,7 +44,7 @@ XTensor SumDim(const XTensor &a, const XTensor &b, int n, DTYPE beta = (DTYPE)1.
/* tensor summation c = a + b * \beta where the size of b is equal to the n-th dimension of a, /* tensor summation c = a + b * \beta where the size of b is equal to the n-th dimension of a,
i.e., a is summed with b by broadcasting */ i.e., a is summed with b by broadcasting */
void SumDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE beta = (DTYPE)1.0, bool requireLink = false); void SumDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE beta = (DTYPE)1.0);
/* tensor broadcast summation c = a + b * \beta where some of dimensions of b can be of size 1 */ /* tensor broadcast summation c = a + b * \beta where some of dimensions of b can be of size 1 */
void _SumBroadcast(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta = (DTYPE)1.0); void _SumBroadcast(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta = (DTYPE)1.0);
...@@ -54,7 +54,7 @@ void _SumBroadcast(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta ...@@ -54,7 +54,7 @@ void _SumBroadcast(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta
XTensor SumBroadcast(const XTensor &a, const XTensor &b, DTYPE beta = (DTYPE)1.0); XTensor SumBroadcast(const XTensor &a, const XTensor &b, DTYPE beta = (DTYPE)1.0);
/* tensor broadcast summation c = a + b * \beta where some of dimensions of b can be of size 1 */ /* tensor broadcast summation c = a + b * \beta where some of dimensions of b can be of size 1 */
void SumBroadcast(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta = (DTYPE)1.0, bool requireLink = false); void SumBroadcast(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta = (DTYPE)1.0);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -126,4 +126,18 @@ XTensor ConvertDataType(const XTensor & input, TENSOR_DATA_TYPE dataType) ...@@ -126,4 +126,18 @@ XTensor ConvertDataType(const XTensor & input, TENSOR_DATA_TYPE dataType)
return output; return output;
} }
void ConvertDataType(const XTensor & input, XTensor & output, TENSOR_DATA_TYPE dataType)
{
if (!output.isInit || input.dataType != output.dataType) {
float dr = (!input.isSparse) ? 1.0F : input.denseRatio;
InitTensor(&output, input.order, input.dimSize, dataType, dr, input.devID, input.mem);
}
_ConvertDataType(&input, &output);
/* tensor connection */
if (output.enableGrad)
XLink::MakeLink(&input, NULL, &output, GETANDSET_CONVERTDATATYPE);
}
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
...@@ -38,6 +38,9 @@ void _ConvertDataType(const XTensor * input, XTensor * output); ...@@ -38,6 +38,9 @@ void _ConvertDataType(const XTensor * input, XTensor * output);
/* convert data type (return an XTensor structure) */ /* convert data type (return an XTensor structure) */
XTensor ConvertDataType(const XTensor & input, TENSOR_DATA_TYPE dataType); XTensor ConvertDataType(const XTensor & input, TENSOR_DATA_TYPE dataType);
/* convert data type */
void ConvertDataType(const XTensor & input, XTensor & output, TENSOR_DATA_TYPE dataType);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
#endif // __CONVERTDATATYPE_H__ #endif // __CONVERTDATATYPE_H__
...@@ -27,41 +27,41 @@ ...@@ -27,41 +27,41 @@
namespace nts { namespace nts {
template<class T1, class T2> template<class T1, class T2>
T1 descale(T1 x, T2 num) T1 BinaryDescale(T1 x, T2 num)
{ {
return (T1)(x / num); return (T1)(x / num);
} }
template<class T1, class T2> template<class T1, class T2>
T1 power(T1 x, T2 num) T1 BinaryPower(T1 x, T2 num)
{ {
if (num == 0) if (num == 0)
return (T1)1.0; return (T1)1.0;
else if (num == 0.5) else if (num == 0.5)
return (T1)sqrt(num); return (T1)sqrt(x);
else if (num == 2) else if (num == 2)
return x * x; return x * x;
else { else {
if (x == 0 && num < 0) if (x == 0 && num < 0)
return (T1)NAN; return (T1)1e20F;
else else
return (T1)pow(x, num); return (T1)pow(x, num);
} }
} }
template<class T1, class T2> template<class T1, class T2>
T1 scale(T1 x, T2 num) T1 BinaryScale(T1 x, T2 num)
{ {
return (T1)(x * num); return (T1)(x * num);
} }
template<class T1, class T2> template<class T1, class T2>
T1 shift(T1 x, T2 num) T1 BinaryShift(T1 x, T2 num)
{ {
return (T1)(x + num); return (T1)(x + num);
} }
int mod(int x, int num) int BinaryMod(int x, int num)
{ {
return x % num; return x % num;
} }
...@@ -135,6 +135,7 @@ XTensor funcName(const XTensor &a, T num) ...@@ -135,6 +135,7 @@ XTensor funcName(const XTensor &a, T num)
b.SetTMPFlag(); \ b.SetTMPFlag(); \
_funcName(&a, &b, num); \ _funcName(&a, &b, num); \
XLink::MakeLink(&a, NULL, &b, operationId); \ XLink::MakeLink(&a, NULL, &b, operationId); \
XLink::AddParamToHead(&b, num); \
return b; \ return b; \
} \ } \
template XTensor funcName<int>(const XTensor&, int); \ template XTensor funcName<int>(const XTensor&, int); \
...@@ -151,37 +152,38 @@ void funcName(const XTensor &a, XTensor &b, T num) ...@@ -151,37 +152,38 @@ void funcName(const XTensor &a, XTensor &b, T num)
_funcName(&a, &b, num); \ _funcName(&a, &b, num); \
if (b.enableGrad) { \ if (b.enableGrad) { \
XLink::MakeLink(&a, NULL, &b, operationId); \ XLink::MakeLink(&a, NULL, &b, operationId); \
XLink::AddParamToHead(&b, num); \
} \ } \
} \ } \
template void funcName<int>(const XTensor&, XTensor&, int); \ template void funcName<int>(const XTensor&, XTensor&, int); \
template void funcName<float>(const XTensor&, XTensor&, float); \ template void funcName<float>(const XTensor&, XTensor&, float); \
template void funcName<double>(const XTensor&, XTensor&, double); template void funcName<double>(const XTensor&, XTensor&, double);
_SIMPLE_BINARY_FUNCTION(_Descale, _CudaDescale, descale) _SIMPLE_BINARY_FUNCTION(_Descale, _CudaDescale, BinaryDescale)
_SIMPLE_BINARY_FUNCTION_ME(_DescaleMe, _Descale) _SIMPLE_BINARY_FUNCTION_ME(_DescaleMe, _Descale)
SIMPLE_BINARY_FUNCTION_ME(DescaleMe, _Descale) SIMPLE_BINARY_FUNCTION_ME(DescaleMe, _Descale)
SIMPLE_BINARY_FUNCTION(Descale, _Descale, MATH_DESCALE) SIMPLE_BINARY_FUNCTION(Descale, _Descale, MATH_DESCALE)
SIMPLE_BINARY_FUNCTION_VOID(Descale, _Descale, MATH_DESCALE) SIMPLE_BINARY_FUNCTION_VOID(Descale, _Descale, MATH_DESCALE)
_SIMPLE_BINARY_FUNCTION(_Mod, _CudaMod, mod) _SIMPLE_BINARY_FUNCTION(_Mod, _CudaMod, BinaryMod)
_SIMPLE_BINARY_FUNCTION_ME(_ModMe, _Mod) _SIMPLE_BINARY_FUNCTION_ME(_ModMe, _Mod)
SIMPLE_BINARY_FUNCTION_ME(ModMe, _Mod) SIMPLE_BINARY_FUNCTION_ME(ModMe, _Mod)
SIMPLE_BINARY_FUNCTION(Mod, _Mod, MATH_MOD) SIMPLE_BINARY_FUNCTION(Mod, _Mod, MATH_MOD)
SIMPLE_BINARY_FUNCTION_VOID(Mod, _Mod, MATH_MOD) SIMPLE_BINARY_FUNCTION_VOID(Mod, _Mod, MATH_MOD)
_SIMPLE_BINARY_FUNCTION(_Power, _CudaPower, power) _SIMPLE_BINARY_FUNCTION(_Power, _CudaPower, BinaryPower)
_SIMPLE_BINARY_FUNCTION_ME(_PowerMe, _Power) _SIMPLE_BINARY_FUNCTION_ME(_PowerMe, _Power)
SIMPLE_BINARY_FUNCTION_ME(PowerMe, _Power) SIMPLE_BINARY_FUNCTION_ME(PowerMe, _Power)
SIMPLE_BINARY_FUNCTION(Power, _Power, MATH_POWER) SIMPLE_BINARY_FUNCTION(Power, _Power, MATH_POWER)
SIMPLE_BINARY_FUNCTION_VOID(Power, _Power, MATH_POWER) SIMPLE_BINARY_FUNCTION_VOID(Power, _Power, MATH_POWER)
_SIMPLE_BINARY_FUNCTION(_Scale, _CudaScale, scale) _SIMPLE_BINARY_FUNCTION(_Scale, _CudaScale, BinaryScale)
_SIMPLE_BINARY_FUNCTION_ME(_ScaleMe, _Scale) _SIMPLE_BINARY_FUNCTION_ME(_ScaleMe, _Scale)
SIMPLE_BINARY_FUNCTION_ME(ScaleMe, _Scale) SIMPLE_BINARY_FUNCTION_ME(ScaleMe, _Scale)
SIMPLE_BINARY_FUNCTION(Scale, _Scale, MATH_SCALE) SIMPLE_BINARY_FUNCTION(Scale, _Scale, MATH_SCALE)
SIMPLE_BINARY_FUNCTION_VOID(Scale, _Scale, MATH_SCALE) SIMPLE_BINARY_FUNCTION_VOID(Scale, _Scale, MATH_SCALE)
_SIMPLE_BINARY_FUNCTION(_Shift, _CudaShift, shift) _SIMPLE_BINARY_FUNCTION(_Shift, _CudaShift, BinaryShift)
_SIMPLE_BINARY_FUNCTION_ME(_ShiftMe, _Shift) _SIMPLE_BINARY_FUNCTION_ME(_ShiftMe, _Shift)
SIMPLE_BINARY_FUNCTION_ME(ShiftMe, _Shift) SIMPLE_BINARY_FUNCTION_ME(ShiftMe, _Shift)
SIMPLE_BINARY_FUNCTION(Shift, _Shift, MATH_SHIFT) SIMPLE_BINARY_FUNCTION(Shift, _Shift, MATH_SHIFT)
......
...@@ -31,31 +31,31 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -31,31 +31,31 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA #ifdef USE_CUDA
__device__ __device__
int BaseMod(int x, int base) int BinaryCudaMod(int x, int base)
{ {
return x % base; return x % base;
} }
template<class T1, class T2> template<class T1, class T2>
__device__ __device__
T1 BaseDescale(T1 x, T2 num) T1 BinaryCudaDescale(T1 x, T2 num)
{ {
return x / num; return x / num;
} }
template<class T1, class T2> template<class T1, class T2>
__device__ __device__
T1 BasePower(T1 x, T2 num) T1 BinaryCudaPower(T1 x, T2 num)
{ {
if (num == 0) if (num == 0)
return (T1)1.0; return (T1)1.0;
else if (num == 0.5) else if (num == 0.5)
return (T1)sqrt((float)num); return (T1)sqrt((float)x);
else if (num == 2) else if (num == 2)
return (T1)(x * x); return (T1)(x * x);
else { else {
if (x == 0 && num < 0) if (x == 0 && num < 0)
return 1e20F; return (T1)1e20F;
else else
return (T1)pow((float)x, (float)num); return (T1)pow((float)x, (float)num);
} }
...@@ -63,14 +63,14 @@ T1 BasePower(T1 x, T2 num) ...@@ -63,14 +63,14 @@ T1 BasePower(T1 x, T2 num)
template<class T1, class T2> template<class T1, class T2>
__device__ __device__
T1 BaseScale(T1 x, T2 num) T1 BinaryCudaScale(T1 x, T2 num)
{ {
return x * num; return x * num;
} }
template<class T1, class T2> template<class T1, class T2>
__device__ __device__
T1 BaseShift(T1 x, T2 num) T1 BinaryCudaShift(T1 x, T2 num)
{ {
return x + num; return x + num;
} }
...@@ -126,11 +126,11 @@ template void _Cuda##funcName<int>(const XTensor*, XTensor*, int); ...@@ -126,11 +126,11 @@ template void _Cuda##funcName<int>(const XTensor*, XTensor*, int);
template void _Cuda##funcName<float>(const XTensor*, XTensor*, float); \ template void _Cuda##funcName<float>(const XTensor*, XTensor*, float); \
template void _Cuda##funcName<double>(const XTensor*, XTensor*, double); template void _Cuda##funcName<double>(const XTensor*, XTensor*, double);
SIMPLE_BINARY_FUNCTION_GPU(Descale, BaseDescale) SIMPLE_BINARY_FUNCTION_GPU(Descale, BinaryCudaDescale)
SIMPLE_BINARY_FUNCTION_GPU(Mod, BaseMod) SIMPLE_BINARY_FUNCTION_GPU(Mod, BinaryCudaMod)
SIMPLE_BINARY_FUNCTION_GPU(Power, BasePower) SIMPLE_BINARY_FUNCTION_GPU(Power, BinaryCudaPower)
SIMPLE_BINARY_FUNCTION_GPU(Scale, BaseScale) SIMPLE_BINARY_FUNCTION_GPU(Scale, BinaryCudaScale)
SIMPLE_BINARY_FUNCTION_GPU(Shift, BaseShift) SIMPLE_BINARY_FUNCTION_GPU(Shift, BinaryCudaShift)
#endif // USE_CUDA #endif // USE_CUDA
......
...@@ -71,6 +71,18 @@ void _ClipMe(XTensor * a, DTYPE lower, DTYPE upper) ...@@ -71,6 +71,18 @@ void _ClipMe(XTensor * a, DTYPE lower, DTYPE upper)
} }
/* /*
set every entry to its clip value (do it on site)
keep the result in the input tensor a and return nothing
>> a - the tensor we are processing
>> lower - the lower border
>> upper - the upper border
*/
void ClipMe(XTensor& a, DTYPE lower, DTYPE upper)
{
_Clip(&a, &a, lower, upper);
}
/*
set every entry to its clip value (return an XTensor structure) set every entry to its clip value (return an XTensor structure)
make a new tensor to keep the result and return it make a new tensor to keep the result and return it
>> a - input tensor we are processing >> a - input tensor we are processing
...@@ -94,7 +106,7 @@ XTensor Clip(const XTensor & a, DTYPE lower, DTYPE upper) ...@@ -94,7 +106,7 @@ XTensor Clip(const XTensor & a, DTYPE lower, DTYPE upper)
return b; return b;
} }
void Clip(const XTensor & a, XTensor & b, DTYPE lower, DTYPE upper, bool requireLink) void Clip(const XTensor & a, XTensor & b, DTYPE lower, DTYPE upper)
{ {
if (!b.isInit || !XTensor::IsSameShaped(&a, &b)) { if (!b.isInit || !XTensor::IsSameShaped(&a, &b)) {
InitTensor(&b, &a); InitTensor(&b, &a);
...@@ -103,7 +115,7 @@ void Clip(const XTensor & a, XTensor & b, DTYPE lower, DTYPE upper, bool require ...@@ -103,7 +115,7 @@ void Clip(const XTensor & a, XTensor & b, DTYPE lower, DTYPE upper, bool require
/* call _Clip function */ /* call _Clip function */
_Clip(&a, &b, lower, upper); _Clip(&a, &b, lower, upper);
if (requireLink) { if (b.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, NULL, &b, MATH_CLIP); XLink::MakeLink(&a, NULL, &b, MATH_CLIP);
XLink::AddParamToHead(&b, lower); XLink::AddParamToHead(&b, lower);
......
...@@ -33,11 +33,15 @@ void _Clip(const XTensor * a, XTensor * b, DTYPE lower, DTYPE upper); ...@@ -33,11 +33,15 @@ void _Clip(const XTensor * a, XTensor * b, DTYPE lower, DTYPE upper);
keep the result in the input tensor a and return nothing */ keep the result in the input tensor a and return nothing */
void _ClipMe(XTensor * a, DTYPE lower, DTYPE upper); void _ClipMe(XTensor * a, DTYPE lower, DTYPE upper);
/* set every entry to its clip value (do it on site)
keep the result in the input tensor a and return nothing */
void ClipMe(XTensor & a, DTYPE lower, DTYPE upper);
/* set every entry to its clip value (return an XTensor structure) /* set every entry to its clip value (return an XTensor structure)
make a new tensor to keep the result and return it */ make a new tensor to keep the result and return it */
XTensor Clip(const XTensor & a, DTYPE lower, DTYPE upper); XTensor Clip(const XTensor & a, DTYPE lower, DTYPE upper);
void Clip(const XTensor & a, XTensor & b, DTYPE lower, DTYPE upper, bool requireLink = false); void Clip(const XTensor & a, XTensor & b, DTYPE lower, DTYPE upper);
/* /*
backward of Clip function backward of Clip function
......
...@@ -37,37 +37,56 @@ DTYPE myIsNotEqual(DTYPE a, DTYPE b) ...@@ -37,37 +37,56 @@ DTYPE myIsNotEqual(DTYPE a, DTYPE b)
} }
#ifdef USE_CUDA #ifdef USE_CUDA
/* define three marco separately, specify the respective function names (GPU mode) */ /* define three marco separately, specify the respective function names */
#define _SIMPLE_COMPARE_FUNCTION(_funcName, _cudaFuncName, origFunc) \ #define _SIMPLE_COMPARE_FUNCTION(_funcName, _cudaFuncName, origFunc) \
void _funcName(const XTensor * a, XTensor * b, DTYPE number) \ void _funcName(const XTensor * a, XTensor * b, DTYPE number) \
{ \ { \
CheckNTErrors((XTensor::IsSameShaped(a, b)), \ CheckNTErrors((XTensor::IsSameShaped(a, b)), \
"Input tensors should have the same type!"); \ "Input tensors should have the same type!"); \
CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!"); \ CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!"); \
/* run it on GPUs */ \ /* run it on GPUs */ \
if (a->devID >= 0) { \ if (a->devID >= 0) { \
_cudaFuncName(a, b, number); \ if (useCUDA) { \
return; \ _cudaFuncName(a, b, number); \
} \ return; \
DTYPE * d = (DTYPE*)a->data; \ } \
DTYPE * db = (DTYPE*)b->data; \ else \
for (int i = 0; i < a->unitNum; i++) \ ShowNTErrors("No GPU devices support!") \
db[i] = (DTYPE)origFunc(d[i], number); \ } \
DTYPE * d = (DTYPE*)a->data; \
DTYPE * db = (DTYPE*)b->data; \
for (int i = 0; i < a->unitNum; i++) \
db[i] = (DTYPE)origFunc(d[i], number); \
}
#define _SIMPLE_COMPARE_FUNCTION_ME(_funcNameMe, _funcName) \
void _funcNameMe(XTensor * a, DTYPE number) \
{ \
_funcName(a, a, number); \
}
#define SIMPLE_COMPARE_FUNCTION_ME(funcNameMe, _funcName) \
void funcNameMe(XTensor & a, DTYPE number) \
{ \
_funcName(&a, &a, number); \
}
#define SIMPLE_COMPARE_FUNCTION(funcName, _funcName, operationId) \
XTensor funcName(const XTensor &a, DTYPE number) \
{ \
XTensor b(&a); \
b.SetTMPFlag(); \
_funcName(&a, &b, number); \
return b; \
} }
#define _SIMPLE_COMPARE_FUNCTION_ME(_funcNameMe, _funcName) \ #define SIMPLE_COMPARE_FUNCTION_VOID(funcName, _funcName, operationId) \
void _funcNameMe(XTensor * a, DTYPE number) \ void funcName(const XTensor &a, XTensor &b, DTYPE number) \
{ \ { \
_funcName(a, a, number); \ if (!b.isInit || !XTensor::IsSameShaped(&a, &b)) { \
} InitTensor(&b, &a); \
} \
#define SIMPLE_COMPARE_FUNCTION(funcName, _funcName, operationId) \ _funcName(&a, &b, number); \
XTensor funcName(const XTensor &a, DTYPE number) \
{ \
XTensor b(&a); \
b.SetTMPFlag(); \
_funcName(&a, &b, number); \
return b; \
} }
// I think we needn't to make link. // I think we needn't to make link.
...@@ -75,51 +94,15 @@ XTensor funcName(const XTensor &a, DTYPE number) \ ...@@ -75,51 +94,15 @@ XTensor funcName(const XTensor &a, DTYPE number) \
_SIMPLE_COMPARE_FUNCTION(_Equal, _CudaEqual, myIsEqual) _SIMPLE_COMPARE_FUNCTION(_Equal, _CudaEqual, myIsEqual)
_SIMPLE_COMPARE_FUNCTION_ME(_EqualMe, _Equal) _SIMPLE_COMPARE_FUNCTION_ME(_EqualMe, _Equal)
SIMPLE_COMPARE_FUNCTION_ME(EqualMe, _Equal)
SIMPLE_COMPARE_FUNCTION(Equal, _Equal, MATH_EQUAL) SIMPLE_COMPARE_FUNCTION(Equal, _Equal, MATH_EQUAL)
SIMPLE_COMPARE_FUNCTION_VOID(Equal, _Equal, MATH_EQUAL)
_SIMPLE_COMPARE_FUNCTION(_NotEqual, _CudaNotEqual, myIsNotEqual) _SIMPLE_COMPARE_FUNCTION(_NotEqual, _CudaNotEqual, myIsNotEqual)
_SIMPLE_COMPARE_FUNCTION_ME(_NotEqualMe, _NotEqual) _SIMPLE_COMPARE_FUNCTION_ME(_NotEqualMe, _NotEqual)
SIMPLE_COMPARE_FUNCTION_ME(NotEqualMe, _NotEqual)
SIMPLE_COMPARE_FUNCTION(NotEqual, _NotEqual, MATH_NOTEQUAL) SIMPLE_COMPARE_FUNCTION(NotEqual, _NotEqual, MATH_NOTEQUAL)
SIMPLE_COMPARE_FUNCTION_VOID(NotEqual, _NotEqual, MATH_NOTEQUAL)
#else
/* define three marco separately, specify the respective function names (CPU mode) */
#define _SIMPLE_COMPARE_FUNCTION(_funcName, origFunc) \
void _funcName(const XTensor * a, XTensor * b, DTYPE number) \
{ \
CheckNTErrors((XTensor::IsSameShaped(a, b)), \
"Input tensors should have the same type!"); \
CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!"); \
DTYPE * d = (DTYPE*)a->data; \
DTYPE * db = (DTYPE*)b->data; \
for (int i = 0; i < a->unitNum; i++) \
db[i] = (DTYPE)origFunc(d[i], number); \
}
#define _SIMPLE_COMPARE_FUNCTION_ME(_funcNameMe, _funcName) \
void _funcNameMe(XTensor * a, DTYPE number) \
{ \
_funcName(a, a, number); \
}
#define SIMPLE_COMPARE_FUNCTION(funcName, _funcName, operationId) \
XTensor funcName(const XTensor &a, DTYPE number) \
{ \
XTensor b(&a); \
b.SetTMPFlag(); \
_funcName(&a, &b, number); \
return b; \
}
// I think we needn't to make link.
// XLink::MakeLink(&a, NULL, &b, operationId);
_SIMPLE_COMPARE_FUNCTION(_Equal, myIsEqual)
_SIMPLE_COMPARE_FUNCTION_ME(_EqualMe, _Equal)
SIMPLE_COMPARE_FUNCTION(Equal, _Equal, MATH_EQUAL)
_SIMPLE_COMPARE_FUNCTION(_NotEqual, myIsNotEqual)
_SIMPLE_COMPARE_FUNCTION_ME(_NotEqualMe, _NotEqual)
SIMPLE_COMPARE_FUNCTION(NotEqual, _NotEqual, MATH_NOTEQUAL)
#endif #endif
......
...@@ -32,18 +32,30 @@ void _Equal(const XTensor * a, XTensor * b, DTYPE value); ...@@ -32,18 +32,30 @@ void _Equal(const XTensor * a, XTensor * b, DTYPE value);
/* check whether every entry is equal to the given value (do it on site) */ /* check whether every entry is equal to the given value (do it on site) */
void _EqualMe(XTensor * a, DTYPE value); void _EqualMe(XTensor * a, DTYPE value);
/* check whether every entry is equal to the given value (do it on site) */
void EqualMe(XTensor & a, DTYPE value);
/* check whether every entry is equal to the given value (return an XTensor structure) */ /* check whether every entry is equal to the given value (return an XTensor structure) */
XTensor Equal(const XTensor & a, DTYPE value); XTensor Equal(const XTensor & a, DTYPE value);
/* check whether every entry is equal to the given value */
void Equal(const XTensor & a, XTensor & b, DTYPE value);
/* check whether every entry is not equal to the given value */ /* check whether every entry is not equal to the given value */
void _NotEqual(const XTensor * a, XTensor * b, DTYPE value); void _NotEqual(const XTensor * a, XTensor * b, DTYPE value);
/* check whether every entry is not equal to the given value (do it on site) */ /* check whether every entry is not equal to the given value (do it on site) */
void _NotEqualMe(XTensor * a, DTYPE value); void _NotEqualMe(XTensor * a, DTYPE value);
/* check whether every entry is not equal to the given value (do it on site) */
void NotEqualMe(XTensor & a, DTYPE value);
/* check whether every entry is not equal to the given value (return an XTensor structure) */ /* check whether every entry is not equal to the given value (return an XTensor structure) */
XTensor NotEqual(const XTensor & a, DTYPE value); XTensor NotEqual(const XTensor & a, DTYPE value);
/* check whether every entry is not equal to the given value */
void NotEqual(const XTensor & a, XTensor & b, DTYPE value);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
#endif // end __COMPARE_H__ #endif // end __COMPARE_H__
\ No newline at end of file
...@@ -42,7 +42,9 @@ where a and b are the scalar and bias respectively, and \epsilon is the adjustme ...@@ -42,7 +42,9 @@ where a and b are the scalar and bias respectively, and \epsilon is the adjustme
>> b - the bias >> b - the bias
>> epsilon - a parameter >> epsilon - a parameter
*/ */
void _Normalize(const XTensor * input, XTensor * output, int dim, const XTensor * mean, const XTensor * var, const XTensor * a, const XTensor * b, DTYPE epsilon) void _Normalize(const XTensor * input, XTensor * output, int dim,
const XTensor * mean, const XTensor * var,
const XTensor * a, const XTensor * b, DTYPE epsilon)
{ {
int dimRDI = input->order - dim - 1; int dimRDI = input->order - dim - 1;
CheckNTErrors((XTensor::IsSameShaped(input, output)), "Unmatched input tensors!"); CheckNTErrors((XTensor::IsSameShaped(input, output)), "Unmatched input tensors!");
...@@ -109,10 +111,35 @@ where a and b are the scalar and bias respectively, and \epsilon is the adjustme ...@@ -109,10 +111,35 @@ where a and b are the scalar and bias respectively, and \epsilon is the adjustme
>> b - the bias >> b - the bias
>> epsilon - a parameter >> epsilon - a parameter
*/ */
void _NormalizeMe(XTensor * input, int dim, const XTensor * mean, const XTensor * var, const XTensor * a, const XTensor * b, DTYPE epsilon) void _NormalizeMe(XTensor * input, int dim,
const XTensor * mean, const XTensor * var,
const XTensor * a, const XTensor * b, DTYPE epsilon)
{ {
_Normalize(input, input, dim, mean, var, a, b, epsilon); _Normalize(input, input, dim, mean, var, a, b, epsilon);
} }
/*
normalized the data with normal distribution (do it on site)
keep the result in the input tensor and return nothing
For an input x, x = a * (x-mean)/sqrt(variance+\epsilon) + b
where a and b are the scalar and bias respectively, and \epsilon is the adjustment parameter.
>> input - the input tensor
>> dim - dimension alone which we generate the mean and variance
>> mean - the mean of the input
>> var - the variance of the input
>> a - the scalar
>> b - the bias
>> epsilon - a parameter
*/
void NormalizeMe(XTensor& input, int dim,
const XTensor& mean, const XTensor& var,
const XTensor& a, const XTensor& b, DTYPE epsilon)
{
_Normalize(&input, &input, dim, &mean, &var, &a, &b, epsilon);
}
/* /*
normalized the data with normal distribution (return an XTensor structure) normalized the data with normal distribution (return an XTensor structure)
make a new tensor to keep the result and return it make a new tensor to keep the result and return it
...@@ -129,7 +156,9 @@ where a and b are the scalar and bias respectively, and \epsilon is the adjustme ...@@ -129,7 +156,9 @@ where a and b are the scalar and bias respectively, and \epsilon is the adjustme
>> epsilon - a parameter >> epsilon - a parameter
<< return - the result of normalized the data with normal distribution << return - the result of normalized the data with normal distribution
*/ */
XTensor Normalize(const XTensor &input, int dim, const XTensor &mean, const XTensor &var, const XTensor &a, const XTensor &b, DTYPE epsilon) XTensor Normalize(const XTensor &input, int dim,
const XTensor &mean, const XTensor &var,
const XTensor &a, const XTensor &b, DTYPE epsilon)
{ {
XTensor output(&input); XTensor output(&input);
output.SetTMPFlag(); output.SetTMPFlag();
...@@ -150,4 +179,48 @@ XTensor Normalize(const XTensor &input, int dim, const XTensor &mean, const XTen ...@@ -150,4 +179,48 @@ XTensor Normalize(const XTensor &input, int dim, const XTensor &mean, const XTen
return output; return output;
} }
/*
normalized the data with normal distribution (return an XTensor structure)
make a new tensor to keep the result and return it
For an input x, y = a * (x-mean)/sqrt(variance+\epsilon) + b
where a and b are the scalar and bias respectively, and \epsilon is the adjustment parameter.
>> input - the input tensor
>> output - the output tensor
>> dim - dimension alone which we generate the mean and variance
>> mean - the mean of the input
>> var - the variance of the input
>> a - the scalar
>> b - the bias
>> epsilon - a parameter
<< return - the result of normalized the data with normal distribution
*/
void Normalize(const XTensor &input, XTensor &output, int dim,
const XTensor &mean, const XTensor &var,
const XTensor &a, const XTensor &b, DTYPE epsilon)
{
if (!output.isInit || !XTensor::IsSameShaped(&input, &output)) {
InitTensor(&output, &input);
}
/* call _Normalize function */
_Normalize(&input, &output, dim, &mean, &var, &a, &b, epsilon);
if (output.enableGrad == true) {
/* tensor connections */
TensorList list(5);
list.Add((XTensor*)&input);
list.Add((XTensor*)&mean);
list.Add((XTensor*)&var);
list.Add((XTensor*)&a);
list.Add((XTensor*)&b);
XLink::MakeLink(&list, &output, MATH_NORMALIZE);
XLink::AddParamToHeadInt(&output, dim);
XLink::AddParamToHead(&output, epsilon);
}
}
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
...@@ -31,7 +31,9 @@ normalized the data with normal distribution. ...@@ -31,7 +31,9 @@ normalized the data with normal distribution.
For an input x, y = a * (x-mean)/sqrt(variance+\epsilon) + b For an input x, y = a * (x-mean)/sqrt(variance+\epsilon) + b
where a and b are the scalar and bias respectively, and \epsilon is the adjustment parameter. where a and b are the scalar and bias respectively, and \epsilon is the adjustment parameter.
*/ */
void _Normalize(const XTensor * input, XTensor * output, int dim, const XTensor * mean, const XTensor * var, const XTensor * a, const XTensor * b, DTYPE epsilon); void _Normalize(const XTensor * input, XTensor * output, int dim,
const XTensor * mean, const XTensor * var,
const XTensor * a, const XTensor * b, DTYPE epsilon);
/* /*
normalized the data with normal distribution (do it on site) normalized the data with normal distribution (do it on site)
...@@ -39,7 +41,29 @@ keep the result in the input tenosr and return nothing ...@@ -39,7 +41,29 @@ keep the result in the input tenosr and return nothing
For an input x, x = a * (x-mean)/sqrt(variance+\epsilon) + b For an input x, x = a * (x-mean)/sqrt(variance+\epsilon) + b
where a and b are the scalar and bias respectively, and \epsilon is the adjustment parameter. where a and b are the scalar and bias respectively, and \epsilon is the adjustment parameter.
*/ */
void _NormalizeMe(XTensor * input, int dim, const XTensor * mean, const XTensor * var, const XTensor * a, const XTensor * b, DTYPE epsilon); void _NormalizeMe(XTensor * input, int dim,
const XTensor * mean, const XTensor * var,
const XTensor * a, const XTensor * b, DTYPE epsilon);
/*
normalized the data with normal distribution (do it on site)
keep the result in the input tenosr and return nothing
For an input x, x = a * (x-mean)/sqrt(variance+\epsilon) + b
where a and b are the scalar and bias respectively, and \epsilon is the adjustment parameter.
*/
void NormalizeMe(XTensor & input, int dim,
const XTensor & mean, const XTensor & var,
const XTensor & a, const XTensor & b, DTYPE epsilon);
/*
normalized the data with normal distribution (return an XTensor structure)
make a new tensor to keep the result and return it
For an input x, y = a * (x-mean)/sqrt(variance+\epsilon) + b
where a and b are the scalar and bias respectively, and \epsilon is the adjustment parameter.
*/
XTensor Normalize(const XTensor &input, int dim,
const XTensor &mean, const XTensor &var,
const XTensor &a, const XTensor &b, DTYPE epsilon);
/* /*
normalized the data with normal distribution (return an XTensor structure) normalized the data with normal distribution (return an XTensor structure)
...@@ -47,7 +71,9 @@ make a new tensor to keep the result and return it ...@@ -47,7 +71,9 @@ make a new tensor to keep the result and return it
For an input x, y = a * (x-mean)/sqrt(variance+\epsilon) + b For an input x, y = a * (x-mean)/sqrt(variance+\epsilon) + b
where a and b are the scalar and bias respectively, and \epsilon is the adjustment parameter. where a and b are the scalar and bias respectively, and \epsilon is the adjustment parameter.
*/ */
XTensor Normalize(const XTensor &input, int dim, const XTensor &mean, const XTensor &var, const XTensor &a, const XTensor &b, DTYPE epsilon); void Normalize(const XTensor &input, XTensor &output, int dim,
const XTensor &mean, const XTensor &var,
const XTensor &a, const XTensor &b, DTYPE epsilon);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -92,6 +92,21 @@ void _ScaleAndShiftMe(XTensor * a, DTYPE scale, DTYPE shift) ...@@ -92,6 +92,21 @@ void _ScaleAndShiftMe(XTensor * a, DTYPE scale, DTYPE shift)
} }
/* /*
scale and shift all tensor entires (do it on site)
keep the result in the input tensor a and return nothing
a = a * scale + shift
>> a - the input/output tensor
>> scale - the scaler factor
>> shift - the shift factor
*/
void ScaleAndShiftMe(XTensor& a, DTYPE scale, DTYPE shift)
{
_ScaleAndShift(&a, &a, scale, shift);
}
/*
scale and shift all tensor entires (return an XTensor structure) scale and shift all tensor entires (return an XTensor structure)
make a new tensor to keep the result and return it make a new tensor to keep the result and return it
...@@ -127,9 +142,8 @@ b = a * scale + shift ...@@ -127,9 +142,8 @@ b = a * scale + shift
>> b - the output tensor >> b - the output tensor
>> scale - the scaler factor >> scale - the scaler factor
>> shift - the shift factor >> shift - the shift factor
>> requireLink - if add operation to network
*/ */
void ScaleAndShift(const XTensor & a, XTensor & b, DTYPE scale, DTYPE shift, bool requireLink) void ScaleAndShift(const XTensor & a, XTensor & b, DTYPE scale, DTYPE shift)
{ {
if (!b.isInit || !XTensor::IsSameShaped(&a, &b)) { if (!b.isInit || !XTensor::IsSameShaped(&a, &b)) {
InitTensor(&b, &a); InitTensor(&b, &a);
...@@ -138,7 +152,7 @@ void ScaleAndShift(const XTensor & a, XTensor & b, DTYPE scale, DTYPE shift, boo ...@@ -138,7 +152,7 @@ void ScaleAndShift(const XTensor & a, XTensor & b, DTYPE scale, DTYPE shift, boo
/* call _ScaleAndShift function */ /* call _ScaleAndShift function */
_ScaleAndShift(&a, &b, scale, shift); _ScaleAndShift(&a, &b, scale, shift);
if (requireLink) { if (b.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, NULL, &b, MATH_SCALEANDSHIFT); XLink::MakeLink(&a, NULL, &b, MATH_SCALEANDSHIFT);
XLink::AddParamToHead(&b, scale); XLink::AddParamToHead(&b, scale);
......
...@@ -45,6 +45,13 @@ void _ScaleAndShiftMe(XTensor * a, DTYPE scale, DTYPE shift = 0); ...@@ -45,6 +45,13 @@ void _ScaleAndShiftMe(XTensor * a, DTYPE scale, DTYPE shift = 0);
/* /*
scale and shift all tensor entires scale and shift all tensor entires
keep the result in the input tensor a and return nothing
a = a * scale + shift
*/
void ScaleAndShiftMe(XTensor & a, DTYPE scale, DTYPE shift = 0);
/*
scale and shift all tensor entires
make a new tensor to keep the result and return it make a new tensor to keep the result and return it
b = a * scale + shift b = a * scale + shift
*/ */
...@@ -54,7 +61,7 @@ XTensor ScaleAndShift(const XTensor &a, DTYPE scale, DTYPE shift = 0); ...@@ -54,7 +61,7 @@ XTensor ScaleAndShift(const XTensor &a, DTYPE scale, DTYPE shift = 0);
scale and shift all tensor entires scale and shift all tensor entires
b = a * scale + shift b = a * scale + shift
*/ */
void ScaleAndShift(const XTensor &a, XTensor &b, DTYPE scale, DTYPE shift = 0, bool requireLink = false); void ScaleAndShift(const XTensor &a, XTensor &b, DTYPE scale, DTYPE shift = 0);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -28,24 +28,24 @@ ...@@ -28,24 +28,24 @@
namespace nts{ namespace nts{
template<class T> template<class T>
T negate(T x) { T UnaryNegate(T x) {
return (T)-x; return (T)-x;
} }
template<class T> template<class T>
T square(T x) T UnarySquare(T x)
{ {
return (T)(x * x); return (T)(x * x);
} }
template<class T> template<class T>
T round(T r) T UnaryRound(T r)
{ {
return (r > 0.0) ? (T)floor(r + 0.5) : (T)ceil(r - 0.5); return (r > 0.0) ? (T)floor(r + 0.5) : (T)ceil(r - 0.5);
} }
template<class T> template<class T>
T sign(T r) T UnarySign(T r)
{ {
if (r > 0.0) if (r > 0.0)
return (T)1.0; return (T)1.0;
...@@ -56,13 +56,13 @@ T sign(T r) ...@@ -56,13 +56,13 @@ T sign(T r)
} }
template<class T> template<class T>
T isnonzero(T r) T UnaryIsNonZero(T r)
{ {
return (r != 0.0) ? (T)1.0 : (T)0.0; return (r != 0.0) ? (T)1.0 : (T)0.0;
} }
template<class T> template<class T>
T iszero(T r) T UnaryIsZero(T r)
{ {
return (r == 0.0) ? (T)1.0 : (T)0.0; return (r == 0.0) ? (T)1.0 : (T)0.0;
} }
...@@ -142,14 +142,14 @@ _SIMPLE_UNARY_FUNCTION(_Absolute, _CudaAbsolute, fabs) ...@@ -142,14 +142,14 @@ _SIMPLE_UNARY_FUNCTION(_Absolute, _CudaAbsolute, fabs)
_SIMPLE_UNARY_FUNCTION(_Ceil, _CudaCeil, ceil) _SIMPLE_UNARY_FUNCTION(_Ceil, _CudaCeil, ceil)
_SIMPLE_UNARY_FUNCTION(_Exp, _CudaExp, exp) _SIMPLE_UNARY_FUNCTION(_Exp, _CudaExp, exp)
_SIMPLE_UNARY_FUNCTION(_Floor, _CudaFloor, floor) _SIMPLE_UNARY_FUNCTION(_Floor, _CudaFloor, floor)
_SIMPLE_UNARY_FUNCTION(_IsNonZero, _CudaIsNonZero, isnonzero) _SIMPLE_UNARY_FUNCTION(_IsNonZero, _CudaIsNonZero, UnaryIsNonZero)
_SIMPLE_UNARY_FUNCTION(_IsZero, _CudaIsZero, iszero) _SIMPLE_UNARY_FUNCTION(_IsZero, _CudaIsZero, UnaryIsZero)
_SIMPLE_UNARY_FUNCTION(_Log, _CudaLog, log) _SIMPLE_UNARY_FUNCTION(_Log, _CudaLog, log)
_SIMPLE_UNARY_FUNCTION(_Negate, _CudaNegate, negate) _SIMPLE_UNARY_FUNCTION(_Negate, _CudaNegate, UnaryNegate)
_SIMPLE_UNARY_FUNCTION(_Round, _CudaRound, round) _SIMPLE_UNARY_FUNCTION(_Round, _CudaRound, round)
_SIMPLE_UNARY_FUNCTION(_Sign, _CudaSign, sign) _SIMPLE_UNARY_FUNCTION(_Sign, _CudaSign, UnarySign)
_SIMPLE_UNARY_FUNCTION(_Sqrt, _CudaSqrt, sqrt) _SIMPLE_UNARY_FUNCTION(_Sqrt, _CudaSqrt, sqrt)
_SIMPLE_UNARY_FUNCTION(_Square, _CudaSquare, square) _SIMPLE_UNARY_FUNCTION(_Square, _CudaSquare, UnarySquare)
_SIMPLE_UNARY_FUNCTION(_Sin, _CudaSin, sin) _SIMPLE_UNARY_FUNCTION(_Sin, _CudaSin, sin)
_SIMPLE_UNARY_FUNCTION(_Cos, _CudaCos, cos) _SIMPLE_UNARY_FUNCTION(_Cos, _CudaCos, cos)
_SIMPLE_UNARY_FUNCTION(_Tan, _CudaTan, tan) _SIMPLE_UNARY_FUNCTION(_Tan, _CudaTan, tan)
......
...@@ -32,63 +32,63 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -32,63 +32,63 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
template<class T> template<class T>
__device__ __device__
T BaseCeil(T x) T UnaryCudaCeil(T x)
{ {
return (T)ceil((float)x); return (T)ceil((float)x);
} }
template<class T> template<class T>
__device__ __device__
T BaseExp(T x) T UnaryCudaExp(T x)
{ {
return (T)exp((float)x); return (T)exp((float)x);
} }
template<class T> template<class T>
__device__ __device__
T BaseFabs(T x) T UnaryCudaFabs(T x)
{ {
return (T)fabs((float)x); return (T)fabs((float)x);
} }
template<class T> template<class T>
__device__ __device__
T BaseFloor(T x) T UnaryCudaFloor(T x)
{ {
return (T)floor((float)x); return (T)floor((float)x);
} }
template<class T> template<class T>
__device__ __device__
T BaseIsNonZero(T r) T UnaryCudaIsNonZero(T r)
{ {
return (r != (T)0.0) ? (T)1.0 : (T)0.0; return (r != (T)0.0) ? (T)1.0 : (T)0.0;
} }
template<class T> template<class T>
__device__ __device__
T BaseIsZero(T r) T UnaryCudaIsZero(T r)
{ {
return (r == (T)0.0) ? (T)1.0 : (T)0.0; return (r == (T)0.0) ? (T)1.0 : (T)0.0;
} }
template<class T> template<class T>
__device__ __device__
T BaseLog(T x) T UnaryCudaLog(T x)
{ {
return (T)log((float)x); return (T)log((float)x);
} }
template<class T> template<class T>
__device__ __device__
T BaseNegate(T x) T UnaryCudaNegate(T x)
{ {
return -x; return -x;
} }
template<class T> template<class T>
__device__ __device__
T BaseSign(T r) T UnaryCudaSign(T r)
{ {
if (r > (T)0) if (r > (T)0)
return 1.0; return 1.0;
...@@ -100,43 +100,43 @@ T BaseSign(T r) ...@@ -100,43 +100,43 @@ T BaseSign(T r)
template<class T> template<class T>
__device__ __device__
T BaseSqrt(T x) T UnaryCudaSqrt(T x)
{ {
return (T)sqrt((float)x); return (T)sqrt((float)x);
} }
template<class T> template<class T>
__device__ __device__
T BaseSquare(T x) T UnaryCudaSquare(T x)
{ {
return x * x; return x * x;
} }
template<class T> template<class T>
__device__ __device__
T BaseRound(T r) T UnaryCudaRound(T r)
{ {
return (r > (T)0.0) ? (T)BaseFloor(r + (T)0.5) : (T)BaseCeil(r - (T)0.5); return (r > (T)0.0) ? (T)UnaryCudaFloor(r + (T)0.5) : (T)UnaryCudaCeil(r - (T)0.5);
} }
template<class T> template<class T>
__device__ __device__
T BaseSin(T x) T UnaryCudaSin(T x)
{ {
return (T)sin((float)x); return (T)sin((float)x);
} }
template<class T> template<class T>
__device__ __device__
T BaseCos(T x) T UnaryCudaCos(T x)
{ {
return (T)cos((float)x); return (T)cos((float)x);
} }
template<class T> template<class T>
__device__ __device__
T BaseTan(T x) T UnaryCudaTan(T x)
{ {
return (T)tan((float)x); return (T)tan((float)x);
} }
...@@ -181,10 +181,6 @@ void _Cuda##funcName(const XTensor * a, XTensor * b) \ ...@@ -181,10 +181,6 @@ void _Cuda##funcName(const XTensor * a, XTensor * b) \
Kernel##funcName<<<blocks, threads>>> \ Kernel##funcName<<<blocks, threads>>> \
((int*)a->data, (int*)b->data, a->unitNum); \ ((int*)a->data, (int*)b->data, a->unitNum); \
} \ } \
else if (a->dataType == X_FLOAT16) { \
Kernel##funcName<<<blocks, threads>>> \
((__half*)a->data, (__half*)b->data, a->unitNum); \
} \
else { \ else { \
ShowNTErrors("TODO!"); \ ShowNTErrors("TODO!"); \
} \ } \
...@@ -194,22 +190,22 @@ void _Cuda##funcName(const XTensor * a, XTensor * b) \ ...@@ -194,22 +190,22 @@ void _Cuda##funcName(const XTensor * a, XTensor * b) \
SIMPLE_UNARY_FUNCTION_GPU(Absolute, BaseFabs) SIMPLE_UNARY_FUNCTION_GPU(Absolute, UnaryCudaFabs)
SIMPLE_UNARY_FUNCTION_GPU(Ceil, BaseCeil) SIMPLE_UNARY_FUNCTION_GPU(Ceil, UnaryCudaCeil)
SIMPLE_UNARY_FUNCTION_GPU(Exp, BaseExp) SIMPLE_UNARY_FUNCTION_GPU(Exp, UnaryCudaExp)
SIMPLE_UNARY_FUNCTION_GPU(Floor, BaseFloor) SIMPLE_UNARY_FUNCTION_GPU(Floor, UnaryCudaFloor)
SIMPLE_UNARY_FUNCTION_GPU(IsNonZero, BaseIsNonZero) SIMPLE_UNARY_FUNCTION_GPU(IsNonZero, UnaryCudaIsNonZero)
SIMPLE_UNARY_FUNCTION_GPU(IsZero, BaseIsZero) SIMPLE_UNARY_FUNCTION_GPU(IsZero, UnaryCudaIsZero)
SIMPLE_UNARY_FUNCTION_GPU(Log, BaseLog) SIMPLE_UNARY_FUNCTION_GPU(Log, UnaryCudaLog)
SIMPLE_UNARY_FUNCTION_GPU(Negate, BaseNegate) SIMPLE_UNARY_FUNCTION_GPU(Negate, UnaryCudaNegate)
SIMPLE_UNARY_FUNCTION_GPU(Round, BaseRound) SIMPLE_UNARY_FUNCTION_GPU(Round, UnaryCudaRound)
SIMPLE_UNARY_FUNCTION_GPU(Sign, BaseSign) SIMPLE_UNARY_FUNCTION_GPU(Sign, UnaryCudaSign)
SIMPLE_UNARY_FUNCTION_GPU(Sqrt, BaseSqrt) SIMPLE_UNARY_FUNCTION_GPU(Sqrt, UnaryCudaSqrt)
SIMPLE_UNARY_FUNCTION_GPU(Square, BaseSquare) SIMPLE_UNARY_FUNCTION_GPU(Square, UnaryCudaSquare)
SIMPLE_UNARY_FUNCTION_GPU(Sin, BaseSin) SIMPLE_UNARY_FUNCTION_GPU(Sin, UnaryCudaSin)
SIMPLE_UNARY_FUNCTION_GPU(Cos, BaseCos) SIMPLE_UNARY_FUNCTION_GPU(Cos, UnaryCudaCos)
SIMPLE_UNARY_FUNCTION_GPU(Tan, BaseTan) SIMPLE_UNARY_FUNCTION_GPU(Tan, UnaryCudaTan)
#endif // USE_CUDA #endif // USE_CUDA
......
...@@ -52,15 +52,15 @@ void _CudaCopyValues(const XTensor * s, XTensor * t, XStream * stream) ...@@ -52,15 +52,15 @@ void _CudaCopyValues(const XTensor * s, XTensor * t, XStream * stream)
} }
/* dense -> sparse */ /* dense -> sparse */
else if (!s->isSparse && t->isSparse && else if (!s->isSparse && t->isSparse &&
s->dataType == DEFAULT_DTYPE && s->dataType == DEFAULT_DTYPE &&
t->dataType == DEFAULT_DTYPE) t->dataType == DEFAULT_DTYPE)
{ {
ShowNTErrors("TODO!"); ShowNTErrors("TODO!");
} }
/* sparse -> dense */ /* sparse -> dense */
else if (s->isSparse && !t->isSparse && else if (s->isSparse && !t->isSparse &&
s->dataType == DEFAULT_DTYPE && s->dataType == DEFAULT_DTYPE &&
t->dataType == DEFAULT_DTYPE) t->dataType == DEFAULT_DTYPE)
{ {
ShowNTErrors("TODO!"); ShowNTErrors("TODO!");
} }
......
...@@ -219,7 +219,6 @@ void _SpreadForCopyIndexed(XTensor * s, XTensor * c, int dim, ...@@ -219,7 +219,6 @@ void _SpreadForCopyIndexed(XTensor * s, XTensor * c, int dim,
} }
} }
} }
/* /*
...@@ -236,15 +235,18 @@ void _SpreadForGather(XTensor * source, XTensor * collection, XTensor * index) ...@@ -236,15 +235,18 @@ void _SpreadForGather(XTensor * source, XTensor * collection, XTensor * index)
int order = source->order; int order = source->order;
CheckNTErrors(source->dataType == DEFAULT_DTYPE, "TODO!"); CheckNTErrors(source->dataType == DEFAULT_DTYPE, "TODO!");
CheckNTErrors(collection->GetDim(-1) == source->GetDim(-1), "Illegal dimension!");
CheckNTErrors(collection->unitNum/collection->GetDim(-1) == index->unitNum,
"Illegal dimension!");
for(int i = 0; i < order; i++){ //for(int i = 0; i < order; i++){
if(i == dim){ // if(i == dim){
CheckNTErrors(collection->GetDim(i) == index->unitNum, "Illegal dimension!"); // CheckNTErrors(collection->GetDim(i) == index->unitNum, "Illegal dimension!");
} // }
else { // else {
CheckNTErrors(collection->GetDim(i) == source->GetDim(i), "Illegal dimension!"); // CheckNTErrors(collection->GetDim(i) == source->GetDim(i), "Illegal dimension!");
} // }
} //}
#ifdef USE_CUDA #ifdef USE_CUDA
if(source->devID >= 0 && collection->devID >= 0) { if(source->devID >= 0 && collection->devID >= 0) {
......
...@@ -137,9 +137,8 @@ get the max value of the items along a dimension of the tensor ...@@ -137,9 +137,8 @@ get the max value of the items along a dimension of the tensor
>> input - the input tensor >> input - the input tensor
>> output - the output tensor >> output - the output tensor
>> dim - the dimension where the reduction is performed on >> dim - the dimension where the reduction is performed on
>> requireLink - if add operation to network
*/ */
void ReduceMax(const XTensor &input, XTensor &output, int dim, bool requireLink) void ReduceMax(const XTensor &input, XTensor &output, int dim)
{ {
CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!"); CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!");
...@@ -163,7 +162,7 @@ void ReduceMax(const XTensor &input, XTensor &output, int dim, bool requireLink) ...@@ -163,7 +162,7 @@ void ReduceMax(const XTensor &input, XTensor &output, int dim, bool requireLink)
/* call _ReduceMax function */ /* call _ReduceMax function */
_ReduceMax(&input, &output, dim); _ReduceMax(&input, &output, dim);
if (requireLink) { if (output.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&input, NULL, &output, REDUCE_REDUCEMAX); XLink::MakeLink(&input, NULL, &output, REDUCE_REDUCEMAX);
XLink::AddParamToHeadInt(&output, dim); XLink::AddParamToHeadInt(&output, dim);
......
...@@ -504,7 +504,7 @@ void _CudaReduceMax(const XTensor * input, XTensor * output, int dim) ...@@ -504,7 +504,7 @@ void _CudaReduceMax(const XTensor * input, XTensor * output, int dim)
CheckNTErrors(input->order > dim && dim >=0, "Illegal dimension to reduce!"); CheckNTErrors(input->order > dim && dim >=0, "Illegal dimension to reduce!");
CheckNTErrors(input->dataType == output->dataType, "Unmatched data types!"); CheckNTErrors(input->dataType == output->dataType, "Unmatched data types!");
int dimRDI = input->order - dim - 1; int dimRDI = input->order - dim - 1;
for(int i = 0; i < input->order; i++){ for(int i = 0; i < input->order; i++){
if(i < dimRDI){ if(i < dimRDI){
CheckNTErrors(input->dimSizeRDI[i] == output->dimSizeRDI[i], "Unmatched tensors!"); CheckNTErrors(input->dimSizeRDI[i] == output->dimSizeRDI[i], "Unmatched tensors!");
......
...@@ -36,7 +36,7 @@ make a new tensor to keep the result and return it ...@@ -36,7 +36,7 @@ make a new tensor to keep the result and return it
XTensor ReduceMax(const XTensor &input, int dim); XTensor ReduceMax(const XTensor &input, int dim);
/* get the max value of the items along a dimension of the tensor. */ /* get the max value of the items along a dimension of the tensor. */
void ReduceMax(const XTensor &input, XTensor &output, int dim, bool requireLink = false); void ReduceMax(const XTensor &input, XTensor &output, int dim);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -94,9 +94,8 @@ For a 1-dimensional data array a, mean = (1/n) * sum_i input_i ...@@ -94,9 +94,8 @@ For a 1-dimensional data array a, mean = (1/n) * sum_i input_i
>> input - the input tensor >> input - the input tensor
>> output - the output tensor >> output - the output tensor
>> dim - the dimension where the reduction is performed on >> dim - the dimension where the reduction is performed on
>> requireLink - if add operation to network
*/ */
void ReduceMean(const XTensor &input, XTensor &output, int dim, bool requireLink) void ReduceMean(const XTensor &input, XTensor &output, int dim)
{ {
CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!"); CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!");
...@@ -120,7 +119,7 @@ void ReduceMean(const XTensor &input, XTensor &output, int dim, bool requireLink ...@@ -120,7 +119,7 @@ void ReduceMean(const XTensor &input, XTensor &output, int dim, bool requireLink
/* call _ReduceMean function */ /* call _ReduceMean function */
_ReduceMean(&input, &output, dim); _ReduceMean(&input, &output, dim);
if (requireLink) { if (output.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&input, NULL, &output, REDUCE_REDUCEMEAN); XLink::MakeLink(&input, NULL, &output, REDUCE_REDUCEMEAN);
XLink::AddParamToHeadInt(&output, dim); XLink::AddParamToHeadInt(&output, dim);
......
...@@ -43,7 +43,7 @@ XTensor ReduceMean(const XTensor &input, int dim); ...@@ -43,7 +43,7 @@ XTensor ReduceMean(const XTensor &input, int dim);
get the mean value along a dimension of the tensor get the mean value along a dimension of the tensor
For a 1-dimensional data array a, mean = (1/n) * sum_i input_i For a 1-dimensional data array a, mean = (1/n) * sum_i input_i
*/ */
void ReduceMean(const XTensor &input, XTensor &output, int dim, bool requireLink = false); void ReduceMean(const XTensor &input, XTensor &output, int dim);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -244,7 +244,7 @@ XTensor ReduceSum(const XTensor &input, int dim, const XTensor &shift, DTYPE pow ...@@ -244,7 +244,7 @@ XTensor ReduceSum(const XTensor &input, int dim, const XTensor &shift, DTYPE pow
return output; return output;
} }
void ReduceSum(const XTensor &input, XTensor &output, int dim, const XTensor &shift, DTYPE power, bool isExp, bool requireLink) void ReduceSum(const XTensor &input, XTensor &output, int dim, const XTensor &shift, DTYPE power, bool isExp)
{ {
CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!"); CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!");
...@@ -268,7 +268,7 @@ void ReduceSum(const XTensor &input, XTensor &output, int dim, const XTensor &sh ...@@ -268,7 +268,7 @@ void ReduceSum(const XTensor &input, XTensor &output, int dim, const XTensor &sh
/* call _ReduceSum function */ /* call _ReduceSum function */
_ReduceSum(&input, &output, dim, &shift, power, isExp); _ReduceSum(&input, &output, dim, &shift, power, isExp);
if (requireLink) { if (output.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&input, &shift, &output, REDUCE_REDUCESUM); XLink::MakeLink(&input, &shift, &output, REDUCE_REDUCESUM);
XLink::AddParamToHeadInt(&output, dim); XLink::AddParamToHeadInt(&output, dim);
...@@ -336,9 +336,8 @@ sum = \sum_i exp((a_i - shift)^power) if isExp == true ...@@ -336,9 +336,8 @@ sum = \sum_i exp((a_i - shift)^power) if isExp == true
>> shift - shift the input >> shift - shift the input
>> ieExp - specify if the exp() is performed >> ieExp - specify if the exp() is performed
>> power - we perform pow(item_i, power) on each item in the array >> power - we perform pow(item_i, power) on each item in the array
>> requireLink - if add operation to network
*/ */
void ReduceSum(const XTensor &input, XTensor &output, int dim, DTYPE power, bool isExp, bool requireLink) void ReduceSum(const XTensor &input, XTensor &output, int dim, DTYPE power, bool isExp)
{ {
CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!"); CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!");
...@@ -362,7 +361,7 @@ void ReduceSum(const XTensor &input, XTensor &output, int dim, DTYPE power, bool ...@@ -362,7 +361,7 @@ void ReduceSum(const XTensor &input, XTensor &output, int dim, DTYPE power, bool
/* call _ReduceSum function */ /* call _ReduceSum function */
_ReduceSum(&input, &output, dim, NULL, power, isExp); _ReduceSum(&input, &output, dim, NULL, power, isExp);
if (requireLink) { if (output.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&input, NULL, &output, REDUCE_REDUCESUM); XLink::MakeLink(&input, NULL, &output, REDUCE_REDUCESUM);
XLink::AddParamToHeadInt(&output, dim); XLink::AddParamToHeadInt(&output, dim);
......
...@@ -341,7 +341,7 @@ void KernelReduceSumFast(DTYPE * input, DTYPE * output, ...@@ -341,7 +341,7 @@ void KernelReduceSumFast(DTYPE * input, DTYPE * output,
if (tid < blockDim.x / 32) if (tid < blockDim.x / 32)
value = data[tid]; value = data[tid];
else else
value = 0; value = 0;
value = shflDownReduceSum(value); value = shflDownReduceSum(value);
if (tid == 0 && blockIdx.x < reducedStrideNum) { if (tid == 0 && blockIdx.x < reducedStrideNum) {
...@@ -692,7 +692,7 @@ void _CudaReduceSum(const XTensor * input, XTensor * output, int dim, const XTen ...@@ -692,7 +692,7 @@ void _CudaReduceSum(const XTensor * input, XTensor * output, int dim, const XTen
CheckNTErrors(input->dataType == output->dataType, "Unmatched data types!"); CheckNTErrors(input->dataType == output->dataType, "Unmatched data types!");
CheckNTErrors(shift == NULL || output->unitNum == shift->unitNum, "Incorrect shift tensor size!"); CheckNTErrors(shift == NULL || output->unitNum == shift->unitNum, "Incorrect shift tensor size!");
int dimRDI = input->order - dim - 1; int dimRDI = input->order - dim - 1;
for(int i = 0; i < input->order; i++){ for(int i = 0; i < input->order; i++){
if(i < dimRDI){ if(i < dimRDI){
CheckNTErrors(input->dimSizeRDI[i] == output->dimSizeRDI[i], "Unmatched tensors!"); CheckNTErrors(input->dimSizeRDI[i] == output->dimSizeRDI[i], "Unmatched tensors!");
......
...@@ -44,7 +44,7 @@ sum = \sum_i exp(a_i - shift) if isExp == true ...@@ -44,7 +44,7 @@ sum = \sum_i exp(a_i - shift) if isExp == true
*/ */
XTensor ReduceSum(const XTensor &input, int dim, const XTensor &shift, DTYPE power = (DTYPE)1.0F, bool isExp = false); XTensor ReduceSum(const XTensor &input, int dim, const XTensor &shift, DTYPE power = (DTYPE)1.0F, bool isExp = false);
void ReduceSum(const XTensor &input, XTensor &output, int dim, const XTensor &shift, DTYPE power = (DTYPE)1.0F, bool isExp = false, bool requireLink = false); void ReduceSum(const XTensor &input, XTensor &output, int dim, const XTensor &shift, DTYPE power = (DTYPE)1.0F, bool isExp = false);
/* /*
sum the items along a dimension of the tensor (return an XTensor structure) sum the items along a dimension of the tensor (return an XTensor structure)
...@@ -61,7 +61,7 @@ For a 1-dimensional data array a, ...@@ -61,7 +61,7 @@ For a 1-dimensional data array a,
sum = \sum_i (a_i - shift) if isExp == false sum = \sum_i (a_i - shift) if isExp == false
sum = \sum_i exp(a_i - shift) if isExp == true sum = \sum_i exp(a_i - shift) if isExp == true
*/ */
void ReduceSum(const XTensor &input, XTensor &output, int dim, DTYPE power = (DTYPE)1.0F, bool isExp = false, bool requireLink = false); void ReduceSum(const XTensor &input, XTensor &output, int dim, DTYPE power = (DTYPE)1.0F, bool isExp = false);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -91,9 +91,8 @@ For a 1-dimensional data array a, sum = \sum_i (a_i - shift)^2 ...@@ -91,9 +91,8 @@ For a 1-dimensional data array a, sum = \sum_i (a_i - shift)^2
>> output - the output tensor >> output - the output tensor
>> dim - the dimension where the reduction is performed on >> dim - the dimension where the reduction is performed on
>> shift - bias on the input >> shift - bias on the input
>> requireLink - if add operation to network
*/ */
void ReduceSumSquared(const XTensor &input, XTensor &output, int dim, const XTensor &shift, bool requireLink) void ReduceSumSquared(const XTensor &input, XTensor &output, int dim, const XTensor &shift)
{ {
CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!"); CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!");
...@@ -117,7 +116,7 @@ void ReduceSumSquared(const XTensor &input, XTensor &output, int dim, const XTen ...@@ -117,7 +116,7 @@ void ReduceSumSquared(const XTensor &input, XTensor &output, int dim, const XTen
/* call _ReduceSumSquared function */ /* call _ReduceSumSquared function */
_ReduceSumSquared(&input, &output, dim, &shift); _ReduceSumSquared(&input, &output, dim, &shift);
if (requireLink) { if (output.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&input, &shift, &output, REDUCE_REDUCESUMSQUARED); XLink::MakeLink(&input, &shift, &output, REDUCE_REDUCESUMSQUARED);
XLink::AddParamToHeadInt(&output, dim); XLink::AddParamToHeadInt(&output, dim);
......
...@@ -45,7 +45,7 @@ squared sum of the items along a dimension of the tensor ...@@ -45,7 +45,7 @@ squared sum of the items along a dimension of the tensor
For a 1-dimensional data array a, For a 1-dimensional data array a,
sum = \sum_i (a_i - shift)^2 sum = \sum_i (a_i - shift)^2
*/ */
void ReduceSumSquared(const XTensor &input, XTensor &output, int dim, const XTensor &shift, bool requireLink = false); void ReduceSumSquared(const XTensor &input, XTensor &output, int dim, const XTensor &shift);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -94,9 +94,8 @@ For a 1-dimensional data array a, variance = 1/n * \sum_i (a_i - mean)^2 ...@@ -94,9 +94,8 @@ For a 1-dimensional data array a, variance = 1/n * \sum_i (a_i - mean)^2
>> output - the output tensor >> output - the output tensor
>> dim - the dimension where the reduction is performed on >> dim - the dimension where the reduction is performed on
>> mean - the mean value >> mean - the mean value
>> requireLink - if add operation to network
*/ */
void ReduceVariance(const XTensor &input, XTensor &output, int dim, const XTensor &mean, bool requireLink) void ReduceVariance(const XTensor &input, XTensor &output, int dim, const XTensor &mean)
{ {
CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!"); CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!");
...@@ -120,7 +119,7 @@ void ReduceVariance(const XTensor &input, XTensor &output, int dim, const XTenso ...@@ -120,7 +119,7 @@ void ReduceVariance(const XTensor &input, XTensor &output, int dim, const XTenso
/* call _ReduceVariance function */ /* call _ReduceVariance function */
_ReduceVariance(&input, &output, dim, &mean); _ReduceVariance(&input, &output, dim, &mean);
if (requireLink) { if (output.enableGrad) {
/* tensor connection */ /* tensor connection */
XLink::MakeLink(&input, &mean, &output, REDUCE_REDUCEVARIANCE); XLink::MakeLink(&input, &mean, &output, REDUCE_REDUCEVARIANCE);
XLink::AddParamToHeadInt(&output, dim); XLink::AddParamToHeadInt(&output, dim);
......
...@@ -43,7 +43,7 @@ XTensor ReduceVariance(const XTensor &input, int dim, const XTensor &mean); ...@@ -43,7 +43,7 @@ XTensor ReduceVariance(const XTensor &input, int dim, const XTensor &mean);
variance of the items along a dimension of the tensor variance of the items along a dimension of the tensor
For a 1-dimensional data array a, variance = 1/n * \sum_i (a_i - mean)^2 For a 1-dimensional data array a, variance = 1/n * \sum_i (a_i - mean)^2
*/ */
void ReduceVariance(const XTensor &input, XTensor &output, int dim, const XTensor &mean, bool requireLink = false); void ReduceVariance(const XTensor &input, XTensor &output, int dim, const XTensor &mean);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -137,6 +137,115 @@ XTensor Concatenate(const TensorList &smalls, int dim) ...@@ -137,6 +137,115 @@ XTensor Concatenate(const TensorList &smalls, int dim)
} }
} }
bool CheckConcatenateShape(const TensorList &smalls, int dim, XTensor &big, bool uniform)
{
XTensor * tensor = (XTensor*)smalls.GetItem(0);
int order = tensor->order;
int * dimSize = new int[order];
if (uniform) {
for (int i = 0; i < tensor->order; i++) {
if (i != dim)
dimSize[i] = tensor->dimSize[i];
else
dimSize[i] = tensor->dimSize[dim] * smalls.count;
}
}
else {
for (int i = 0; i < tensor->order; i++)
if (i != dim)
dimSize[i] = tensor->dimSize[i];
int catDimSize = 0;
for (int i = 0; i < smalls.count; i++) {
XTensor * tensor = (XTensor*)smalls.GetItem(i);
catDimSize += tensor->dimSize[dim];
}
dimSize[dim] = catDimSize;
}
for (int i = 0; i < order; i++) {
if (dimSize[i] != big.dimSize[i]) {
delete[] dimSize;
return false;
}
}
delete[] dimSize;
return false;
}
void Concatenate(const TensorList & smalls, XTensor & big, int dim)
{
CheckNTErrors(smalls.count > 0, "Empty list!");
CheckNTErrors(dim >= 0, "Illegal dimension to concatenate!");
bool uniform = true;
for (int i = 1; i < smalls.count; i++) {
XTensor * a = (XTensor*)smalls.GetItem(i - 1);
XTensor * b = (XTensor*)smalls.GetItem(i);
CheckNTErrors((a && b), "Empty input tensors!");
if (!XTensor::IsSameShaped(a, b))
uniform = false;
}
if (!big.isInit || !CheckConcatenateShape(smalls, dim, big, uniform)) {
XTensor * tensor = (XTensor*)smalls.GetItem(0);
int order = tensor->order;
int * dimSize = new int[order];
if (uniform) {
for (int i = 0; i < tensor->order; i++) {
if (i != dim)
dimSize[i] = tensor->dimSize[i];
else
dimSize[i] = tensor->dimSize[dim] * smalls.count;
}
float dr = (!tensor->isSparse) ? 1.0F : tensor->denseRatio;
InitTensor(&big, order, dimSize, tensor->dataType, dr, tensor->devID, tensor->mem);
}
else {
for (int i = 0; i < tensor->order; i++)
if (i != dim)
dimSize[i] = tensor->dimSize[i];
int catDimSize = 0;
for (int i = 0; i < smalls.count; i++) {
XTensor * tensor = (XTensor*)smalls.GetItem(i);
catDimSize += tensor->dimSize[dim];
}
dimSize[dim] = catDimSize;
float dr = (!tensor->isSparse) ? 1.0F : tensor->denseRatio;
InitTensor(&big, order, dimSize, tensor->dataType, dr, tensor->devID, tensor->mem);
}
/* destroy variables */
delete[] dimSize;
}
if (uniform) {
/* call _Merge function */
_Merge(&smalls, &big, dim);
/* tensor connection */
if (big.enableGrad) {
XLink::MakeLink(&smalls, &big, SHAPE_MERGE);
XLink::AddParamToHeadInt(&big, dim);
}
}
else {
/* call _ConcatenateSolely function */
_ConcatenateSolely(&smalls, &big, dim);
/* tensor connection */
if (big.enableGrad) {
XLink::MakeLink(&smalls, &big, SHAPE_CONCATENATE);
XLink::AddParamToHeadInt(&big, dim);
}
}
}
/* /*
concatenate two tensors along a given dimension concatenate two tensors along a given dimension
......
...@@ -41,6 +41,8 @@ Note that this is actually a wrapper that selects ...@@ -41,6 +41,8 @@ Note that this is actually a wrapper that selects
*/ */
XTensor Concatenate(const TensorList &smalls, int dim); XTensor Concatenate(const TensorList &smalls, int dim);
void Concatenate(const TensorList & smalls, XTensor & big, int dim);
/* concatenate two tensors along a given dimension */ /* concatenate two tensors along a given dimension */
void _Concatenate(const XTensor * smallA, const XTensor * smallB, XTensor * big, int dim); void _Concatenate(const XTensor * smallA, const XTensor * smallB, XTensor * big, int dim);
......
...@@ -232,7 +232,7 @@ XTensor Merge(const XTensor &s, int whereToMerge, int leadingDim) ...@@ -232,7 +232,7 @@ XTensor Merge(const XTensor &s, int whereToMerge, int leadingDim)
return t; return t;
} }
void Merge(const XTensor &s, XTensor &t, int whereToMerge, int leadingDim, bool requireLink) void Merge(const XTensor &s, XTensor &t, int whereToMerge, int leadingDim)
{ {
if (!t.isInit || !CheckMergeSize(&s, &t, whereToMerge, leadingDim)) { if (!t.isInit || !CheckMergeSize(&s, &t, whereToMerge, leadingDim)) {
if (leadingDim < 0) if (leadingDim < 0)
...@@ -261,7 +261,7 @@ void Merge(const XTensor &s, XTensor &t, int whereToMerge, int leadingDim, bool ...@@ -261,7 +261,7 @@ void Merge(const XTensor &s, XTensor &t, int whereToMerge, int leadingDim, bool
/* call _Merge function */ /* call _Merge function */
_Merge(&s, &t, whereToMerge, leadingDim); _Merge(&s, &t, whereToMerge, leadingDim);
if (requireLink) { if (t.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&s, NULL, &t, SHAPE_MERGE); XLink::MakeLink(&s, NULL, &t, SHAPE_MERGE);
XLink::AddParamToHeadInt(&t, whereToMerge); XLink::AddParamToHeadInt(&t, whereToMerge);
......
...@@ -33,7 +33,7 @@ void _Merge(const XTensor * s, XTensor * t, int whereToMerge, int leadingDim = - ...@@ -33,7 +33,7 @@ void _Merge(const XTensor * s, XTensor * t, int whereToMerge, int leadingDim = -
e.g., (M, N/3, 3) -> (M, N) */ e.g., (M, N/3, 3) -> (M, N) */
XTensor Merge(const XTensor &s, int whereToMerge, int leadingDim = -1); XTensor Merge(const XTensor &s, int whereToMerge, int leadingDim = -1);
void Merge(const XTensor &s, XTensor &t, int whereToMerge, int leadingDim = -1, bool requireLink = false); void Merge(const XTensor &s, XTensor &t, int whereToMerge, int leadingDim = -1);
/* merge small tensors into a big tensor */ /* merge small tensors into a big tensor */
void _Merge(const TensorList * smalls, XTensor * t, int whereToMerge); void _Merge(const TensorList * smalls, XTensor * t, int whereToMerge);
......
...@@ -41,6 +41,13 @@ a = permuted(a) ...@@ -41,6 +41,13 @@ a = permuted(a)
*/ */
void _PermuteMe(XTensor * a, int * dimPermute); void _PermuteMe(XTensor * a, int * dimPermute);
/*
permute the tensor dimensions (do it on site).
keep the result in the input tensor and return nothing.
a = permuted(a)
*/
void PermuteMe(XTensor &a, int * dimPermute);
/* /*
make a tensor with permuted dimensions (return an XTensor structure). make a tensor with permuted dimensions (return an XTensor structure).
make a new tensor to keep the result and return it. make a new tensor to keep the result and return it.
......
...@@ -48,7 +48,7 @@ XTensor Reshape(XTensor &s, int order, int * dimSize) ...@@ -48,7 +48,7 @@ XTensor Reshape(XTensor &s, int order, int * dimSize)
return t; return t;
} }
void Reshape(XTensor &s, XTensor &t, int order, int * dimSize, bool requireLink) void Reshape(XTensor &s, XTensor &t, int order, int * dimSize)
{ {
if (!t.isInit || !XTensor::IsSameShaped(&t, &s)) { if (!t.isInit || !XTensor::IsSameShaped(&t, &s)) {
InitTensor(&t, &s); InitTensor(&t, &s);
...@@ -57,7 +57,7 @@ void Reshape(XTensor &s, XTensor &t, int order, int * dimSize, bool requireLink) ...@@ -57,7 +57,7 @@ void Reshape(XTensor &s, XTensor &t, int order, int * dimSize, bool requireLink)
/* call Reshape function */ /* call Reshape function */
t.Reshape(order, dimSize); t.Reshape(order, dimSize);
if (requireLink) { if (t.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&s, NULL, &t, SHAPE_RESHAPE); XLink::MakeLink(&s, NULL, &t, SHAPE_RESHAPE);
} }
......
...@@ -29,7 +29,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -29,7 +29,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* reshape the tensor */ /* reshape the tensor */
XTensor Reshape(XTensor &s, int order, int * dimSize); XTensor Reshape(XTensor &s, int order, int * dimSize);
void Reshape(XTensor &s, XTensor &t, int order, int * dimSize, bool requireLink = false); void Reshape(XTensor &s, XTensor &t, int order, int * dimSize);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
#endif // __RESHAPE_H__ #endif // __RESHAPE_H__
...@@ -31,7 +31,7 @@ ...@@ -31,7 +31,7 @@
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* /*
transform a tensor by splitting it, e.g., (N, M) -> (N/3, M, 3) transform a tensor by splitting it, e.g., (N, M) -> (3, N/3, M)
>> s - the source tensor >> s - the source tensor
>> t - the target tensor (for return) >> t - the target tensor (for return)
...@@ -61,7 +61,7 @@ void _Split(const XTensor * s, XTensor * t, int whereToSplit, int splitNum) ...@@ -61,7 +61,7 @@ void _Split(const XTensor * s, XTensor * t, int whereToSplit, int splitNum)
} }
/* for the case that we split the last dimension. Actually /* for the case that we split the last dimension. Actually
(N, M) and (N, M/3, 3) have the same memory layout */ (N, M) and (3, N/3, M) have the same memory layout */
if (s->order - 1 == whereToSplitRDI) { if (s->order - 1 == whereToSplitRDI) {
XMemCopy(t->data, t->devID, s->data, s->devID, s->unitNum * s->unitSize); XMemCopy(t->data, t->devID, s->data, s->devID, s->unitNum * s->unitSize);
return; return;
...@@ -184,7 +184,7 @@ bool CheckSplitSize(const XTensor * s, const XTensor * t, int whereToSplit, int ...@@ -184,7 +184,7 @@ bool CheckSplitSize(const XTensor * s, const XTensor * t, int whereToSplit, int
} }
/* /*
transform a tensor by splitting it, e.g., (N, M) -> (N/3, M, 3) (return an XTensor structure) transform a tensor by splitting it, e.g., (N, M) -> (3, N/3, M) (return an XTensor structure)
make a new tensor to keep the result and return it make a new tensor to keep the result and return it
>> s - the source tensor >> s - the source tensor
...@@ -227,7 +227,7 @@ XTensor Split(const XTensor &s, int whereToSplit, int splitNum) ...@@ -227,7 +227,7 @@ XTensor Split(const XTensor &s, int whereToSplit, int splitNum)
return t; return t;
} }
void Split(const XTensor &s, XTensor &t, int whereToSplit, int splitNum, bool requireLink) void Split(const XTensor &s, XTensor &t, int whereToSplit, int splitNum)
{ {
if (!t.isInit || !CheckSplitSize(&s, &t, whereToSplit, splitNum)) { if (!t.isInit || !CheckSplitSize(&s, &t, whereToSplit, splitNum)) {
int order = s.order + 1; int order = s.order + 1;
...@@ -251,7 +251,7 @@ void Split(const XTensor &s, XTensor &t, int whereToSplit, int splitNum, bool re ...@@ -251,7 +251,7 @@ void Split(const XTensor &s, XTensor &t, int whereToSplit, int splitNum, bool re
/* call _Split function */ /* call _Split function */
_Split(&s, &t, whereToSplit, splitNum); _Split(&s, &t, whereToSplit, splitNum);
if (requireLink) { if (t.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&s, NULL, &t, SHAPE_SPLIT); XLink::MakeLink(&s, NULL, &t, SHAPE_SPLIT);
XLink::AddParamToHeadInt(&t, whereToSplit); XLink::AddParamToHeadInt(&t, whereToSplit);
......
...@@ -41,7 +41,7 @@ e.g., (M, N) -> (M, N/3, 3) ...@@ -41,7 +41,7 @@ e.g., (M, N) -> (M, N/3, 3)
*/ */
XTensor Split(const XTensor &s, int whereToSplit, int splitNum); XTensor Split(const XTensor &s, int whereToSplit, int splitNum);
void Split(const XTensor &s, XTensor &t, int whereToSplit, int splitNum, bool requireLink = false); void Split(const XTensor &s, XTensor &t, int whereToSplit, int splitNum);
/* split a big tensor into small tensors */ /* split a big tensor into small tensors */
void _Split(const XTensor * big, TensorList * smalls, int whereToSplit, int splitNum); void _Split(const XTensor * big, TensorList * smalls, int whereToSplit, int splitNum);
......
...@@ -89,6 +89,20 @@ void _SqueezeMe(XTensor * source, int leadingDim) ...@@ -89,6 +89,20 @@ void _SqueezeMe(XTensor * source, int leadingDim)
} }
/*
squeeze the tensor along the specified dimension (do it on site).
keep the result in the input tensor and return nothing.

>> source - the input tensor; overwritten with the squeezed result
>> leadingDim - the dimension that we would squeeze
                if leadingDim = -1, squeeze all dimensions that are 1
                else, squeeze the specified dimension
*/
void SqueezeMe(XTensor& source, int leadingDim)
{
    /* in-place: source serves as both the input and the output of _Squeeze */
    _Squeeze(&source, &source, leadingDim);
}
/*
squeeze the tensor along the specified dimension (return an XTensor structure) squeeze the tensor along the specified dimension (return an XTensor structure)
make a new tensor to keep the result and return it make a new tensor to keep the result and return it
...@@ -112,7 +126,7 @@ XTensor Squeeze(XTensor & source, int leadingDim) ...@@ -112,7 +126,7 @@ XTensor Squeeze(XTensor & source, int leadingDim)
return target; return target;
} }
void Squeeze(XTensor & source, XTensor & target, int leadingDim, bool requireLink) void Squeeze(XTensor & source, XTensor & target, int leadingDim)
{ {
if (!target.isInit || !XTensor::IsSameShaped(&source, &target)) { if (!target.isInit || !XTensor::IsSameShaped(&source, &target)) {
InitTensor(&target, &source); InitTensor(&target, &source);
...@@ -121,7 +135,7 @@ void Squeeze(XTensor & source, XTensor & target, int leadingDim, bool requireLin ...@@ -121,7 +135,7 @@ void Squeeze(XTensor & source, XTensor & target, int leadingDim, bool requireLin
/* call _Squeeze function */ /* call _Squeeze function */
_Squeeze(&source, &target, leadingDim); _Squeeze(&source, &target, leadingDim);
if (requireLink) { if (target.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&source, NULL, &target, SHAPE_SQUEEZE); XLink::MakeLink(&source, NULL, &target, SHAPE_SQUEEZE);
} }
......
...@@ -33,11 +33,15 @@ void _Squeeze(XTensor * source, XTensor * target, int leadingDim = -1); ...@@ -33,11 +33,15 @@ void _Squeeze(XTensor * source, XTensor * target, int leadingDim = -1);
keep the result in the input tensor a and return nothing */ keep the result in the input tensor a and return nothing */
void _SqueezeMe(XTensor * source, int leadingDim = -1); void _SqueezeMe(XTensor * source, int leadingDim = -1);
/* squeeze the tensor along the specified dimension (do it on site)
keep the result in the input tensor a and return nothing */
void SqueezeMe(XTensor & source, int leadingDim = -1);
/* squeeze the tensor along the specified dimension (return an XTensor structure) /* squeeze the tensor along the specified dimension (return an XTensor structure)
make a new tensor to keep the result and return it */ make a new tensor to keep the result and return it */
XTensor Squeeze(XTensor & source, int leadingDim = -1); XTensor Squeeze(XTensor & source, int leadingDim = -1);
void Squeeze(XTensor & source, XTensor & target, int leadingDim = -1, bool requireLink = false); void Squeeze(XTensor & source, XTensor & target, int leadingDim = -1);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -166,7 +166,7 @@ XTensor Unsqueeze(const XTensor &a, int dim, int dSize) ...@@ -166,7 +166,7 @@ XTensor Unsqueeze(const XTensor &a, int dim, int dSize)
return b; return b;
} }
void Unsqueeze(const XTensor &a, XTensor &b, int dim, int dSize, bool requireLink) void Unsqueeze(const XTensor &a, XTensor &b, int dim, int dSize)
{ {
if (!b.isInit || !CheckUnsqueezeSize(&a, &b, dim, dSize)) { if (!b.isInit || !CheckUnsqueezeSize(&a, &b, dim, dSize)) {
int order = a.order + 1; int order = a.order + 1;
...@@ -191,7 +191,7 @@ void Unsqueeze(const XTensor &a, XTensor &b, int dim, int dSize, bool requireLin ...@@ -191,7 +191,7 @@ void Unsqueeze(const XTensor &a, XTensor &b, int dim, int dSize, bool requireLin
/* call _Unsqueeze function */ /* call _Unsqueeze function */
_Unsqueeze(&a, &b, dim, dSize); _Unsqueeze(&a, &b, dim, dSize);
if (requireLink) { if (b.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, NULL, &b, SHAPE_UNSQUEEZE); XLink::MakeLink(&a, NULL, &b, SHAPE_UNSQUEEZE);
XLink::AddParamToHeadInt(&b, dim); XLink::AddParamToHeadInt(&b, dim);
......
...@@ -35,7 +35,7 @@ void _Unsqueeze(const XTensor * a, XTensor * b, int dim, int dSize); ...@@ -35,7 +35,7 @@ void _Unsqueeze(const XTensor * a, XTensor * b, int dim, int dSize);
make a new tensor to keep the result and return it */ make a new tensor to keep the result and return it */
XTensor Unsqueeze(const XTensor &a, int dim, int dSize); XTensor Unsqueeze(const XTensor &a, int dim, int dSize);
void Unsqueeze(const XTensor &a, XTensor &b, int dim, int dSize, bool requireLink = false); void Unsqueeze(const XTensor &a, XTensor &b, int dim, int dSize);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -98,6 +98,21 @@ void _SortMe(XTensor * a, XTensor * index, int dim) ...@@ -98,6 +98,21 @@ void _SortMe(XTensor * a, XTensor * index, int dim)
} }
/* /*
sort the tensor along a given dimension (do it on site).
keep the result in the input tensor a and return nothing.

>> a - input tensor; overwritten with the sorted result
>> index - index of the items in the resulting tensor
>> dim - the dimension along which the sorting is performed
*/
void SortMe(XTensor& a, XTensor& index, int dim)
{
    /* in-place: a is both the input and the output of _Sort */
    _Sort(&a, &a, &index, dim);
}
/*
sort the tensor along a given dimension (return an XTensor structure) sort the tensor along a given dimension (return an XTensor structure)
make a new tensor to keep the result and return it make a new tensor to keep the result and return it
......
...@@ -217,7 +217,7 @@ void _CudaSortBig(const XTensor * a, XTensor * b, XTensor * indexA, XTensor * in ...@@ -217,7 +217,7 @@ void _CudaSortBig(const XTensor * a, XTensor * b, XTensor * indexA, XTensor * in
CheckNTErrors((a->order > dim && dim >= 0), "Incorrect dimension specified!"); CheckNTErrors((a->order > dim && dim >= 0), "Incorrect dimension specified!");
CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!"); CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!");
int dimRDI = a->order - dim - 1; int dimRDI = a->order - dim - 1;
if (k < 0 || k > b->dimSizeRDI[dimRDI]) if (k < 0 || k > b->dimSizeRDI[dimRDI])
k = b->dimSizeRDI[dimRDI]; k = b->dimSizeRDI[dimRDI];
......
...@@ -67,8 +67,8 @@ void CudaSetAscendingOrder(XTensor * a, int dim) ...@@ -67,8 +67,8 @@ void CudaSetAscendingOrder(XTensor * a, int dim)
{ {
CheckNTErrors((a->dataType == X_INT), "TODO!"); CheckNTErrors((a->dataType == X_INT), "TODO!");
int dimRDI = a->order - dim - 1; int dimRDI = a->order - dim - 1;
int stride = 1; int stride = 1;
int strideNum = a->dimSizeRDI[dimRDI]; int strideNum = a->dimSizeRDI[dimRDI];
for(int i = 0; i < dimRDI; i++) for(int i = 0; i < dimRDI; i++)
stride *= a->dimSizeRDI[i]; stride *= a->dimSizeRDI[i];
......
...@@ -149,6 +149,7 @@ XTensor Dropout(const XTensor &x, DTYPE dropProb, int leadingDim, int leadingDim ...@@ -149,6 +149,7 @@ XTensor Dropout(const XTensor &x, DTYPE dropProb, int leadingDim, int leadingDim
CheckNTErrors(dropProb >= 0.0 && dropProb <= 1.0, "The probability must be 0-1!"); CheckNTErrors(dropProb >= 0.0 && dropProb <= 1.0, "The probability must be 0-1!");
XTensor mask; XTensor mask;
// int * maskArrayInt = NULL;
DTYPE * maskArray = NULL; DTYPE * maskArray = NULL;
DTYPE scaleFactor = (DTYPE)1.0 / ((DTYPE)1.0 - dropProb); DTYPE scaleFactor = (DTYPE)1.0 / ((DTYPE)1.0 - dropProb);
......
...@@ -83,7 +83,7 @@ XTensor HardTanH(const XTensor &x) ...@@ -83,7 +83,7 @@ XTensor HardTanH(const XTensor &x)
return y; return y;
} }
void HardTanH(const XTensor &x, XTensor &y, bool requireLink) void HardTanH(const XTensor &x, XTensor &y)
{ {
if (!y.isInit || !XTensor::IsSameShaped(&y, &x)) { if (!y.isInit || !XTensor::IsSameShaped(&y, &x)) {
InitTensor(&y, &x); InitTensor(&y, &x);
...@@ -92,7 +92,7 @@ void HardTanH(const XTensor &x, XTensor &y, bool requireLink) ...@@ -92,7 +92,7 @@ void HardTanH(const XTensor &x, XTensor &y, bool requireLink)
/* call _HardTanH function */ /* call _HardTanH function */
_HardTanH(&x, &y); _HardTanH(&x, &y);
if (requireLink) { if (y.enableGrad) {
/* tensor connection */ /* tensor connection */
XLink::MakeLink(&x, NULL, &y, FUNC_HARDTANH); XLink::MakeLink(&x, NULL, &y, FUNC_HARDTANH);
} }
......
...@@ -39,7 +39,7 @@ void _HardTanH(const XTensor * x, XTensor * y); ...@@ -39,7 +39,7 @@ void _HardTanH(const XTensor * x, XTensor * y);
/* hard tanh function (return an XTensor structure) */ /* hard tanh function (return an XTensor structure) */
XTensor HardTanH(const XTensor &x); XTensor HardTanH(const XTensor &x);
void HardTanH(const XTensor &x, XTensor &y, bool requireLink = false); void HardTanH(const XTensor &x, XTensor &y);
/* de/dx */ /* de/dx */
void _HardTanHBackward(XTensor * y, XTensor * x, void _HardTanHBackward(XTensor * y, XTensor * x,
......
...@@ -59,7 +59,7 @@ XTensor Identity(const XTensor &x) ...@@ -59,7 +59,7 @@ XTensor Identity(const XTensor &x)
return y; return y;
} }
void Identity(const XTensor &x, XTensor &y, bool requireLink) void Identity(const XTensor &x, XTensor &y)
{ {
if (!y.isInit || !y.IsSameShaped(&y, &x)) { if (!y.isInit || !y.IsSameShaped(&y, &x)) {
InitTensor(&y, &x); InitTensor(&y, &x);
...@@ -68,7 +68,7 @@ void Identity(const XTensor &x, XTensor &y, bool requireLink) ...@@ -68,7 +68,7 @@ void Identity(const XTensor &x, XTensor &y, bool requireLink)
/* call _Identity function */ /* call _Identity function */
_Identity(&x, &y); _Identity(&x, &y);
if (requireLink) { if (y.enableGrad) {
/* tensor connection */ /* tensor connection */
XLink::MakeLink(&x, NULL, &y, FUNC_IDENTITY); XLink::MakeLink(&x, NULL, &y, FUNC_IDENTITY);
} }
......
...@@ -32,7 +32,7 @@ void _Identity(const XTensor * x, XTensor * y); ...@@ -32,7 +32,7 @@ void _Identity(const XTensor * x, XTensor * y);
/* identity function y = x (return an XTensor structure) */ /* identity function y = x (return an XTensor structure) */
XTensor Identity(const XTensor &x); XTensor Identity(const XTensor &x);
void Identity(const XTensor &x, XTensor &y, bool requireLink = false); void Identity(const XTensor &x, XTensor &y);
/* de/dx */ /* de/dx */
void _IdentityBackward(const XTensor * y, const XTensor * x, void _IdentityBackward(const XTensor * y, const XTensor * x,
......
...@@ -194,7 +194,15 @@ XTensor LogSoftmax(const XTensor &x, int leadDim) ...@@ -194,7 +194,15 @@ XTensor LogSoftmax(const XTensor &x, int leadDim)
return y; return y;
} }
void LogSoftmax(const XTensor &x, XTensor &y, int leadDim, bool requireLink) /*
log scale softmax y = log(e^x / \sum_{i} e^{x_i})
make a new tensor to keep the result and return it
>> x - input vector
>> y - output vector
>> leadDim - leading dimension (along which we perform reduction)
*/
void LogSoftmax(const XTensor &x, XTensor &y, int leadDim)
{ {
int ld = leadDim; int ld = leadDim;
if (ld < 0) if (ld < 0)
...@@ -207,32 +215,12 @@ void LogSoftmax(const XTensor &x, XTensor &y, int leadDim, bool requireLink) ...@@ -207,32 +215,12 @@ void LogSoftmax(const XTensor &x, XTensor &y, int leadDim, bool requireLink)
/* call _LogSoftmax function */ /* call _LogSoftmax function */
_LogSoftmax(&x, &y, ld); _LogSoftmax(&x, &y, ld);
if (requireLink) { if (y.enableGrad) {
/* tensor connection */ /* tensor connection */
XLink::MakeLink(&x, NULL, &y, FUNC_LOGSOFTMAX); XLink::MakeLink(&x, NULL, &y, FUNC_LOGSOFTMAX);
XLink::AddParamToHeadInt(&y, ld); XLink::AddParamToHeadInt(&y, ld);
} }
} }
/*
log scale softmax y = log(e^x / \sum_{i} e^{x_i})
make a new tensor to keep the result and return it
>> x - input vector
>> y - output vector
>> leadDim - leading dimension (along which we perform reduction)
*/
void LogSoftmax(const XTensor &x, XTensor &y, int leadDim)
{
if(!XTensor::IsSameShaped(&x, &y))
InitTensor(&y, &x);
/* call _LogSoftmax function */
_LogSoftmax(&x, &y, leadDim);
/* tensor connection */
XLink::MakeLink(&x, NULL, &y, FUNC_LOGSOFTMAX);
XLink::AddParamToHeadInt(&y, leadDim);
}
/* /*
backward computation for dense matrices with default data type backward computation for dense matrices with default data type
......
...@@ -33,7 +33,7 @@ void _LogSoftmax(const XTensor * x, XTensor * y, int leadDim); ...@@ -33,7 +33,7 @@ void _LogSoftmax(const XTensor * x, XTensor * y, int leadDim);
/* log scale softmax y = log(e^x / \sum_{i} e^{x_i}) (return an XTensor structure) */ /* log scale softmax y = log(e^x / \sum_{i} e^{x_i}) (return an XTensor structure) */
XTensor LogSoftmax(const XTensor &x, int leadDim); XTensor LogSoftmax(const XTensor &x, int leadDim);
void LogSoftmax(const XTensor &x, XTensor &y, int leadDim, bool requireLink = false); void LogSoftmax(const XTensor &x, XTensor &y, int leadDim);
/* log scale softmax y = log(e^x / \sum_{i} e^{x_i}) (with both argument of x and y) */ /* log scale softmax y = log(e^x / \sum_{i} e^{x_i}) (with both argument of x and y) */
void LogSoftmax(const XTensor &x, XTensor &y, int leadDim); void LogSoftmax(const XTensor &x, XTensor &y, int leadDim);
......
...@@ -75,7 +75,7 @@ XTensor Rectify(const XTensor &x) ...@@ -75,7 +75,7 @@ XTensor Rectify(const XTensor &x)
return y; return y;
} }
void Rectify(const XTensor &x, XTensor &y, bool requireLink) void Rectify(const XTensor &x, XTensor &y)
{ {
if (!y.isInit || !XTensor::IsSameShaped(&y, &x)) { if (!y.isInit || !XTensor::IsSameShaped(&y, &x)) {
InitTensor(&y, &x); InitTensor(&y, &x);
...@@ -84,7 +84,7 @@ void Rectify(const XTensor &x, XTensor &y, bool requireLink) ...@@ -84,7 +84,7 @@ void Rectify(const XTensor &x, XTensor &y, bool requireLink)
/* call _Rectify function */ /* call _Rectify function */
_Rectify(&x, &y); _Rectify(&x, &y);
if (requireLink) { if (y.enableGrad) {
/* tensor connection */ /* tensor connection */
XLink::MakeLink(&x, NULL, &y, FUNC_RECTIFY); XLink::MakeLink(&x, NULL, &y, FUNC_RECTIFY);
} }
......
...@@ -32,7 +32,7 @@ void _Rectify(const XTensor * x, XTensor * y); ...@@ -32,7 +32,7 @@ void _Rectify(const XTensor * x, XTensor * y);
/* rectify function y = max(0, x) (return an XTensor structure) */ /* rectify function y = max(0, x) (return an XTensor structure) */
XTensor Rectify(const XTensor &x); XTensor Rectify(const XTensor &x);
void Rectify(const XTensor &x, XTensor &y, bool requireLink = false); void Rectify(const XTensor &x, XTensor &y);
/* de/dx */ /* de/dx */
void _RectifyBackward(XTensor * y, XTensor * x, void _RectifyBackward(XTensor * y, XTensor * x,
......
...@@ -78,7 +78,7 @@ XTensor Sigmoid(const XTensor &x) ...@@ -78,7 +78,7 @@ XTensor Sigmoid(const XTensor &x)
return y; return y;
} }
void Sigmoid(const XTensor &x, XTensor &y, bool requireLink) void Sigmoid(const XTensor &x, XTensor &y)
{ {
if (!y.isInit || !XTensor::IsSameShaped(&y, &x)) { if (!y.isInit || !XTensor::IsSameShaped(&y, &x)) {
InitTensor(&y, &x); InitTensor(&y, &x);
...@@ -87,7 +87,7 @@ void Sigmoid(const XTensor &x, XTensor &y, bool requireLink) ...@@ -87,7 +87,7 @@ void Sigmoid(const XTensor &x, XTensor &y, bool requireLink)
/* call _Sigmoid function */ /* call _Sigmoid function */
_Sigmoid(&x, &y); _Sigmoid(&x, &y);
if (requireLink) { if (y.enableGrad) {
/* tensor connection */ /* tensor connection */
XLink::MakeLink(&x, NULL, &y, FUNC_SIGMOID); XLink::MakeLink(&x, NULL, &y, FUNC_SIGMOID);
} }
......
...@@ -32,7 +32,7 @@ void _Sigmoid(const XTensor * x, XTensor * y); ...@@ -32,7 +32,7 @@ void _Sigmoid(const XTensor * x, XTensor * y);
/* sigmoid function y = 1/(1+exp(-x)) (return an XTensor structure) */ /* sigmoid function y = 1/(1+exp(-x)) (return an XTensor structure) */
XTensor Sigmoid(const XTensor &x); XTensor Sigmoid(const XTensor &x);
void Sigmoid(const XTensor &x, XTensor &y, bool requireLink = false); void Sigmoid(const XTensor &x, XTensor &y);
/* de/dx */ /* de/dx */
void _SigmoidBackward(XTensor * y, XTensor * x, void _SigmoidBackward(XTensor * y, XTensor * x,
......
...@@ -148,7 +148,7 @@ XTensor Softmax(const XTensor &x, int leadDim) ...@@ -148,7 +148,7 @@ XTensor Softmax(const XTensor &x, int leadDim)
return y; return y;
} }
void Softmax(const XTensor &x, XTensor &y, int leadDim, bool requireLink) void Softmax(const XTensor &x, XTensor &y, int leadDim)
{ {
int ld = leadDim; int ld = leadDim;
if (ld < 0) if (ld < 0)
...@@ -161,7 +161,7 @@ void Softmax(const XTensor &x, XTensor &y, int leadDim, bool requireLink) ...@@ -161,7 +161,7 @@ void Softmax(const XTensor &x, XTensor &y, int leadDim, bool requireLink)
/* call _Softmax function */ /* call _Softmax function */
_Softmax(&x, &y, ld); _Softmax(&x, &y, ld);
if (requireLink) { if (y.enableGrad) {
/* tensor connection */ /* tensor connection */
XLink::MakeLink(&x, NULL, &y, FUNC_SOFTMAX); XLink::MakeLink(&x, NULL, &y, FUNC_SOFTMAX);
XLink::AddParamToHeadInt(&y, ld); XLink::AddParamToHeadInt(&y, ld);
......
...@@ -33,7 +33,7 @@ void _Softmax(const XTensor * x, XTensor * y, int leadDim); ...@@ -33,7 +33,7 @@ void _Softmax(const XTensor * x, XTensor * y, int leadDim);
/* softmax y = e^x / \sum_{i} e^{x_i} (return an XTensor structure) */ /* softmax y = e^x / \sum_{i} e^{x_i} (return an XTensor structure) */
XTensor Softmax(const XTensor &x, int leadDim); XTensor Softmax(const XTensor &x, int leadDim);
void Softmax(const XTensor &x, XTensor &y, int leadDim, bool requireLink = false); void Softmax(const XTensor &x, XTensor &y, int leadDim);
/* de/dx */ /* de/dx */
void _SoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x, void _SoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
......
...@@ -195,17 +195,17 @@ void _CudaCrossEntropyBackward(XTensor * dedy, const XTensor * output, ...@@ -195,17 +195,17 @@ void _CudaCrossEntropyBackward(XTensor * dedy, const XTensor * output,
delete[] dims; delete[] dims;
} }
//if(padding != NULL) { if(padding != NULL) {
// XTensor * tmp = NewTensor(padding); XTensor * tmp = NewTensor(padding);
// _IsNonZero(padding, tmp); _IsNonZero(padding, tmp);
// int nonZeroNum = (int)_ReduceSumAll(tmp); int nonZeroNum = (int)_ReduceSumAll(tmp);
// _ScaleAndShiftMe(dedy, (DTYPE)1.0/(DTYPE)nonZeroNum); _ScaleAndShiftMe(dedy, (DTYPE)1.0/(DTYPE)nonZeroNum);
// delete tmp; delete tmp;
//} }
//else { else {
// int num = dedy->unitNum / dedy->GetDim(n); int num = dedy->unitNum / dedy->GetDim(n);
// _ScaleAndShiftMe(dedy, (DTYPE)1.0/(DTYPE)num); _ScaleAndShiftMe(dedy, (DTYPE)1.0/(DTYPE)num);
//} }
} }
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论