Commit b3a76184 by xuchen

Big change: 1. modify all interfaces; 2. modify the test cases; 3. merge with the latest code from xiao.

parent 2ed5a029
...@@ -21,12 +21,16 @@
#include <stdio.h>
#include "XNet.h"
#include "../tensor/function/FHeader.h"
#include "../tensor/core/CHeader.h"
#include "../sample/fnnlm/FNNLM.h"
//#define CRTDBG_MAP_ALLOC
//#include <stdlib.h>
//#include <crtdbg.h>
using namespace nts;
using namespace samplefnnlm;
int main( int argc, const char ** argv )
...@@ -34,15 +38,43 @@ int main( int argc, const char ** argv )
if(argc > 1 && !strcmp(argv[1], "-test"))
1;//Test();
else if(argc > 1 && !strcmp(argv[1], "-fnnlm"))
FNNLMMain(argc - 1, argv + 1);
else{
fprintf(stderr, "Thanks for using NiuTrans.Network! This is a library for building\n");
fprintf(stderr, "neural networks in an easy way. \n\n");
fprintf(stderr, "Run this program with \"-test\" for unit test!\n");
fprintf(stderr, "Or run this program with \"-fnnlm\" for sample FNNLM!\n");
}
XNet net;
XTensor a;
XTensor b;
XTensor c;
InitTensor2D(&a, 2, 2);
InitTensor2D(&b, 2, 4);
InitTensor2D(&c, 2, 4);
a.SetZeroAll();
b.SetZeroAll();
c.SetZeroAll();
SetDataFixed(a, 0.1F);
a.Set2D(0.3F, 1, 0);
a.Set2D(0.4F, 1, 1);
b = Merge(a, a, 1);
c = HTanH(MMul(a, b));
a.Dump(stderr, "a:");
b.Dump(stderr, "b:");
c.Dump(stderr, "c:");
XLink::ShowNetwork(stderr, &c);
net.Backward(c);
net.Dump(stderr);
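/* A sketch of what the toy graph above computes (assuming Merge concatenates along the
   given dimension and MMul is a plain 2-D matrix product):
       a : 2 x 2, filled with 0.1 except a(1,0) = 0.3 and a(1,1) = 0.4
       b = Merge(a, a, 1)      -> 2 x 4, i.e. a concatenated with itself along dim 1
       c = HTanH(MMul(a, b))   -> 2 x 4, since (2 x 2) * (2 x 4) = 2 x 4
   net.Backward(c) should then fill a.grad (2 x 2) and b.grad (2 x 4). */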
//_CrtDumpMemoryLeaks();
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* backward computation for activation function
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-18
* Dingdang won 5 games in the GO training yesterday, hahaha ...
*/
#include "XNoder.h"
#include "XBackwardFunc.h"
#include "../tensor/XName.h"
#include "../tensor/function/FHeader.h"
namespace nts{
/* compute dE/dx of a node */
void XFuncGrad::MakeGrad(XTensor * node)
{
XLink &income = node->income;
int operID = income.typeID;
CheckNTErrors(node->grad != NULL, "No gradient found!");
CheckNTErrors(income.tailNum == 1, "Too many input tensors for the function!");
XTensor * input = income.tails[0];
XTensor * output = node;
XNoder::MakeGrad(input);
if(operID == FUNC_HARDTANH)
_HardTanHBackward(NULL, output, input, output->grad, input->grad, NOLOSS);
else if(operID == FUNC_IDENTITY)
_IdentityBackward(NULL, output, input, output->grad, input->grad, NOLOSS);
else if(operID == FUNC_LOGSOFTMAX){
int leadDim = income.GetParamInt(0);
_LogSoftmaxBackward(NULL, output, input, output->grad, input->grad, leadDim, NOLOSS);
}
else if(operID == FUNC_RECTIFY)
_RectifyBackward(NULL, output, input, output->grad, input->grad, NOLOSS);
else if(operID == FUNC_SIGMOID)
_SigmoidBackward(NULL, output, input, output->grad, input->grad, NOLOSS);
else if(operID == FUNC_SOFTMAX){
int leadDim = income.GetParamInt(0);
_SoftmaxBackward(NULL, output, input, output->grad, input->grad, leadDim, NOLOSS);
}
else{
ShowNTErrors("Wrong activation function type!");
}
}
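/* A worked instance for the FUNC_HARDTANH branch above (a sketch, assuming the usual
   definition hardtanh(x) = -1 for x < -1, x for -1 <= x <= 1, 1 for x > 1):
       y = hardtanh(x)  =>  dy/dx = 1 if -1 < x < 1, else 0
   so _HardTanHBackward is expected to set dE/dx = dE/dy elementwise where the input lies
   inside (-1, 1) and 0 elsewhere. */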
/* indicates whether the node is for an activation function */
bool XFuncGrad::IsFunc(XTensor * node)
{
XLink &income = node->income;
return (income.typeID & FUNCTION_BASE) != 0;
}
}
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* backward computation for activation function
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-18
* Dingdang won 5 games in the GO training yesterday, hahaha ...
*/
#include "../tensor/XTensor.h"
#include "../tensor/function/FHeader.h"
#ifndef __XBACKWARDFUNC_H__
#define __XBACKWARDFUNC_H__
namespace nts{
/* this class computes the gradient for activation functions given a node */
class XFuncGrad
{
public:
/* compute dE/dx of a node */
static
void MakeGrad(XTensor * node);
/* indicates whether the node is for an activation function */
static
bool IsFunc(XTensor * node);
};
}
#endif
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-17
*/
#include "XBackwardLoss.h"
#include "../tensor/XName.h"
#include "../tensor/function/HardTanH.h"
#include "../tensor/function/LogSoftmax.h"
namespace nts{
/*
compute dE/dx for a given function y = f(x)
>> gold - gold standard to measure error (or loss)
>> y - output of the function
>> x - input of the function
>> dedy - dE/dy
>> dedx - dE/dx
>> funcID - id of the function f
>> params - parameters of the function
>> lossName - name of the loss, e.g., cross entropy
*/
void XLossGrad::Compute(XTensor * gold, XTensor * y, XTensor * x,
XTensor * dedy, XTensor * dedx,
int funcID, void * params,
LOSS_FUNCTION_NAME lossName)
{
CheckNTErrors(gold && y && x, "Empty input tensors!");
CheckNTErrors(dedx, "Empty gradient tensors!");
CheckNTErrors((funcID & FUNCTION_BASE) != 0, "Illegal function id");
if(funcID == FUNC_HARDTANH){
_HardTanHBackward(gold, y, x, dedy, dedx, lossName);
}
else if(funcID == FUNC_LOGSOFTMAX){
int leadDim = *(int*)params;
_LogSoftmaxBackward(gold, y, x, dedy, dedx, leadDim, lossName);
}
else{
ShowNTErrors("wrong function found when call the backward process!");
}
}
/*
compute dE/dy for variable y and error(loss) function E
>> gold - gold standard to measure error (or loss)
>> y - output of the function
>> dedy - dE/dy
>> lossName - name of the loss, e.g., cross entropy
*/
void XLossGrad::Compute(XTensor * gold, XTensor * y,
XTensor * dedy,
LOSS_FUNCTION_NAME lossName)
{
_LossBackward(dedy, gold, y, lossName);
}
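/* A worked equation for this output-side gradient (a sketch, assuming _LossBackward
   follows the textbook definitions): for squared error E = 1/2 * ||y - gold||^2 we get
   dE/dy = y - gold, and for cross entropy with a probability output, dE/dy = -gold / y
   (elementwise). The actual formula used is whatever lossName selects. */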
}
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-17
* My students worked all night to prepare a submission to CWMT. Good luck
* to them!
*/
#include "../tensor/XTensor.h"
#include "../tensor/function/FHeader.h"
#ifndef __XBACKWARDLOSS_H__
#define __XBACKWARDLOSS_H__
namespace nts{
/* this class computes the gradient (of an output node)
with respect to the loss */
class XLossGrad
{
public:
/* compute dE/dx for a given function y = f(x) */
void Compute(XTensor * gold, XTensor * y, XTensor * x,
XTensor * dedy, XTensor * dedx,
int funcID, void * params,
LOSS_FUNCTION_NAME lossName);
/* compute dE/dy for variable y and error(loss) function E */
void Compute(XTensor * gold, XTensor * y,
XTensor * dedy,
LOSS_FUNCTION_NAME lossName);
};
}
#endif
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* backward computation for math operations
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-18
*/
#include "XNoder.h"
#include "XBackwardMath.h"
#include "../tensor/XName.h"
#include "../tensor/core/CHeader.h"
namespace nts{
/* compute dE/dx of a node */
void XMathGrad::MakeGrad(XTensor * node)
{
CheckNTErrors(node->grad != NULL, "No gradient found!");
XLink &income = node->income;
int operID = income.typeID;
if(operID == MATH_SUM)
GradSum(node);
else if(operID == MATH_MULTIPLY)
GradMultiply(node);
else if(operID == MATH_MATRIXMUL)
GradMatrixMul(node);
else{
ShowNTErrors("TODO!");
}
}
/* indicates whether the node is for a math operation */
bool XMathGrad::IsMathOP(XTensor * node)
{
XLink &income = node->income;
return (income.typeID & MATH_BASE) != 0;
}
/*
gradient for sum
for
c = a + b * \beta
we have
dE/da = dE/dc
dE/db = dE/dc * \beta
>> node - the node (c) for backward computation
*/
void XMathGrad::GradSum(XTensor * node)
{
XLink &income = node->income;
CheckNTErrors(income.tailNum == 2, "Wrong input tensor number for SUM!");
XTensor * a = income.tails[0];
XTensor * b = income.tails[1];
DTYPE beta = income.GetParam(0);
XNoder::MakeGrad(a);
XNoder::MakeGrad(b);
_Sum(a->grad, node->grad, a->grad);
_Sum(b->grad, node->grad, b->grad, beta);
}
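/* A small numeric check for the rule above (a sketch): with c = a + b * beta, beta = 2
   and dE/dc = [[1, 1], [1, 1]], the two _Sum calls accumulate
       dE/da += [[1, 1], [1, 1]]    (dE/dc itself)
       dE/db += [[2, 2], [2, 2]]    (dE/dc scaled by beta)
   on top of whatever gradient a and b already hold. */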
/*
gradient for multiply (element-wise product)
for
c = a * b
we have
dE/da = dE/dc * b
dE/db = dE/dc * a
>> node - the node (c) for backward computation
*/
void XMathGrad::GradMultiply(XTensor * node)
{
XLink &income = node->income;
CheckNTErrors(income.tailNum == 2, "Wrong input tensor number for MULTIPLY!");
XTensor * a = income.tails[0];
XTensor * b = income.tails[1];
XNoder::MakeGrad(a);
XNoder::MakeGrad(b);
CheckNTErrors(XTensor::IsIdentical(a, b), "Wrong sized input tensors!");
_Multiply(node->grad, b, a->grad, 1.0F);
_Multiply(node->grad, a, b->grad, 1.0F);
}
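/* A small numeric check for the element-wise product rule above (a sketch, assuming the
   trailing 1.0F makes _Multiply accumulate into the existing gradient): with a = [2, 3],
   b = [4, 5], c = a * b = [8, 15] and dE/dc = [1, 1], the two calls accumulate
   dE/da += dE/dc * b = [4, 5] and dE/db += dE/dc * a = [2, 3]. */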
/*
gradient for matrix multiply
for c = matmul(a, b) * \alpha
we have
dE/da = dE/dc * b^T * \alpha
dE/db = a^T * dE/dc * \alpha
>> node - the node (c) for backward computation
*/
void XMathGrad::GradMatrixMul(XTensor * node)
{
XLink &income = node->income;
CheckNTErrors(income.tailNum == 2, "Wrong input tensor number for MATRIXMUL!");
CheckNTErrors(income.paramNum == 3, "Wrong parameter number for MATRIXMUL!");
XTensor * a = income.tails[0];
XTensor * b = income.tails[1];
MATRIX_TRANS_TYPE transA = income.GetParamTrans(0);
MATRIX_TRANS_TYPE transB = income.GetParamTrans(1);
DTYPE alpha = income.GetParam(2);
XNoder::MakeGrad(a);
XNoder::MakeGrad(b);
XTensor * dedc = node->grad;
XTensor * deda = a->grad;
XTensor * dedb = b->grad;
/* c = a * b * \alpha */
if(transA == X_NOTRANS && transB == X_NOTRANS){
/* dE/da = dE/dc * b^T * \alpha */
_MatrixMul(dedc, X_NOTRANS, b, X_TRANS, deda, alpha, 1.0F);
/* dE/db = a^T * dE/dc * \alpha */
_MatrixMul(a, X_TRANS, dedc, X_NOTRANS, dedb, alpha, 1.0F);
}
/* c = a^T * b * \alpha */
else if(transA == X_TRANS && transB == X_NOTRANS){
/* dE/da = dE/dc * b^T * \alpha */
_MatrixMul(dedc, X_NOTRANS, b, X_TRANS, deda, alpha, 1.0F);
/* dE/db = a * dE/dc * \alpha */
_MatrixMul(a, X_NOTRANS, dedc, X_NOTRANS, dedb, alpha, 1.0F);
}
/* c = a * b^T * \alpha */
else if(transA == X_NOTRANS && transB == X_TRANS){
/* dE/da = dE/dc * b * \alpha */
_MatrixMul(dedc, X_NOTRANS, b, X_NOTRANS, deda, alpha, 1.0F);
/* dE/db = a^T * dE/dc * \alpha */
_MatrixMul(a, X_TRANS, dedc, X_NOTRANS, dedb, alpha, 1.0F);
}
/* c = a^T * b^T * \alpha */
else if(transA == X_TRANS && transB == X_TRANS){
/* dE/da = dE/dc * b * \alpha */
_MatrixMul(dedc, X_NOTRANS, b, X_NOTRANS, deda, alpha, 1.0F);
/* dE/db = a * dE/dc * \alpha */
_MatrixMul(a, X_NOTRANS, dedc, X_NOTRANS, dedb, alpha, 1.0F);
}
}
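/* A shape check for the X_NOTRANS / X_NOTRANS case above (a sketch): if a is m x k and
   b is k x n, then c and dE/dc are m x n, and
       dE/da = dE/dc * b^T * alpha   ->  (m x n)(n x k) = m x k
       dE/db = a^T * dE/dc * alpha   ->  (k x m)(m x n) = k x n
   which matches the shapes of a and b, as required for accumulating into their grads. */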
}
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* backward computation for math operations
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-18
*/
#include "../tensor/XTensor.h"
#ifndef __XBACKWARDMATH_H__
#define __XBACKWARDMATH_H__
namespace nts{
/* this class computes the gradient for math operations given a node */
class XMathGrad
{
public:
/* compute dE/dx of a node */
static
void MakeGrad(XTensor * node);
/* indicates whether the node is for a math operation */
static
bool IsMathOP(XTensor * node);
private:
/* gradient for sum: c = a + b * \beta */
static
void GradSum(XTensor * node);
/* gradient for multiply (element-wise product): c = a * b */
static
void GradMultiply(XTensor * node);
/* gradient for matrix multiply: c = matmul(a, b) */
static
void GradMatrixMul(XTensor * node);
};
}
#endif
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* backward computation for shaping and data movement
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-19
* It was chilly when I came into the office this morning ...
* because I forgot to turn the air-conditioner off last night :(
*/
#include "XNoder.h"
#include "XBackwardShape.h"
#include "../tensor/XName.h"
#include "../tensor/core/CHeader.h"
namespace nts{
/* compute dE/dx of a node */
void XShapeGrad::MakeGrad(XTensor * node)
{
CheckNTErrors(node->grad != NULL, "No gradient found!");
XLink &income = node->income;
int operID = income.typeID;
if(operID == SHAPE_MERGE)
GradMerge(node);
else if(operID == SHAPE_MERGE_LIST)
GradMergeList(node);
else if(operID == SHAPE_UNSQUEEZE)
GradUnsqueeze(node);
else{
ShowNTErrors("TODO!");
}
}
/* indicates whether the node is for a shaping operation */
bool XShapeGrad::IsShapeOP(XTensor * node)
{
XLink &income = node->income;
return (income.typeID & DATA_BASE) != 0;
}
/*
gradient for merge
for
c = merge(a_0, a_1, ...)
where a_i is the i-th block in a tensor a
we have
dE/da_0 = dE/dc_{split_0}
dE/db_1 = dE/dc_{split_1}
...
i.e.,
dE/da = split(dE/dc)
>> node - the node (c) for backward computation
*/
void XShapeGrad::GradMerge(XTensor * node)
{
XLink &income = node->income;
CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for MERGE!");
XTensor * input = income.tails[0];
int whereToMerge = income.GetParamInt(0);
int leadDim = income.GetParamInt(1);
int blockSize = 1;
int blockNum = 1;
for(int i = 0; i < input->order; i++){
if(i < leadDim)
blockNum *= input->dimSize[i];
}
blockSize = input->GetDataSizeInChar() / blockNum;
XNoder::MakeGrad(input);
int * dims = new int[input->order];
for(int i = 0, j = 0; i < input->order; i++){
if(i >= leadDim){
dims[j++] = input->dimSize[i];
}
}
dims[0] = -dims[0];
XTensor gradInputSmall(input->order - leadDim, dims,
input->dataType, input->denseRatio,
input->devID, input->mem);
dims[whereToMerge - leadDim] *= dims[0];
XTensor gradNodeSmall(node->order - leadDim, dims,
node->dataType, node->denseRatio,
node->devID, node->mem);
/* we can simply split the gradient tensor
if the input is used in merging only */
if(input->outgo.tailNum == 1){
for(int i = 0; i < blockNum; i++){
gradNodeSmall.data = (char*)node->grad->data + i * blockSize;
gradInputSmall.data = (char*)input->grad->data + i * blockSize;
_Split(&gradNodeSmall, &gradInputSmall, whereToMerge - leadDim, input->dimSize[leadDim]);
}
}
/* a more complicated case is that the input tensor is used for
other operations somewhere else. So we have to do gradient
accumulation after splitting, i.e., we need an additional
SUM operation */
else{
XTensor gradInputSmallBuf(&gradInputSmall);
for(int i = 0; i < blockNum; i++){
gradNodeSmall.data = (char*)node->grad->data + i * blockSize;
gradInputSmall.data = (char*)input->grad->data + i * blockSize;
_Split(&gradNodeSmall, &gradInputSmallBuf, whereToMerge - leadDim, input->dimSize[leadDim]);
_Sum(&gradInputSmall, &gradInputSmallBuf, &gradInputSmall);
}
}
gradNodeSmall.data = NULL;
gradInputSmall.data = NULL;
delete[] dims;
}
/*
gradient for merging a list of tensors
for
c = merge(list(a, b, ...))
where a, b ... are of the same size
we have
dE/da = dE/dc_{split_0}
dE/db = dE/dc_{split_1}
i.e.,
list(dE/da, dE/db, ...) = split(dE/dc)
>> node - the node (c) for backward computation
*/
void XShapeGrad::GradMergeList(XTensor * node)
{
XLink &income = node->income;
CheckNTErrors(income.tailNum > 0, "Wrong input tensor number for MERGE!");
XTensor * last = NULL;
XList smalls(income.tailNum);
XList smallsGrad(income.tailNum);
bool mergeOnly = true;
for(int i = 0; i < income.tailNum; i++){
XTensor * tail = income.tails[i];
XNoder::MakeGrad(tail);
smalls.Add(tail);
smallsGrad.Add(tail->grad);
if(i > 1){
CheckNTErrors(XTensor::IsIdentical(last, tail),
"Input tensors must be of the same size!");
}
if(tail->outgo.tailNum > 1)
mergeOnly = false;
last = tail;
}
int whereToMerge = income.GetParamInt(0);
/* we can simply split the gradient tensor into the input tensors
if the inputs are used in merging only */
if(mergeOnly)
_Split(node->grad, &smallsGrad, whereToMerge, smalls.count);
/* a more complicated case is that the input tensors are used for
other operations somewhere else. So we have to do gradient
accumulation after splitting, i.e., we need an additional
SUM operation */
else{
int * dims = new int[last->order + 1];
dims[0] = smalls.count;
for(int i = 0; i < last->order; i++)
dims[i + 1] = last->dimSize[i];
XTensor gradSplit(last->order + 1, dims,
last->dataType, last->denseRatio,
last->devID, last->mem);
_Split(node->grad, &gradSplit, whereToMerge, smalls.count);
memcpy(dims, last->dimSize, sizeof(int) * last->order);
dims[0] = -dims[0];
XTensor gradSmall(last->order, dims,
last->dataType, last->denseRatio,
last->devID, last->mem);
/* gradient accumulation for each split */
for(int i = 0; i < smalls.count; i++){
XTensor * inputGrad = (XTensor*)smallsGrad.Get(i);
gradSmall.data = (char*)gradSplit.data + i * last->unitNum * last->unitSize;
_Sum(inputGrad, &gradSmall, inputGrad);
}
gradSmall.data = NULL;
delete[] dims;
}
}
/*
gradient for unsqueezing a tensor
for
c = unsqueeze(a)
we have
dE/da = reducesum(dE/dc)
>> node - the node (c) for backward computation
*/
void XShapeGrad::GradUnsqueeze(XTensor * node)
{
XLink &income = node->income;
CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for UNSQUEEZE!");
XTensor * output = node;
XTensor * input = income.tails[0];
XNoder::MakeGrad(input);
int dim = income.GetParamInt(0);
int dSize = income.GetParamInt(1);
CheckNTErrors(dSize == output->GetDim(dim), "Wrong dim size for UNSQUEEZE!");
CheckNTErrors(output->unitNum == input->unitNum * dSize, "Wrong tensor size!");
_ReduceSum(output->grad, input->grad, dim);
}
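/* A worked instance for the rule above (a sketch, assuming Unsqueeze copies the input
   dSize times along the new dimension dim): if a is 2 x 3 and c = unsqueeze(a, 1, 4) is
   2 x 4 x 3, then dE/da(i, j) = sum_k dE/dc(i, k, j), i.e. exactly the reduce-sum over
   dim that the _ReduceSum call performs. */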
}
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* backward computation for shaping and data movement
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-18
*/
#include "../tensor/XTensor.h"
#include "../tensor/function/FHeader.h"
#ifndef __XBACKWARDSHAPE_H__
#define __XBACKWARDSHAPE_H__
namespace nts{
/* this class computes the gradient for tensor shaping and movement given a node */
class XShapeGrad
{
public:
/* compute dE/dx of a node */
static
void MakeGrad(XTensor * node);
/* indicates whether the node is for a shaping operation */
static
bool IsShapeOP(XTensor * node);
private:
/* gradient for merge: c = merge(a, b, ...) */
static
void GradMerge(XTensor * node);
/* gradient for merging a list of tensors : c = merge(list(a, b, ...)) */
static
void GradMergeList(XTensor * node);
/* gradient for unsqueezing a tensor : c = unsqueeze(a) */
static
void GradUnsqueeze(XTensor * node);
};
}
#endif
\ No newline at end of file
...@@ -20,6 +20,12 @@
*/
#include "XNet.h"
#include "XNoder.h"
#include "XBackwardLoss.h"
#include "XBackwardMath.h"
#include "XBackwardFunc.h"
#include "XBackwardShape.h"
#include "../tensor/XName.h"
namespace nts{
...@@ -78,6 +84,22 @@ void XNet::Backward(XTensor &root, XTensor &gold, LOSS_FUNCTION_NAME loss)
}
/*
backward propagation to obtain gradient
>> root - root node (output) of the network
>> loss - name of loss function
*/
void XNet::Backward(XTensor &root, LOSS_FUNCTION_NAME loss)
{
XList roots(1);
roots.Add(&root);
XList golds(1);
golds.Add(NULL);
Backward(roots, golds, loss);
}
/*
backward propagation to obtain gradient wrt. the loss/error function
with a number of root nodes
>> root - a list of root nodes (output) of the network
...@@ -87,6 +109,85 @@ with a number of root nodes
void XNet::Backward(XList &roots, XList &golds, LOSS_FUNCTION_NAME loss)
{
Traverse(roots);
for(int i = 0; i < nodes.count; i++){
XTensor * node = (XTensor*)nodes.Get(i);
node->visitMark = NODE_UNFINISHED;
}
XLossGrad lossGrad;
/* we start with the gradient with respect to the loss for output layers */
for(int i = 0; i < roots.count; i++){
XTensor * root = (XTensor*)roots.Get(i);
XTensor * gold = (XTensor*)golds.Get(i);
XLink &income = root->income;
int funcID = income.typeID;
void * params = income.params;
/* we compute dE/dx if the output is generated by an activation function y = f(x).
Note that we do not need to obtain dE/dy here because it is of no use in the
following process of back-propagation */
if(gold != NULL && income.tailNum == 1 && (funcID & FUNCTION_BASE)){
XTensor * x = income.tails[0];
XNoder::MakeGrad(x);
lossGrad.Compute(gold, root, x, NULL, x->grad, funcID, params, loss);
root->visitMark = NODE_FINISHED;
}
/* we compute dE/dy (y is the output) if no predefined activation function is used */
else{
XNoder::MakeGrad(root);
lossGrad.Compute(gold, root, root->grad, loss);
}
}
/* back-propagation from output to input */
for(int i = nodes.count - 1; i >= 0; i--){
XTensor * node = (XTensor*)nodes.Get(i);
if(node->visitMark == NODE_FINISHED)
continue;
BackwardNode(node);
}
}
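/* A minimal usage sketch for the routine above (hypothetical tensors; it mirrors the toy
   example in Network/Main.cpp and the FNNLM sample):
       XNet net;
       XTensor y = LogSoftmax(MMul(x, w), 1);   // x and w are assumed to be initialized
       net.Backward(y, gold, CROSSENTROPY);     // single-root overload declared in XNet.h
       // afterwards x.grad and w.grad should hold dE/dx and dE/dw
   With gold given and the root produced by an activation function, the first branch above
   computes dE/dx directly and marks the root as finished. */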
/*
backward propagation to obtain gradient
with a number of root nodes
>> root - a list of root nodes (output) of the network
>> loss - name of loss function
*/
void XNet::Backward(XList &roots, LOSS_FUNCTION_NAME loss)
{
XList golds(roots.count);
for(int i = 0; i < roots.count; i++)
golds.Add(NULL);
Backward(roots, golds, loss);
}
/*
backward computation for a given node
>> node - the node keeps the result of an operation (e.g., activation function)
*/
void XNet::BackwardNode(XTensor * node)
{
if(node == NULL || node->visitMark == NODE_FINISHED)
return;
if(!XNoder::IsLeaf(node)){
if(XMathGrad::IsMathOP(node))
XMathGrad::MakeGrad(node);
else if(XFuncGrad::IsFunc(node))
XFuncGrad::MakeGrad(node);
else if(XShapeGrad::IsShapeOP(node))
XShapeGrad::MakeGrad(node);
else{
ShowNTErrors("Wrong node type!");
}
}
node->visitMark = NODE_FINISHED;
}
/*
...@@ -115,6 +216,15 @@ void XNet::Traverse(XList &roots)
for (int i = 0; i < roots.count; i++)
TarjanVisit((XTensor*)roots.Get(i), nodes, id);
for(int i = 0; i < nodes.count; i++){
XTensor * node = (XTensor*)nodes.Get(i);
if(XNoder::IsRoot(node))
outputs.Add(node);
if(XNoder::IsLeaf(node))
inputs.Add(node);
if(XNoder::IsGrad(node))
gradNodes.Add(node);
}
}
/*
...@@ -145,4 +255,22 @@ void XNet::TarjanVisit(XTensor * node, XList &orders, const unsigned int code)
}
}
/*
dump network information
>> file - the file for dumping
*/
void XNet::Dump(FILE * file)
{
for(int i = 0; i < nodes.count; i++){
XTensor * node = (XTensor*)nodes.Get(i);
fprintf(file, "node %d: %d\n", i, node->id);
node->Dump(file, "tensor: ");
if(node->grad != NULL)
node->grad->Dump(file, "grad: ");
else
fprintf(file, "no gradient!\n");
fprintf(file, "\n");
}
}
}
\ No newline at end of file
...@@ -57,11 +57,21 @@ struct XNet
void Clear();
/* backward propagation to obtain gradient wrt. the loss/error function */
void Backward(XTensor &root, XTensor &gold, LOSS_FUNCTION_NAME loss = NOLOSS);
/* backward propagation to obtain gradient */
void Backward(XTensor &root, LOSS_FUNCTION_NAME loss = NOLOSS);
/* backward propagation to obtain gradient wrt. the loss/error function
with a number of root nodes */
void Backward(XList &roots, XList &golds, LOSS_FUNCTION_NAME loss = NOLOSS);
/* backward propagation to obtain gradient
with a number of root nodes */
void Backward(XList &roots, LOSS_FUNCTION_NAME loss = NOLOSS);
/* backward computation for a given node */
void BackwardNode(XTensor * node);
/* traverse the net and find the topological order by
depth-first search (Tarjan's algorithm) */
...@@ -73,6 +83,9 @@ struct XNet
/* depth-first search given a node (Tarjan's algorithm for topological ordering) */
void TarjanVisit(XTensor * node, XList &orders, const unsigned int code);
/* dump network information */
void Dump(FILE * file);
};
/* we make a unique id for every tensor */
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-18
*/
#include "XNoder.h"
namespace nts{
/* make gradient tensor for a node */
void XNoder::MakeGrad(XTensor * node)
{
if(node == NULL)
return;
if(!XTensor::IsIdentical(node, node->grad)){
delete node->grad;
node->grad = NewTensor(node);
node->grad->SetZeroAll();
}
}
/* the node is a leaf node (input) or not */
bool XNoder::IsLeaf(XTensor * node)
{
if(node == NULL)
return false;
if(node->income.tailNum == 0)
return true;
else
return false;
}
/* the node is a root node (output) or not */
bool XNoder::IsRoot(XTensor * node)
{
if(node == NULL)
return false;
if(node->outgo.tailNum == 0)
return true;
else
return false;
}
/* the node keeps the gradient or not */
bool XNoder::IsGrad(XTensor * node)
{
if(node == NULL)
return false;
if(node->isGrad)
return true;
else
return false;
}
}
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* low-level utilities
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-18
*/
#include "../tensor/XTensor.h"
#ifndef __XNODER_H__
#define __XNODER_H__
namespace nts{
#define NODE_UNFINISHED 0
#define NODE_DOING 1
#define NODE_FINISHED 2
/* node management */
class XNoder
{
public:
/* make gradient tensor for a node */
static
void MakeGrad(XTensor * node);
/* the node is a leaf node (input) or not */
static
bool IsLeaf(XTensor * node);
/* the node is a root node (output) or not */
static
bool IsRoot(XTensor * node);
/* the node keeps the gradient or not */
static
bool IsGrad(XTensor * node);
};
}
#endif
\ No newline at end of file
...@@ -27,10 +27,11 @@
#include <math.h>
#include "FNNLM.h"
#include "../../tensor/XGlobal.h"
#include "../../tensor/XUtility.h"
#include "../../tensor/XDevice.h"
#include "../../tensor/function/FHeader.h"
#include "../../network/XNet.h"
namespace samplefnnlm
{
...@@ -50,6 +51,7 @@ float minmax = 0.08F; // range [-p,p] for parameter initializati
int sentBatch = 0; // batch size at the sentence level
int wordBatch = 1; // batch size at the word level
bool shuffled = false; // shuffled the training data file or not
bool autoDiff = false; // indicator of automatic differentiation
void LoadArgs(int argc, const char ** argv, FNNModel &model);
void Init(FNNModel &model);
...@@ -59,7 +61,7 @@ void Clear(FNNModel &model);
void InitModelTensor1D(XTensor &tensor, int num, FNNModel &model);
void InitModelTensor2D(XTensor &tensor, int rowNum, int colNum, FNNModel &model);
void Train(const char * train, bool isShuffled, FNNModel &model);
void Update(FNNModel &model, FNNModel &grad, float epsilon, bool isNodeGrad);
float GetProb(XTensor &output, XTensor &gold, XTensor * wordProbs = NULL);
void Dump(const char * fn, FNNModel &model);
void Read(const char * fn, FNNModel &model);
...@@ -71,6 +73,8 @@ void MakeWordBatch(XTensor &batch, NGram * ngrams, int ngramNum, int n, int vSiz
void Forward(XTensor inputs[], XTensor &output, FNNModel &model, FNNNet &net);
void Backward(XTensor inputs[], XTensor &output, XTensor &gold, LOSS_FUNCTION_NAME loss,
FNNModel &model, FNNModel &grad, FNNNet &net);
void FBInOne(XTensor inputs[], XTensor &output, XTensor &gold, LOSS_FUNCTION_NAME loss,
FNNModel &model, XNet &net);
/*
entry of the program
...@@ -98,6 +102,7 @@ arguments:
-devid D: the id of the device used
-1: CPU, >=0: GPUs
-mempool: use memory pools for memory management
-autodiff: use automatic differentiation for training
where S=string, D=integer and F=float.
All words in the training and test data files
...@@ -182,6 +187,8 @@ void LoadArgs(int argc, const char ** argv, FNNModel &model)
wordBatch = atoi(argv[i + 1]);
if(!strcmp(argv[i], "-shuffle"))
shuffled = true;
if(!strcmp(argv[i], "-autodiff"))
autoDiff = true;
if(!strcmp(argv[i], "-dev") && i + 1 < argc)
model.devID = atoi(argv[i + 1]);
}
...@@ -350,6 +357,9 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
FNNModel grad;
Copy(grad, model);
/* XNet for automatic differentiation */
XNet autoDiffer;
double startT = GetClockSec();
/* iterate for a number of epochs */
...@@ -380,9 +390,6 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
/* the gold standard */
XTensor gold;
/* make the input tensor for position i */
for(int i = 0; i < model.n - 1; i++)
MakeWordBatch(inputs[i], ngrams, ngramNum, i, model.vSize, model.devID, model.mem);
...@@ -390,17 +397,29 @@
/* make the gold tensor */
MakeWordBatch(gold, ngrams, ngramNum, model.n - 1, model.vSize, model.devID, model.mem);
if(!autoDiff){
/* prepare an empty network for building the fnn */
FNNNet net;
/* gradient = 0 */
Clear(grad);
/* forward computation */
Forward(inputs, output, model, net);
/* backward computation to obtain gradients */
Backward(inputs, output, gold, CROSSENTROPY, model, grad, net);
/* update model parameters */
Update(model, grad, learningRate, false);
}
else{
/* forward + backward process */
FBInOne(inputs, output, gold, CROSSENTROPY, model, autoDiffer);
/* update model parameters */
Update(model, grad, learningRate, true);
}
/* get probabilities */
float prob = GetProb(output, gold);
...@@ -442,26 +461,45 @@ update the model parameters using the delta rule
>> model - the model to update
>> grad - gradients
>> epsilon - learning rate
>> isNodeGrad - indicates whether the gradient is associated with the node
*/
void Update(FNNModel &model, FNNModel &grad, float epsilon, bool isNodeGrad)
{
XList paraList(10);
XList gradList(10);
paraList.Add(&model.outputW);
paraList.Add(&model.outputB);
for (int i = 0; i < model.hDepth; i++) {
paraList.Add(&model.hiddenW[i]);
paraList.Add(&model.hiddenB[i]);
}
paraList.Add(&model.embeddingW);
if(!isNodeGrad){
gradList.Add(&grad.outputW);
gradList.Add(&grad.outputB);
for (int i = 0; i < model.hDepth; i++) {
gradList.Add(&grad.hiddenW[i]);
gradList.Add(&grad.hiddenB[i]);
}
gradList.Add(&grad.embeddingW);
}
else{
gradList.Add(model.outputW.grad);
gradList.Add(model.outputB.grad);
for (int i = 0; i < model.hDepth; i++) {
gradList.Add(model.hiddenW[i].grad);
gradList.Add(model.hiddenB[i].grad);
}
gradList.Add(model.embeddingW.grad);
}
for (int i = 0; i < paraList.count; i++) {
XTensor * para = (XTensor*)paraList.GetItem(i);
...@@ -773,7 +811,7 @@ void Backward(XTensor inputs[], XTensor &output, XTensor &gold, LOSS_FUNCTION_NA
/* for y = softmax(s), we get dE/ds
where E is the error function (defined by loss) */
_LogSoftmaxBackward(&gold, &y, &s, NULL, &deds, 1, loss);
/* for s = x * w, we get
dE/dw_{i,j} = dE/ds_j * ds/dw_{i,j}
...@@ -818,7 +856,7 @@ void Backward(XTensor inputs[], XTensor &output, XTensor &gold, LOSS_FUNCTION_NA
/* backpropagation through the activation function:
dE/ds = dE/dh * dh/ds */
_HardTanHBackward(NULL, &h, &s, &dedh, &deds, NOLOSS);
/* gradient of the weight: dE/dw = x^T * dE/ds */
_MatrixMul(&x, X_TRANS, &deds, X_NOTRANS, &dedw);
...@@ -863,6 +901,55 @@ void Backward(XTensor inputs[], XTensor &output, XTensor &gold, LOSS_FUNCTION_NA
}
}
/*
forward + backward in one procedure
>> inputs - input word representations
>> output - output probability
>> gold - gold standard
>> loss - loss function name
>> model - the fnn model
*/
void FBInOne(XTensor inputs[], XTensor &output, XTensor &gold,
LOSS_FUNCTION_NAME loss, FNNModel &model, XNet &net)
{
int batchSize = gold.GetDim(0);
int n = model.n;
int depth = model.hDepth;
XTensor words;
XTensor embeddingBig;
XTensor hidden;
XTensor b;
XList inputList(n - 1);
for(int i = 0; i < n - 1; i++)
inputList.Add(inputs + i);
/* represent n - 1 words in one tensor */
words = Merge(inputList, 0);
/* word embedding */
embeddingBig = MMul(words, model.embeddingW);
/* input of the first hidden layer */
hidden = Split(embeddingBig, 0, n - 1);
hidden = Merge(hidden, 2, 0);
/* hidden layers */
for(int i = 0; i < depth; i++){
b = Unsqueeze(model.hiddenB[i], 1, batchSize);
hidden = MMul(hidden, model.hiddenW[i]) + b;
}
b = Unsqueeze(model.outputB, 1, batchSize);
/* output layer */
output = LogSoftmax(MMul(hidden, model.outputW) + b, 1);
/* automatic differentiation */
net.Backward(output);
}
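/* A rough shape sketch for the graph built above (assuming Merge/Split concatenate and
   split along the stated dimensions, and using the model's eSize/hSize for the embedding
   and hidden sizes):
       words        : ((n - 1) * batchSize) x vSize
       embeddingBig : ((n - 1) * batchSize) x eSize
       hidden       : batchSize x ((n - 1) * eSize), then batchSize x hSize per layer
       output       : batchSize x vSize   (log-probabilities over the vocabulary)
   net.Backward(output) then back-propagates through every node recorded by these calls. */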
/*
dump the model to the disk space
>> fn - where to keep the model
......
...@@ -30,9 +30,9 @@
#ifndef __FNNLM_H__
#define __FNNLM_H__
#include "../../tensor/XGlobal.h"
#include "../../tensor/XTensor.h"
#include "../../tensor/core/CHeader.h"
using namespace nts;
......
...@@ -28,7 +28,6 @@
#include <time.h>
#include "XTensor.h"
#include "XDevice.h"
#include "./test/Test.h"
//#define CRTDBG_MAP_ALLOC
...@@ -36,7 +35,6 @@
//#include <crtdbg.h>
using namespace nts;
void SmallTest();
...@@ -45,21 +43,17 @@ int main( int argc, const char ** argv )
//_CrtSetBreakAlloc(123);
/* a tiny test */
SmallTest();
//_CrtDumpMemoryLeaks();
//return 0;
if(argc > 1 && !strcmp(argv[1], "-test"))
Test();
else{
fprintf(stderr, "Thanks for using NiuTrans.Tensor! This is a library that eases the\n");
fprintf(stderr, "use of tensors. All you need is to ... \n\n");
fprintf(stderr, "Run this program with \"-test\" for unit test!\n");
}
//_CrtDumpMemoryLeaks();
......
...@@ -25,7 +25,7 @@
namespace nts{ // namespace nts(NiuTrans.Tensor)
int XLink::paramSize = PARAM_UNTI_SIZE;
/* constructor */
XLink::XLink()
...@@ -114,6 +118,8 @@ void XLink::ClearOutgoing(XTensor * node)
outgo.ClearTail();
outgo.typeID = 0;
outgo.type[0] = 0;
delete[] (char*)outgo.params;
outgo.params = NULL;
}
/*
...@@ -152,6 +154,8 @@ void XLink::ClearIncoming(XTensor * node)
income.ClearTail();
income.typeID = 0;
income.type[0] = 0;
delete[] (char*)income.params;
income.params = NULL;
}
/*
...@@ -210,7 +214,7 @@ add a parameter
void XLink::AddParam(DTYPE param)
{
void * ps = params;
params = new char[(paramNum + 1) * paramSize];
memcpy(params, ps, paramNum * paramSize);
DTYPE * p = (DTYPE*)((char*)params + paramNum * paramSize);
*p = param;
...@@ -226,7 +230,7 @@ add a parameter
void XLink::AddParam(void * param, int size)
{
void * ps = params;
params = new char[(paramNum + 1) * paramSize];
memcpy(params, ps, paramNum * paramSize);
char * p = (char*)params + paramNum * paramSize;
memcpy(p, param, size);
...@@ -235,6 +239,42 @@ void XLink::AddParam(void * param, int size)
}
/*
get a parameter in default type
>> i - id of the parameter
<< return - the parameter in default type
*/
DTYPE XLink::GetParam(int i)
{
CheckNTErrors(params != NULL, "parameter array cannot be empty!");
char * p = (char*)params + i * paramSize;
return *(DTYPE*)p;
}
/*
get a parameter in integer
>> i - id of the parameter
<< return - the parameter in integer
*/
int XLink::GetParamInt(int i)
{
CheckNTErrors(params != NULL, "parameter array cannot be empty!");
char * p = (char*)params + i * paramSize;
return *(int*)p;
}
/*
get a parameter in MATRIX_TRANS_TYPE
>> i - id of the parameter
<< return - the parameter in MATRIX_TRANS_TYPE
*/
MATRIX_TRANS_TYPE XLink::GetParamTrans(int i)
{
CheckNTErrors(params != NULL, "parameter array cannot be empty!");
char * p = (char*)params + i * paramSize;
return *(MATRIX_TRANS_TYPE*)p;
}
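/* A sketch of the parameter store these getters read (assuming PARAM_UNTI_SIZE stays 64):
   params is a flat char buffer of paramNum fixed-width slots, one value per slot, so slot
   i always starts at (char*)params + i * paramSize. For a MatrixMul node presumably built
   with
       XLink::AddParamToHeadTrans(&c, X_NOTRANS);   // slot 0
       XLink::AddParamToHeadTrans(&c, X_TRANS);     // slot 1
       XLink::AddParamToHead(&c, 2.0F);             // slot 2
   the readers give c.income.GetParamTrans(0) == X_NOTRANS, GetParamTrans(1) == X_TRANS and
   GetParam(2) == 2.0F, matching how XMathGrad::GradMatrixMul reads them. */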
/*
create a hyperedge with two input tensors and a output tensor create a hyperedge with two input tensors and a output tensor
>> t1 - a tail tensor >> t1 - a tail tensor
>> t2 - another tail tensor >> t2 - another tail tensor
...@@ -288,14 +328,44 @@ void XLink::MakeLink(const XList * list, XTensor * h, int id) ...@@ -288,14 +328,44 @@ void XLink::MakeLink(const XList * list, XTensor * h, int id)
} }
/* /*
create a hyper edge with an input tensor and a list of output tensors
>> t - the input tensor
>> list - a list of output tensors
>> id - id of the edge type
*/
void XLink::MakeLink(XTensor * t, XList * list, int id)
{
/* forward */
for(int i = 0; i < list->count; i++){
XTensor * h = (XTensor*)list->GetItem(i);
if(h == NULL)
continue;
XLink &income = h->income;
income.Reset();
income.SetHead(h);
income.SetType(id);
income.AddTail(t);
}
/* backward */
XLink &outgo = t->outgo;
CheckNTErrors(outgo.head == NULL || outgo.head == t, "Wrong head of the hyperedge!");
for(int i = 0; i < list->count; i++){
XTensor * t = (XTensor*)list->GetItem(i);
if(t == NULL)
continue;
outgo.AddTail(t);
}
}
/*
add parameters add parameters
>> h - head >> h - head
>> param - parameter we want introduce >> param - parameter we want introduce
*/ */
void XLink::AddParamToHead(XTensor * h, DTYPE param) void XLink::AddParamToHead(XTensor * h, DTYPE param)
{ {
if(h != NULL) CheckNTErrors(h != NULL, "head tensor cannot be empty!");
return;
h->income.AddParam(param); h->income.AddParam(param);
} }
...@@ -306,8 +376,7 @@ add an integer parameter
*/
void XLink::AddParamToHeadInt(XTensor * h, int param)
{
CheckNTErrors(h != NULL, "head tensor cannot be empty!");
h->income.AddParam(&param, sizeof(int));
}
...@@ -318,8 +387,7 @@ add a MATRIX_TRANS_TYPE parameter
*/
void XLink::AddParamToHeadTrans(XTensor * h, MATRIX_TRANS_TYPE param)
{
CheckNTErrors(h != NULL, "head tensor cannot be empty!");
h->income.AddParam(&param, sizeof(MATRIX_TRANS_TYPE));
}
...@@ -376,6 +444,11 @@ void XLink::Replace(const XTensor * oldOne, XTensor * newOne)
newIncome.tailNum = oldOne->income.tailNum;
memcpy(newIncome.tails, oldOne->income.tails, sizeof(XTensor*) * newIncome.tailNum);
int paraArraySize = oldOne->income.paramNum * oldOne->income.paramSize;
newIncome.params = new char[paraArraySize];
memcpy(newIncome.params, oldOne->income.params, paraArraySize);
newIncome.paramNum = oldOne->income.paramNum;
/* update the link to each child node */
for(int i = 0; i < newIncome.tailNum; i++){
XTensor * child = newIncome.tails[i];
......
...@@ -34,6 +34,7 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
struct XTensor;
#define MAX_OP_NAME_LENGTH 16
#define PARAM_UNTI_SIZE 64
/*
This defines the link among tensors in networks. XLink can be
...@@ -115,12 +116,21 @@ struct XLink
/* add two tails in one time */
void AddTwoTails(XTensor * t1, XTensor * t2);
/* add a parameter in default type */
void AddParam(DTYPE param);
/* add a parameter */
void AddParam(void * param, int size);
/* get a parameter in default type */
DTYPE GetParam(int i);
/* get a parameter in integer */
int GetParamInt(int i);
/* get a parameter in MATRIX_TRANS_TYPE */
MATRIX_TRANS_TYPE GetParamTrans(int i);
/* create a hyper edge with two input tensors and an output tensor */
static
void MakeLink(const XTensor * t1, const XTensor * t2, XTensor * h, int id);
...@@ -129,6 +139,10 @@ struct XLink
static
void MakeLink(const XList * list, XTensor * h, int id);
/* create a hyper edge with an input tensor and a list of output tensors */
static
void MakeLink(XTensor * h, XList * list, int id);
/* add a parameter */
static
void AddParamToHead(XTensor * h, DTYPE param);
......
...@@ -26,57 +26,81 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* get operator name */
const char * GetOPName(int type)
{
if ((type & MATH_BASE) != 0){
if (type == MATH_ABSOLUTE)
return "M_ABSOLUTE";
else if (type == MATH_MATRIXMUL)
return "M_MATRIXMUL";
else if (type == MATH_MATRIXMULBATCHED)
return "M_MATRIXMULBATCHED";
else if (type == MATH_MULTIPLY)
return "M_MULTIPLY";
else if (type == MATH_NEGATE)
return "M_NEGATE";
else if (type == MATH_SIGN)
return "M_SIGN";
else if (type == MATH_SUM)
return "M_SUM";
else if (type == MATH_LOG)
return "M_LOG";
else if (type == MATH_NORMALIZE)
return "M_NORMALIZE";
else if (type == MATH_POWER)
return "M_POWER";
else if (type == MATH_SCALEANDSHIFT)
return "M_SCALEANDSHIFT";
else if (type == REDUCE_REDUCEMAX)
return "R_REDUCEMAX";
else if (type == REDUCE_REDUCEMEAN)
return "R_REDUCEMEAN";
else if (type == REDUCE_REDUCESUM)
return "R_REDUCESUM";
else if (type == REDUCE_REDUCESUMSQUARED)
return "R_REDUCESUMSQUARED";
else if (type == REDUCE_REDUCEVARIANCE)
return "R_REDUCEVARIANCE";
}
else if ((type & DATA_BASE) != 0){
if (type == GETANDSET_SELECT)
return "G_SELECT";
else if (type == MOVEMENT_COPYINDEXED)
return "M_COPYINDEXED";
else if (type == MOVEMENT_COPYVALUES)
return "M_COPYVALUES";
else if (type == SHAPE_CONCATENATE)
return "S_CONCATENATE";
else if (type == SHAPE_MERGE)
return "S_MERGE";
else if (type == SHAPE_MERGE_LIST)
return "S_MERGE_LIST";
else if (type == SHAPE_PERMUTE)
return "S_PERMUTE";
else if (type == SHAPE_SPLIT)
return "S_SPLIT";
else if (type == SHAPE_SPLIT_LIST)
return "S_SPLIT_LIST";
else if (type == SHAPE_TRANSPOSE)
return "S_TRANSPOSE";
else if (type == SHAPE_UNSQUEEZE)
return "S_UNSQUEEZE";
else if (type == SORT_SORT)
return "S_SORT";
else if (type == SORT_TOPK)
return "S_TOPK";
}
else if ((type & FUNCTION_BASE) != 0){
if (type == FUNC_HARDTANH)
return "F_HARDTANH";
else if (type == FUNC_IDENTITY)
return "F_IDENTITY";
else if (type == FUNC_LOGSOFTMAX)
return "F_LOGSOFTMAX";
else if (type == FUNC_RECTIFY)
return "F_RECTIFY";
else if (type == FUNC_SIGMOID)
return "F_SIGMOID";
else if (type == FUNC_SOFTMAX)
return "F_SOFTMAX";
}
return "NULL";
......
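To make the new grouping concrete, here is a minimal usage sketch of GetOPName(); the include path and the printf call are assumptions for illustration, only the operator constants and GetOPName() itself come from the code above.

#include <cstdio>
#include "XName.h"            // assumed include path for the operator IDs and GetOPName()

using namespace nts;

/* print the symbolic name of one operator ID from each category */
void PrintOpNames()
{
    int ops[3] = { MATH_MATRIXMUL, SHAPE_MERGE, FUNC_HARDTANH };
    for (int i = 0; i < 3; i++)
        printf("op %d -> %s\n", ops[i], GetOPName(ops[i]));
}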
...@@ -28,8 +28,9 @@ ...@@ -28,8 +28,9 @@
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
#define MATH_ARITHMETIC 0x00001000 /* math operations */
#define MATH_ABSOLUTE MATH_ARITHMETIC + 1 #define MATH_BASE 0x00001000
#define MATH_ABSOLUTE MATH_BASE + 1
#define MATH_MATRIXMUL MATH_ABSOLUTE + 1 #define MATH_MATRIXMUL MATH_ABSOLUTE + 1
#define MATH_MATRIXMULBATCHED MATH_MATRIXMUL + 1 #define MATH_MATRIXMULBATCHED MATH_MATRIXMUL + 1
#define MATH_MULTIPLY MATH_MATRIXMULBATCHED + 1 #define MATH_MULTIPLY MATH_MATRIXMULBATCHED + 1
...@@ -42,28 +43,45 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -42,28 +43,45 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#define MATH_POWER MATH_NORMALIZE + 1 #define MATH_POWER MATH_NORMALIZE + 1
#define MATH_SCALEANDSHIFT MATH_POWER + 1 #define MATH_SCALEANDSHIFT MATH_POWER + 1
#define GETANDSET MATH_SCALEANDSHIFT + 1 #define REDUCE MATH_SCALEANDSHIFT + 1
#define GETANDSET_SELECT GETANDSET + 1
#define MOVEMENT GETANDSET_SELECT + 1
#define MOVEMENT_COPYINDEXED MOVEMENT + 1
#define MOVEMENT_COPYVALUES MOVEMENT_COPYINDEXED + 1
#define REDUCE MOVEMENT_COPYVALUES + 1
#define REDUCE_REDUCEMAX REDUCE + 1 #define REDUCE_REDUCEMAX REDUCE + 1
#define REDUCE_REDUCEMEAN REDUCE_REDUCEMAX + 1 #define REDUCE_REDUCEMEAN REDUCE_REDUCEMAX + 1
#define REDUCE_REDUCESUM REDUCE_REDUCEMEAN + 1 #define REDUCE_REDUCESUM REDUCE_REDUCEMEAN + 1
#define REDUCE_REDUCESUMSQUARED REDUCE_REDUCESUM + 1 #define REDUCE_REDUCESUMSQUARED REDUCE_REDUCESUM + 1
#define REDUCE_REDUCEVARIANCE REDUCE_REDUCESUMSQUARED + 1 #define REDUCE_REDUCEVARIANCE REDUCE_REDUCESUMSQUARED + 1
#define SHAPE REDUCE_REDUCEVARIANCE + 1 /* data and shape related operations */
#define DATA_BASE MATH_BASE * 2
#define GETANDSET DATA_BASE + 1
#define GETANDSET_SELECT GETANDSET + 1
#define MOVEMENT GETANDSET_SELECT + 1
#define MOVEMENT_COPYINDEXED MOVEMENT + 1
#define MOVEMENT_COPYVALUES MOVEMENT_COPYINDEXED + 1
#define SHAPE MOVEMENT_COPYVALUES + 1
#define SHAPE_CONCATENATE SHAPE + 1 #define SHAPE_CONCATENATE SHAPE + 1
#define SHAPE_MERGE SHAPE_CONCATENATE + 1 #define SHAPE_MERGE SHAPE_CONCATENATE + 1
#define SHAPE_PERMUTE SHAPE_MERGE + 1 #define SHAPE_MERGE_LIST SHAPE_MERGE + 1
#define SHAPE_PERMUTE SHAPE_MERGE_LIST + 1
#define SHAPE_SPLIT SHAPE_PERMUTE + 1 #define SHAPE_SPLIT SHAPE_PERMUTE + 1
#define SHAPE_TRANSPOSE SHAPE_SPLIT + 1 #define SHAPE_SPLIT_LIST SHAPE_SPLIT + 1
#define SHAPE_TRANSPOSE SHAPE_SPLIT_LIST + 1
#define SHAPE_UNSQUEEZE SHAPE_TRANSPOSE + 1 #define SHAPE_UNSQUEEZE SHAPE_TRANSPOSE + 1
#define SORT SHAPE_UNSQUEEZE + 1
#define SORT_SORT SORT + 1
#define SORT_TOPK SORT_SORT + 1
/* activation functions */
#define FUNCTION_BASE DATA_BASE * 2
#define FUNC_HARDTANH FUNCTION_BASE + 1
#define FUNC_IDENTITY FUNC_HARDTANH + 1
#define FUNC_LOGSOFTMAX FUNC_IDENTITY + 1
#define FUNC_RECTIFY FUNC_LOGSOFTMAX + 1
#define FUNC_SIGMOID FUNC_RECTIFY + 1
#define FUNC_SOFTMAX FUNC_SIGMOID + 1
/* get operator name */ /* get operator name */
const char * GetOPName(int type); const char * GetOPName(int type);
......
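Because MATH_BASE, DATA_BASE and FUNCTION_BASE sit on separate bits (0x1000, 0x2000, 0x4000), an operator ID encodes its own category, which is what the three branches in GetOPName() test. A small sketch of that property (the helper names below are hypothetical, not part of the library):

/* hypothetical helpers built only on the macros above */
inline bool IsMathOp(int type)     { return (type & MATH_BASE)     != 0; }
inline bool IsDataOp(int type)     { return (type & DATA_BASE)     != 0; }
inline bool IsFunctionOp(int type) { return (type & FUNCTION_BASE) != 0; }

/* e.g. IsFunctionOp(FUNC_SOFTMAX) is true while IsMathOp(FUNC_SOFTMAX) is false,
   which holds as long as each category defines fewer than 0x1000 operators */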
...@@ -134,8 +134,6 @@ constructor ...@@ -134,8 +134,6 @@ constructor
XTensor::XTensor(const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType, XTensor::XTensor(const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType,
const float myDenseRatio, int myDevID, XMem * myMem) const float myDenseRatio, int myDevID, XMem * myMem)
{ {
CheckNTErrors((myOrder > 0), "Illegal tensor order1");
Init(); Init();
SetDataPointer(); SetDataPointer();
...@@ -144,7 +142,8 @@ XTensor::XTensor(const int myOrder, const int * myDimSize, const TENSOR_DATA_TYP ...@@ -144,7 +142,8 @@ XTensor::XTensor(const int myOrder, const int * myDimSize, const TENSOR_DATA_TYP
mem = myMem; mem = myMem;
devID = myMem != NULL ? myMem->devID : myDevID; devID = myMem != NULL ? myMem->devID : myDevID;
Resize(myOrder, myDimSize, myDataType, myDenseRatio); if(order >= 0)
Resize(myOrder, myDimSize, myDataType, myDenseRatio);
} }
/* copy constructor */ /* copy constructor */
...@@ -211,6 +210,9 @@ XTensor::~XTensor() ...@@ -211,6 +210,9 @@ XTensor::~XTensor()
XLink::ClearIncoming(this); XLink::ClearIncoming(this);
DestroyData(); DestroyData();
if(grad != NULL)
delete grad;
} }
/* initialize member variables */ /* initialize member variables */
...@@ -237,7 +239,9 @@ void XTensor::Init() ...@@ -237,7 +239,9 @@ void XTensor::Init()
memset(isAllValued, 0, sizeof(bool) * MAX_TENSOR_DIM_NUM); memset(isAllValued, 0, sizeof(bool) * MAX_TENSOR_DIM_NUM);
isInit = false; isInit = false;
isTmp = false; isTmp = false;
isGrad = false;
visitMark = 0; visitMark = 0;
grad = NULL;
} }
/* delete data arrays */ /* delete data arrays */
...@@ -294,7 +298,7 @@ XTensor& XTensor::operator= (const XTensor& tensor) ...@@ -294,7 +298,7 @@ XTensor& XTensor::operator= (const XTensor& tensor)
} }
else{ else{
DestroyData(); DestroyData();
if(isInit){ if(!isInit){
devID = tensor.devID; devID = tensor.devID;
mem = tensor.mem; mem = tensor.mem;
} }
...@@ -347,6 +351,9 @@ judge whether the two matrices are in the same type and size ...@@ -347,6 +351,9 @@ judge whether the two matrices are in the same type and size
*/ */
bool XTensor::IsIdentical(const XTensor * a, const XTensor * b) bool XTensor::IsIdentical(const XTensor * a, const XTensor * b)
{ {
if(a == NULL || b == NULL)
return false;
if(a->order != b->order) if(a->order != b->order)
return false; return false;
...@@ -1043,7 +1050,7 @@ int XTensor::GetNonzeroSize() ...@@ -1043,7 +1050,7 @@ int XTensor::GetNonzeroSize()
/* /*
set the tensor as "temporary" set the tensor as "temporary"
>> myIsTMP - flag >> myIsTMP - the flag
*/ */
void XTensor::SetTMP(bool myIsTmp) void XTensor::SetTMP(bool myIsTmp)
{ {
...@@ -1051,6 +1058,15 @@ void XTensor::SetTMP(bool myIsTmp) ...@@ -1051,6 +1058,15 @@ void XTensor::SetTMP(bool myIsTmp)
} }
/* /*
set the tensor as "keep-gradient"
>> myIsGrad - the flag
*/
void XTensor::SetGrad(bool myIsGrad)
{
isGrad = myIsGrad;
}
/*
resize a tensor with a specified tensor size resize a tensor with a specified tensor size
>> myOrder - order of the tensor >> myOrder - order of the tensor
>> myDimSize - the size of each dimension >> myDimSize - the size of each dimension
...@@ -1105,7 +1121,7 @@ bool XTensor::Resize(const int myOrder, const int * myDimSize, ...@@ -1105,7 +1121,7 @@ bool XTensor::Resize(const int myOrder, const int * myDimSize,
if(isSparse){ if(isSparse){
/* /*
for sparse matrices, we use a list of tuple (key, value), for sparse matrices, we use a list of tuple (key, value),
ordered by key. Take a (2-dimensional) matrice as examples, ordered by key. Take a (2-dimensional) matrix as an example,
we have key = m * i + j; we have key = m * i + j;
The data array is The data array is
--------- ---------
...@@ -1148,9 +1164,9 @@ bool XTensor::Resize(const int myOrder, const int * myDimSize, ...@@ -1148,9 +1164,9 @@ bool XTensor::Resize(const int myOrder, const int * myDimSize,
if(filledData){ if(filledData){
/* allocate the new one */ /* allocate the new one */
if(mem == NULL){ if(mem == NULL){
data = (void*)new char[unitNum * unitSize]; data = XMemAlloc(devID, unitNum * unitSize);
#if defined(UNSAFE_BUT_FAST_MEM) #if defined(UNSAFE_BUT_FAST_MEM)
memset(data, 0, unitNum * unitSize); XMemSet(devID, data, 0, unitNum * unitSize);
#endif #endif
} }
else else
...@@ -1982,10 +1998,12 @@ generate a copy of XTensor ...@@ -1982,10 +1998,12 @@ generate a copy of XTensor
*/ */
XTensor * NewTensor(XTensor * a, bool isFilledData) XTensor * NewTensor(XTensor * a, bool isFilledData)
{ {
int dims[MAX_TENSOR_DIM_NUM];
CheckNTErrors((a != NULL), "Empty input!"); CheckNTErrors((a != NULL), "Empty input!");
int * dims = new int[a->order]; if(a->order > 0)
memcpy(dims, a->dimSize, sizeof(int) * a->order); memcpy(dims, a->dimSize, sizeof(int) * a->order);
if(!isFilledData) if(!isFilledData)
dims[0] = -dims[0]; dims[0] = -dims[0];
...@@ -1994,8 +2012,6 @@ XTensor * NewTensor(XTensor * a, bool isFilledData) ...@@ -1994,8 +2012,6 @@ XTensor * NewTensor(XTensor * a, bool isFilledData)
a->dataType, a->denseRatio, a->dataType, a->denseRatio,
a->devID, a->mem); a->devID, a->mem);
delete[] dims;
return newTensor; return newTensor;
} }
......
...@@ -139,8 +139,14 @@ public: ...@@ -139,8 +139,14 @@ public:
/* indicates whether the tensor is created temporarily */ /* indicates whether the tensor is created temporarily */
bool isTmp; bool isTmp;
/* indicates whether the tensor keeps the gradient when used as model parameters */
bool isGrad;
/* mark for traversing the graph */ /* mark for traversing the graph */
unsigned int visitMark; unsigned int visitMark;
/* gradient (for back-propagation) */
XTensor * grad;
/* /*
the link used to form networks. Note that when we compute on tensors, we actually create a the link used to form networks. Note that when we compute on tensors, we actually create a
...@@ -300,6 +306,9 @@ public: ...@@ -300,6 +306,9 @@ public:
/* set the tensor as "temporary" */ /* set the tensor as "temporary" */
void SetTMP(bool myIsTmp = true); void SetTMP(bool myIsTmp = true);
/* set the tensor as "keep-gradient" */
void SetGrad(bool myIsGrad = true);
/* resize a matrix with a specified matrix size */ /* resize a matrix with a specified matrix size */
bool Resize(const int myOrder, const int * myDimSize, bool Resize(const int myOrder, const int * myDimSize,
const TENSOR_DATA_TYPE myDataType = DEFAULT_DTYPE, const TENSOR_DATA_TYPE myDataType = DEFAULT_DTYPE,
......
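The new isGrad flag and grad pointer give parameter tensors a place to keep gradients, and the destructor change above releases grad automatically. A minimal sketch of how a parameter might be tagged (the constructor arguments and the X_FLOAT default are assumptions here; how grad gets filled depends on the backward pass):

int dims[2] = {8, 16};
XTensor w(2, dims, X_FLOAT, 1.0F, -1, NULL);   /* a CPU parameter tensor (assumed defaults) */
w.SetGrad(true);                               /* isGrad = true: keep the gradient */

/* ... forward computation and a backward pass that fills w.grad ... */

if(w.isGrad && w.grad != NULL)
    fprintf(stderr, "grad of w has %d entries\n", w.grad->unitNum);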
...@@ -32,9 +32,6 @@ ...@@ -32,9 +32,6 @@
#define USE_PTHREAD // for linux #define USE_PTHREAD // for linux
#endif #endif
/* the nts (NiuTrans.Tensor) namespace */
namespace nts{
////////////////////////////////////////////////// //////////////////////////////////////////////////
// necessary libs // necessary libs
#ifdef USE_PTHREAD #ifdef USE_PTHREAD
...@@ -46,12 +43,15 @@ namespace nts{ ...@@ -46,12 +43,15 @@ namespace nts{
#endif #endif
#endif #endif
/* the nts (NiuTrans.Tensor) namespace */
namespace nts{
#if(defined(_WIN32) && !defined (__CYGWIN__)) #if(defined(_WIN32) && !defined (__CYGWIN__))
#define CRFPP_USE_THREAD 1 #define CRFPP_USE_THREAD 1
#define BEGINTHREAD(src, stack, func, arg, flag, id) \ #define BEGINTHREAD(src, stack, func, arg, flag, id) \
(HANDLE)_beginthreadex((void *)(src), (unsigned)(stack), \ (HANDLE)_beginthreadex((void *)(src), (unsigned)(stack), \
(unsigned(_stdcall *)(void *))(func), (void *)(arg), \ (unsigned(_stdcall *)(void *))(func), (void *)(arg), \
(unsigned)(flag), (unsigned *)(id)) (unsigned)(flag), (unsigned *)(id))
#endif #endif
////////////////////////////////////////////////// //////////////////////////////////////////////////
......
...@@ -176,12 +176,16 @@ void XMemCopy(void * t, int devIDT, const void * s, int devIDS, size_t size) ...@@ -176,12 +176,16 @@ void XMemCopy(void * t, int devIDT, const void * s, int devIDS, size_t size)
} }
#ifdef USE_CUDA #ifdef USE_CUDA
else if(devIDT >= 0 && devIDS < 0){ else if(devIDT >= 0 && devIDS < 0){
CheckNTErrors((cudaMemcpy(t, s, size, cudaMemcpyHostToDevice) == cudaSuccess), cudaError_t error = cudaMemcpy(t, s, size, cudaMemcpyHostToDevice);
"cudaMemcpy error (cudaMemcpyHostToDevice)"); if(error != cudaSuccess){
ShowNTErrors("cudaMemcpy error (cudaMemcpyHostToDevice)");
}
} }
else if(devIDT < 0 && devIDS >= 0){ else if(devIDT < 0 && devIDS >= 0){
CheckNTErrors((cudaMemcpy(t, s, size, cudaMemcpyDeviceToHost) == cudaSuccess), cudaError_t error = cudaMemcpy(t, s, size, cudaMemcpyDeviceToHost);
"cudaMemcpy error (cudaMemcpyDeviceToHost)"); if(error != cudaSuccess){
ShowNTErrors("cudaMemcpy error (cudaMemcpyDeviceToHost)");
}
} }
else{ else{
//if(devIDT == devIDS){ //if(devIDT == devIDS){
...@@ -482,8 +486,9 @@ quick sorting ...@@ -482,8 +486,9 @@ quick sorting
NOTE: this means that the items may not be placed in a contiguous memory space NOTE: this means that the items may not be placed in a contiguous memory space
>> comp - the comparison function >> comp - the comparison function
*/ */
void XQSort(void * data, void * index, int num, int width, int stride, int (*comp)(const void *, const void *)) void XQSort(void * dataA, void * dataB, void * index, int num, int width, int stride, int (*comp)(const void *, const void *))
{ {
XMemCopy(dataB, -1, dataA, -1, num * width);
char *lo, *hi; // ends of sub-array currently sorting char *lo, *hi; // ends of sub-array currently sorting
int *indexlo, *indexhi; int *indexlo, *indexhi;
char *mid; // points to middle of subarray char *mid; // points to middle of subarray
...@@ -502,8 +507,8 @@ void XQSort(void * data, void * index, int num, int width, int stride, int (*com ...@@ -502,8 +507,8 @@ void XQSort(void * data, void * index, int num, int width, int stride, int (*com
stackptr = 0; stackptr = 0;
lo = (char*)data; lo = (char*)dataB;
hi = (char*)data + realStride * (num - 1); hi = (char*)dataB + realStride * (num - 1);
indexlo = (int*)index; indexlo = (int*)index;
indexhi = index != NULL ? (int*)index + stride * (num - 1) : NULL; indexhi = index != NULL ? (int*)index + stride * (num - 1) : NULL;
......
...@@ -53,7 +53,7 @@ extern void XSleep(int sleepTime); ...@@ -53,7 +53,7 @@ extern void XSleep(int sleepTime);
extern double GetClock(); extern double GetClock();
extern double GetClockSec(); extern double GetClockSec();
extern void XQSort(void * data, void * index, int num, int width, int stride, int (*comp)(const void *, const void *)); extern void XQSort(void * dataA, void * dataB, void * index, int num, int width, int stride, int (*comp)(const void *, const void *));
extern int CompXFloat(const void * a, const void * b); extern int CompXFloat(const void * a, const void * b);
#ifdef USE_CUDA #ifdef USE_CUDA
......
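The reworked XQSort takes a source array and a separate destination: the data is copied from dataA to dataB first, so the input keeps its order and only the copy (plus the optional index) is sorted. A sketch, under the assumption that width is the item size in bytes and stride is 1 for a tightly packed array:

float src[5]   = {3.0F, 1.0F, 4.0F, 1.5F, 2.0F};
float dst[5];
int   index[5] = {0, 1, 2, 3, 4};

/* sort a copy of src into dst, reordering index alongside; src is untouched */
XQSort(src, dst, index, 5, sizeof(float), 1, CompXFloat);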
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
#include <math.h> #include <math.h>
#include "../../XTensor.h" #include "../../XTensor.h"
#include "../../XName.h"
#include "Absolute.h" #include "Absolute.h"
#include "Absolute.cuh" #include "Absolute.cuh"
...@@ -28,21 +29,54 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -28,21 +29,54 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* /*
set every entry to its absolute value set every entry to its absolute value
>> a - the tensor we are processing >> a - input tensor we are processing
>> b - output tensor we are processing
*/ */
void _Absolute(XTensor * a) void _Absolute(const XTensor * a, XTensor * b)
{ {
#ifdef USE_CUDA #ifdef USE_CUDA
/* run it on GPUs */ /* run it on GPUs */
if (a->devID >= 0) { if (a->devID >= 0) {
_CudaAbsolute(a); _CudaAbsolute(a, b);
return; return;
} }
#endif #endif
CheckNTErrors((XTensor::IsIdentical(a, b)), "Input tensors should have the same type!");
CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!"); CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!");
DTYPE * d = (DTYPE*)a->data; DTYPE * d = (DTYPE*)a->data;
DTYPE * db = (DTYPE*)b->data;
for (int i = 0; i < a->unitNum; i++) for (int i = 0; i < a->unitNum; i++)
d[i] = (DTYPE)fabs(d[i]); db[i] = (DTYPE)fabs(d[i]);
}
/*
set every entry to its absolute value (do it on site)
keep the result in the input tensor a and return nothing
>> a - the tensor we are processing
*/
void _AbsoluteMe(XTensor * a)
{
_Absolute(a, a);
}
/*
set every entry to its absolute value (return a XTensor structure)
make a new tensor to keep the result and return it
>> a - input tensor we are processing
<< return - the absolute value of input tensor
*/
XTensor Absolute(const XTensor & a)
{
XTensor b(&a);
b.SetTMP();
/* call _Absolute function */
_Absolute(&a, &b);
/* tensor connections */
XLink::MakeLink(&a, NULL, &b, MATH_ABSOLUTE);
return b;
} }
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
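Each unary operation now comes in three flavours: _Op(a, b) writes into a separate output, _OpMe(a) works in place, and Op(a) returns a fresh tensor and records the operation for back-propagation. A short sketch for Absolute (a is assumed to be an initialized dense tensor):

void AbsoluteDemo(XTensor &a)
{
    XTensor b(&a);            /* same shape and type as a, as in Absolute() above */
    _Absolute(&a, &b);        /* b[i] = |a[i]|, a is unchanged */
    _AbsoluteMe(&a);          /* in-place: a[i] = |a[i]| */
    XTensor c = Absolute(a);  /* new tensor; XLink records MATH_ABSOLUTE for backward */
}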
...@@ -29,37 +29,41 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -29,37 +29,41 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA #ifdef USE_CUDA
/* /*
set each entry to its absolute value (CUDA Kernel) set each entry to its absolute value (CUDA Kernel)
>> d - pointer to the data array >> a - pointer to input data array
>> b - pointer to output data array
>> size - size of the data array >> size - size of the data array
*/ */
__global__ __global__
void KernelAbsolute(DTYPE * d, int size) void KernelAbsolute(DTYPE * a, DTYPE * b, int size)
{ {
int i = blockDim.x * blockIdx.x + threadIdx.x; int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size) if (i < size)
d[i] = fabs(d[i]); b[i] = fabs(a[i]);
} }
/* /*
set each entry to its absolute value (CUDA Kernel) set each entry to its absolute value (CUDA Kernel)
This is for float16 computation This is for float16 computation
>> d - pointer to the data array >> a - pointer to input data array
>> b - pointer to output data array
>> size - size of the data array >> size - size of the data array
*/ */
__global__ __global__
void KernelAbsolute(__half * d, int size) void KernelAbsolute(__half * a, __half * b, int size)
{ {
return; return;
} }
/* /*
set each entry to its with float16 data type value set each entry to its absolute value
>> a - the tensor >> a - input tensor
>> b - output tensor
*/ */
extern "C" extern "C"
void _CudaAbsolute(XTensor * a) void _CudaAbsolute(const XTensor * a, XTensor * b)
{ {
CheckNTErrors((XTensor::IsIdentical(a, b)), "Input tensors should have the same type!");
CheckNTErrors((a->isSparse == false), "TODO!"); CheckNTErrors((a->isSparse == false), "TODO!");
int gridSize[3]; int gridSize[3];
...@@ -74,10 +78,10 @@ void _CudaAbsolute(XTensor * a) ...@@ -74,10 +78,10 @@ void _CudaAbsolute(XTensor * a)
ProtectCudaDev(a->devID, devIDBackup); ProtectCudaDev(a->devID, devIDBackup);
if (a->dataType == DEFAULT_DTYPE) { if (a->dataType == DEFAULT_DTYPE) {
KernelAbsolute << <blocks, threads >> >((DTYPE*)a->data, a->unitNum); KernelAbsolute << <blocks, threads >> >((DTYPE*)a->data, (DTYPE*)b->data, a->unitNum);
} }
else if (a->dataType == X_FLOAT16) { else if (a->dataType == X_FLOAT16) {
KernelAbsolute << <blocks, threads >> >((__half*)a->data, a->unitNum); KernelAbsolute << <blocks, threads >> >((__half*)a->data, (__half*)b->data, a->unitNum);
} }
else { else {
ShowNTErrors("TODO!"); ShowNTErrors("TODO!");
......
...@@ -27,15 +27,15 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -27,15 +27,15 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* set each entry to its absolute value (CUDA Kernel) */ /* set each entry to its absolute value (CUDA Kernel) */
__global__ __global__
void KernelAbsolute(DTYPE * d, int size); void KernelAbsolute(DTYPE * a, DTYPE * b, int size);
/* set each entry to its absolute value (CUDA Kernel) with float16 data type*/ /* set each entry to its absolute value (CUDA Kernel) with float16 data type*/
__global__ __global__
void KernelAbsolute(__half * d, int size); void KernelAbsolute(__half * a, __half * b, int size);
/* set each entry to its absolute value */ /* set each entry to its absolute value */
extern "C" extern "C"
void _CudaAbsolute(XTensor * a); void _CudaAbsolute(const XTensor * a, XTensor * b);
#endif // USE_CUDA #endif // USE_CUDA
......
...@@ -27,8 +27,19 @@ ...@@ -27,8 +27,19 @@
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* set every entry to its absolute value */ /* set every entry to its absolute value */
extern "C" void _Absolute(const XTensor * a, XTensor * b);
void _Absolute(XTensor * a);
/*
set every entry to its absolute value (do it on site)
keep the result in the input tensor a and return nothing
*/
void _AbsoluteMe(XTensor * a);
/*
set every entry to its absolute value (return a XTensor structure)
make a new tensor to keep the result and return it
*/
XTensor Absolute(const XTensor & a);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -38,11 +38,11 @@ c_i = trans(a_i) * trans(b_i) * \alpha + c_i * \beta for each i in [0,count-1] ...@@ -38,11 +38,11 @@ c_i = trans(a_i) * trans(b_i) * \alpha + c_i * \beta for each i in [0,count-1]
>> beta - scalar >> beta - scalar
*/ */
void _MatrixMULBatchedCPU(const XList * a, MATRIX_TRANS_TYPE transposedA, void _MatrixMULBatchedCPU(const XList * a, MATRIX_TRANS_TYPE transposedA,
const XList * b, MATRIX_TRANS_TYPE transposedB, const XList * b, MATRIX_TRANS_TYPE transposedB,
XList * c, DTYPE alpha, DTYPE beta) XList * c, DTYPE alpha, DTYPE beta)
{ {
CheckNTErrors((a && b && c), "Empty input lists!"); CheckNTErrors(a && b && c, "Empty input lists!");
CheckNTErrors((a->count == b->count && a->count == c->count), "Input lists must be of the same size!"); CheckNTErrors(a->count == b->count && a->count == c->count, "Input lists must be of the same size!");
if (a->count == 0) if (a->count == 0)
return; return;
......
...@@ -28,8 +28,8 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -28,8 +28,8 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* matrix multiplication in batch mode (CPU code) */ /* matrix multiplication in batch mode (CPU code) */
extern "C" extern "C"
void _MatrixMULBatchedCPU(const XList * a, MATRIX_TRANS_TYPE transposedA, const XList * b, MATRIX_TRANS_TYPE transposedB, XList * c, void _MatrixMULBatchedCPU(const XList * a, MATRIX_TRANS_TYPE transposedA, const XList * b, MATRIX_TRANS_TYPE transposedB,
DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0); XList * c, DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -30,7 +30,7 @@ ...@@ -30,7 +30,7 @@
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* /*
matrix multiplication matrix multiplication c = trans(a) * trans(b) * alpha + c * beta
For the input tensors a and b, we perform matrix multiplication on the first two dimensions. For the input tensors a and b, we perform matrix multiplication on the first two dimensions.
E.g., let A be a tensor of size y * z * m and B be a tensor of size x * y * n. E.g., let A be a tensor of size y * z * m and B be a tensor of size x * y * n.
...@@ -66,8 +66,7 @@ void _MatrixMul(const XTensor * a, MATRIX_TRANS_TYPE transposedA, ...@@ -66,8 +66,7 @@ void _MatrixMul(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
int cn = c->dimSizeRDI[1]; int cn = c->dimSizeRDI[1];
int cm = c->dimSizeRDI[0]; int cm = c->dimSizeRDI[0];
CheckNTErrors((am == bn && an == cn && bm == cm), CheckNTErrors((am == bn && an == cn && bm == cm), "Unmatched tensors in multiplication!");
"Unmatched tensors in multiplication!");
int aBlockSize = a->dimSizeRDI[0] * a->dimSizeRDI[1]; int aBlockSize = a->dimSizeRDI[0] * a->dimSizeRDI[1];
int bBlockSize = b->dimSizeRDI[0] * b->dimSizeRDI[1]; int bBlockSize = b->dimSizeRDI[0] * b->dimSizeRDI[1];
...@@ -80,13 +79,13 @@ void _MatrixMul(const XTensor * a, MATRIX_TRANS_TYPE transposedA, ...@@ -80,13 +79,13 @@ void _MatrixMul(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
int cBlockNum = 1; int cBlockNum = 1;
for (int i = 2; i < a->order; i++) { for (int i = 2; i < a->order; i++) {
CheckNTErrors((a->dimSizeRDI[i] == c->dimSizeRDI[i - 2 + b->order]), "Incorrect tensor sizes!"); CheckNTErrors(a->dimSizeRDI[i] == c->dimSizeRDI[i - 2 + b->order], "Incorrect tensor sizes!");
aBlockNum *= a->dimSizeRDI[i]; aBlockNum *= a->dimSizeRDI[i];
cBlockNum *= a->dimSizeRDI[i]; cBlockNum *= a->dimSizeRDI[i];
} }
for (int i = 2; i < b->order; i++) { for (int i = 2; i < b->order; i++) {
CheckNTErrors((b->dimSizeRDI[i] == c->dimSizeRDI[i]), "Incorrect tensor sizes!"); CheckNTErrors(b->dimSizeRDI[i] == c->dimSizeRDI[i], "Incorrect tensor sizes!");
bBlockNum *= b->dimSizeRDI[i]; bBlockNum *= b->dimSizeRDI[i];
cBlockNum *= b->dimSizeRDI[i]; cBlockNum *= b->dimSizeRDI[i];
} }
...@@ -186,7 +185,7 @@ void _MatrixMul(const XTensor * a, MATRIX_TRANS_TYPE transposedA, ...@@ -186,7 +185,7 @@ void _MatrixMul(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
} }
/* /*
matrix multiplication (return a XTensor structure) matrix multiplication (return a XTensor structure) c = trans(a) * trans(b) * alpha
make a new tensor to keep the result and return it make a new tensor to keep the result and return it
For the input tensors a and b, we perform matrix multiplication on the first two dimensions. For the input tensors a and b, we perform matrix multiplication on the first two dimensions.
...@@ -203,14 +202,13 @@ Obviously C = A * B performs normal matrix multiplication if A = y * z and B = x ...@@ -203,14 +202,13 @@ Obviously C = A * B performs normal matrix multiplication if A = y * z and B = x
>> b - tensor b >> b - tensor b
>> transposedB - indicates whether the matrices in b are transposed >> transposedB - indicates whether the matrices in b are transposed
>> alpha - a coefficient >> alpha - a coefficient
>> beta - another coefficient
>> parallelRunner - parallel processing module >> parallelRunner - parallel processing module
<< return - the result of matrix multiplication << return - the result of matrix multiplication
*/ */
XTensor MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA, const XTensor &b, MATRIX_TRANS_TYPE transposedB, XTensor MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA,
DTYPE alpha, DTYPE beta, XPRunner * parallelRunner) const XTensor &b, MATRIX_TRANS_TYPE transposedB,
DTYPE alpha, XPRunner * parallelRunner)
{ {
CheckNTErrors(&a != &NULLTensor && &b != &NULLTensor, "Empty input tensors!");
CheckNTErrors(a.dataType == b.dataType, "Input tensors should have the same data type!"); CheckNTErrors(a.dataType == b.dataType, "Input tensors should have the same data type!");
CheckNTErrors(a.order >= 2 && b.order >= 2, "Input tensors must have an order >= 2!"); CheckNTErrors(a.order >= 2 && b.order >= 2, "Input tensors must have an order >= 2!");
...@@ -224,10 +222,10 @@ XTensor MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA, const XTensor ...@@ -224,10 +222,10 @@ XTensor MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA, const XTensor
int order = a.order + b.order - 2; int order = a.order + b.order - 2;
int sub = 0; int sub = 0;
int * dimSize = new int[order]; int * dimSize = new int[order];
for (int i = 2; i < b.order; i++)
dimSize[sub++] = b.dimSizeRDI[b.order + 1 - i];
for (int i = 2; i < a.order; i++) for (int i = 2; i < a.order; i++)
dimSize[sub++] = a.dimSizeRDI[a.order + 1 - i]; dimSize[sub++] = a.dimSizeRDI[a.order + 1 - i];
for (int i = 2; i < b.order; i++)
dimSize[sub++] = b.dimSizeRDI[b.order + 1 - i];
dimSize[sub++] = an; dimSize[sub++] = an;
dimSize[sub++] = bm; dimSize[sub++] = bm;
...@@ -236,14 +234,65 @@ XTensor MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA, const XTensor ...@@ -236,14 +234,65 @@ XTensor MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA, const XTensor
c.SetTMP(); c.SetTMP();
/* call _MatrixMul function */ /* call _MatrixMul function */
_MatrixMul(&a, transposedA, &b, transposedB, &c, alpha, beta, parallelRunner); _MatrixMul(&a, transposedA, &b, transposedB, &c, alpha, 0, parallelRunner);
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_MATRIXMUL); XLink::MakeLink(&a, &b, &c, MATH_MATRIXMUL);
XLink::AddParamToHeadTrans(&c, transposedA); XLink::AddParamToHeadTrans(&c, transposedA);
XLink::AddParamToHeadTrans(&c, transposedB); XLink::AddParamToHeadTrans(&c, transposedB);
XLink::AddParamToHead(&c, alpha); XLink::AddParamToHead(&c, alpha);
XLink::AddParamToHead(&c, beta);
/* destroy variables */
delete[] dimSize;
return c;
}
/*
matrix multiplication with no transposition c = a * b * alpha
>> a - tensor a
>> b - tensor b
>> alpha - a coefficient
>> parallelRunner - parallel processing module
<< return - the result of matrix multiplication
*/
XTensor MatrixMul(const XTensor &a, const XTensor &b,
DTYPE alpha, XPRunner * parallelRunner)
{
CheckNTErrors(a.dataType == b.dataType, "Input tensors should have the same data type!");
CheckNTErrors(a.order >= 2 && b.order >= 2, "Input tensors must have an order >= 2!");
int an = a.dimSizeRDI[1];
int am = a.dimSizeRDI[0];
int bn = b.dimSizeRDI[1];
int bm = b.dimSizeRDI[0];
CheckNTErrors(am == bn, "Unmatched tensors in multiplication!");
int order = a.order + b.order - 2;
int sub = 0;
int * dimSize = new int[order];
for (int i = 2; i < a.order; i++)
dimSize[sub++] = a.dimSizeRDI[a.order + 1 - i];
for (int i = 2; i < b.order; i++)
dimSize[sub++] = b.dimSizeRDI[b.order + 1 - i];
dimSize[sub++] = an;
dimSize[sub++] = bm;
float dr = (!a.isSparse || !b.isSparse) ? 1.0F : MAX(a.denseRatio, b.denseRatio);
XTensor c(order, dimSize, a.dataType, dr, a.devID, a.mem);
c.SetTMP();
/* call _MatrixMul function */
_MatrixMul(&a, X_NOTRANS, &b, X_NOTRANS, &c, alpha, 0, parallelRunner);
/* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_MATRIXMUL);
XLink::AddParamToHeadTrans(&c, X_NOTRANS);
XLink::AddParamToHeadTrans(&c, X_NOTRANS);
XLink::AddParamToHead(&c, alpha);
/* destroy variables */ /* destroy variables */
delete[] dimSize; delete[] dimSize;
......
...@@ -26,8 +26,10 @@ ...@@ -26,8 +26,10 @@
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
#define MMul MatrixMul
/* /*
matrix multiplication matrix multiplication c = trans(a) * trans(b) * alpha + c * beta
For the input tensors a and b, we perform matrix multiplication on the first two dimensions. For the input tensors a and b, we perform matrix multiplication on the first two dimensions.
E.g., let A be a tensor of size y * z * m and B be a tensor of size x * y * n. E.g., let A be a tensor of size y * z * m and B be a tensor of size x * y * n.
...@@ -42,7 +44,7 @@ void _MatrixMul(const XTensor * a, MATRIX_TRANS_TYPE transposedA, const XTensor ...@@ -42,7 +44,7 @@ void _MatrixMul(const XTensor * a, MATRIX_TRANS_TYPE transposedA, const XTensor
DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0, XPRunner * parallelRunner = NULL); DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0, XPRunner * parallelRunner = NULL);
/* /*
matrix multiplication (return a XTensor structure) matrix multiplication (return a XTensor structure) c = trans(a) * trans(b) * alpha
make a new tensor c to keep the result and return it make a new tensor c to keep the result and return it
For the input tensors a and b, we perform matrix multiplication on the first two dimensions. For the input tensors a and b, we perform matrix multiplication on the first two dimensions.
...@@ -55,8 +57,13 @@ C should be a tensor of z * x * n * m. ...@@ -55,8 +57,13 @@ C should be a tensor of z * x * n * m.
Obviously C = A * B performs normal matrix multiplication if A = y * z and B = x * y. Obviously C = A * B performs normal matrix multiplication if A = y * z and B = x * y.
*/ */
XTensor MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA, const XTensor &b, MATRIX_TRANS_TYPE transposedB, XTensor MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA, const XTensor &b, MATRIX_TRANS_TYPE transposedB,
DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0, XPRunner * parallelRunner = NULL); DTYPE alpha = (DTYPE)1.0, XPRunner * parallelRunner = NULL);
/* matrix multiplication with no transposition c = a * b * alpha */
XTensor MatrixMul(const XTensor &a, const XTensor &b,
DTYPE alpha = (DTYPE)1.0, XPRunner * parallelRunner = NULL);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
#endif // __MATRIXMUL_H__ #endif // __MATRIXMUL_H__
\ No newline at end of file
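The XTensor-returning MatrixMul no longer exposes beta (it always calls the low-level routine with beta = 0), and a second overload drops the transposition flags entirely; MMul is a shorthand for MatrixMul. A sketch of the two calls, assuming the inner dimensions of a and b match:

XTensor c1 = MatrixMul(a, X_TRANS, b, X_NOTRANS, 2.0F);  /* c1 = trans(a) * b * 2 */
XTensor c2 = MatrixMul(a, b);                            /* c2 = a * b, alpha defaults to 1 */
XTensor c3 = MMul(a, b);                                 /* same as c2 via the alias */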
...@@ -45,9 +45,9 @@ where trans() return the transposed matrix if the flag is fired ...@@ -45,9 +45,9 @@ where trans() return the transposed matrix if the flag is fired
>> stream - the stream for creating the job pipeline >> stream - the stream for creating the job pipeline
*/ */
void _MatrixMul2D(const XTensor * a, MATRIX_TRANS_TYPE transposedA, void _MatrixMul2D(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
const XTensor * b, MATRIX_TRANS_TYPE transposedB, const XTensor * b, MATRIX_TRANS_TYPE transposedB,
XTensor * c, DTYPE alpha, DTYPE beta, XTensor * c, DTYPE alpha, DTYPE beta,
XPRunner * parallelRunner, XStream * stream) XPRunner * parallelRunner, XStream * stream)
{ {
CheckNTErrors((a && b && c), "Empty input tensors!"); CheckNTErrors((a && b && c), "Empty input tensors!");
CheckNTErrors((a->dataType == b->dataType), "Input tensors should have the same data type!"); CheckNTErrors((a->dataType == b->dataType), "Input tensors should have the same data type!");
......
...@@ -122,9 +122,8 @@ where trans() return the transposed matrix if the flag is fired ...@@ -122,9 +122,8 @@ where trans() return the transposed matrix if the flag is fired
>> stream - the stream for creating the job pipeline >> stream - the stream for creating the job pipeline
*/ */
void _CudaMatrixMul2D(const XTensor * a, MATRIX_TRANS_TYPE transposedA, void _CudaMatrixMul2D(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
const XTensor * b, MATRIX_TRANS_TYPE transposedB, const XTensor * b, MATRIX_TRANS_TYPE transposedB,
XTensor * c, XTensor * c, DTYPE alpha, DTYPE beta, XStream * stream)
DTYPE alpha, DTYPE beta, XStream * stream)
{ {
int an = transposedA == X_TRANS ? a->dimSize[1] : a->dimSize[0]; int an = transposedA == X_TRANS ? a->dimSize[1] : a->dimSize[0];
int am = transposedA == X_TRANS ? a->dimSize[0] : a->dimSize[1]; int am = transposedA == X_TRANS ? a->dimSize[0] : a->dimSize[1];
...@@ -158,9 +157,12 @@ void _CudaMatrixMul2D(const XTensor * a, MATRIX_TRANS_TYPE transposedA, ...@@ -158,9 +157,12 @@ void _CudaMatrixMul2D(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
cublasSetStream(*handle, stream->stream); cublasSetStream(*handle, stream->stream);
if (a->dataType == X_FLOAT && b->dataType == X_FLOAT && c->dataType == X_FLOAT) { if (a->dataType == X_FLOAT && b->dataType == X_FLOAT && c->dataType == X_FLOAT) {
_CudaBLASMatrixMUL(handle, a->data, transposedA, a->dataType, b->data, transposedB, a->dataType, c->data, c->dataType, _CudaBLASMatrixMUL(handle, a->data, transposedA, a->dataType,
a->dimSize[0], a->dimSize[1], b->dimSize[0], b->dimSize[1], c->dimSize[0], c->dimSize[1], b->data, transposedB, a->dataType, c->data, c->dataType,
alpha, beta); a->dimSize[0], a->dimSize[1],
b->dimSize[0], b->dimSize[1],
c->dimSize[0], c->dimSize[1],
alpha, beta);
} }
else { else {
// TODO!! // TODO!!
......
...@@ -44,7 +44,7 @@ where trans() return the transposed matrix if the flag is fired ...@@ -44,7 +44,7 @@ where trans() return the transposed matrix if the flag is fired
*/ */
extern "C" extern "C"
void _CudaMatrixMul2D(const XTensor * a, MATRIX_TRANS_TYPE transposedA, const XTensor * b, MATRIX_TRANS_TYPE transposedB, XTensor * c, void _CudaMatrixMul2D(const XTensor * a, MATRIX_TRANS_TYPE transposedA, const XTensor * b, MATRIX_TRANS_TYPE transposedB, XTensor * c,
DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0, XStream * stream = NULL); DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0, XStream * stream = NULL);
#endif // USE_CUDA #endif // USE_CUDA
......
...@@ -32,7 +32,7 @@ c = trans(a) * trans(b) * alpha + c * beta ...@@ -32,7 +32,7 @@ c = trans(a) * trans(b) * alpha + c * beta
where trans() returns the transposed matrix if the flag is fired where trans() returns the transposed matrix if the flag is fired
*/ */
void _MatrixMul2D(const XTensor * a, MATRIX_TRANS_TYPE transposedA, const XTensor * b, MATRIX_TRANS_TYPE transposedB, XTensor * c, void _MatrixMul2D(const XTensor * a, MATRIX_TRANS_TYPE transposedA, const XTensor * b, MATRIX_TRANS_TYPE transposedB, XTensor * c,
DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0, XPRunner * parallelRunner = NULL, XStream * stream = NULL); DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0, XPRunner * parallelRunner = NULL, XStream * stream = NULL);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -41,8 +41,8 @@ where trans() return the transposed matrix if the flag is fired ...@@ -41,8 +41,8 @@ where trans() return the transposed matrix if the flag is fired
>> parallelRunner - parallel processing module >> parallelRunner - parallel processing module
*/ */
void _MatrixMul2DParallel(const XTensor * a, MATRIX_TRANS_TYPE transposedA, void _MatrixMul2DParallel(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
const XTensor * b, MATRIX_TRANS_TYPE transposedB, const XTensor * b, MATRIX_TRANS_TYPE transposedB,
XTensor * c, DTYPE alpha, DTYPE beta, XPRunner * parallelRunner) XTensor * c, DTYPE alpha, DTYPE beta, XPRunner * parallelRunner)
{ {
CheckNTErrors((a && b && c), "Empty input tensors!"); CheckNTErrors((a && b && c), "Empty input tensors!");
CheckNTErrors((a->order == 2 && b->order == 2 && c->order == 2), CheckNTErrors((a->order == 2 && b->order == 2 && c->order == 2),
......
...@@ -32,8 +32,8 @@ c = trans(a) * trans(b) * alpha + c * beta ...@@ -32,8 +32,8 @@ c = trans(a) * trans(b) * alpha + c * beta
where trans() returns the transposed matrix if the flag is fired. where trans() returns the transposed matrix if the flag is fired.
*/ */
extern "C" extern "C"
void _MatrixMul2DParallel(const XTensor * a, MATRIX_TRANS_TYPE transposedA, const XTensor * b, MATRIX_TRANS_TYPE transposedB, XTensor * c, void _MatrixMul2DParallel(const XTensor * a, MATRIX_TRANS_TYPE transposedA, const XTensor * b, MATRIX_TRANS_TYPE transposedB,
DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0, XPRunner * parallelRunner = NULL); XTensor * c, DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0, XPRunner * parallelRunner = NULL);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -46,9 +46,8 @@ where trans() returns the transposed matrix if the flag is fired ...@@ -46,9 +46,8 @@ where trans() returns the transposed matrix if the flag is fired
>> parallelRunner - parallel processing module >> parallelRunner - parallel processing module
*/ */
void _MatrixMulBatched(const XTensor * a, MATRIX_TRANS_TYPE transposedA, void _MatrixMulBatched(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
const XTensor * b, MATRIX_TRANS_TYPE transposedB, const XTensor * b, MATRIX_TRANS_TYPE transposedB,
XTensor * c, DTYPE alpha, DTYPE beta, XTensor * c, DTYPE alpha, DTYPE beta, XPRunner * parallelRunner)
XPRunner * parallelRunner)
{ {
CheckNTErrors((a && b && c), "Empty input tensors!"); CheckNTErrors((a && b && c), "Empty input tensors!");
CheckNTErrors((a->dataType == b->dataType && a->dataType == c->dataType), CheckNTErrors((a->dataType == b->dataType && a->dataType == c->dataType),
...@@ -156,6 +155,7 @@ void _MatrixMulBatched(const XTensor * a, MATRIX_TRANS_TYPE transposedA, ...@@ -156,6 +155,7 @@ void _MatrixMulBatched(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
/* /*
matrix multiplication of the two tensors (do it on site) matrix multiplication of the two tensors (do it on site)
c = trans(a) * trans(b) * alpha
make a new tensor to keep the result and return it make a new tensor to keep the result and return it
for each 2-dimensional data array in a (denoted as ai) and for each 2-dimensional data array in a (denoted as ai) and
...@@ -168,14 +168,12 @@ where trans() returns the transposed matrix if the flag is fired. ...@@ -168,14 +168,12 @@ where trans() returns the transposed matrix if the flag is fired.
>> b - tensor b >> b - tensor b
>> transposedB - indicates whether the matrices in b are transposed >> transposedB - indicates whether the matrices in b are transposed
>> alpha - a coefficient >> alpha - a coefficient
>> beta - another coefficient
>> parallelRunner - parallel processing module >> parallelRunner - parallel processing module
<< return - the result of matrix multiplication of the two tensors << return - the result of matrix multiplication of the two tensors
*/ */
XTensor MatrixMulBatched(const XTensor &a, MATRIX_TRANS_TYPE transposedA, const XTensor &b, MATRIX_TRANS_TYPE transposedB, XTensor MatrixMulBatched(const XTensor &a, MATRIX_TRANS_TYPE transposedA, const XTensor &b, MATRIX_TRANS_TYPE transposedB,
DTYPE alpha, DTYPE beta, XPRunner * parallelRunner) DTYPE alpha, XPRunner * parallelRunner)
{ {
CheckNTErrors(&a != &NULLTensor && &b != &NULLTensor, "Empty input tensors!");
CheckNTErrors(a.dataType == b.dataType, "Input tensors should have the same data type!"); CheckNTErrors(a.dataType == b.dataType, "Input tensors should have the same data type!");
CheckNTErrors(a.order >= 2 && b.order >= 2, "Input tensors must have an order >= 2!"); CheckNTErrors(a.order >= 2 && b.order >= 2, "Input tensors must have an order >= 2!");
CheckNTErrors(a.order == b.order, "Input tensor and output tensor must have the same order!"); CheckNTErrors(a.order == b.order, "Input tensor and output tensor must have the same order!");
...@@ -190,24 +188,23 @@ XTensor MatrixMulBatched(const XTensor &a, MATRIX_TRANS_TYPE transposedA, const ...@@ -190,24 +188,23 @@ XTensor MatrixMulBatched(const XTensor &a, MATRIX_TRANS_TYPE transposedA, const
int order = a.order; int order = a.order;
int sub = 0; int sub = 0;
int * dimSize = new int[order]; int * dimSize = new int[order];
for (int i = 2; i < a.order; i++) for (int i = 0; i < a.order - 2; i++)
dimSize[sub++] = a.dimSizeRDI[i]; dimSize[sub++] = a.dimSize[i];
dimSize[sub++] = an; dimSize[sub++] = an;
dimSize[sub++] = bm; dimSize[sub++] = bm;
XTensor c = NewTensor(order, dimSize, a.dataType, a.denseRatio, a.devID, a.mem); float dr = (!a.isSparse || !b.isSparse) ? 1.0F : MAX(a.denseRatio, b.denseRatio);
c.SetZeroAll(); XTensor c(order, dimSize, a.dataType, dr, a.devID, a.mem);
c.SetTMP(); c.SetTMP();
/*call _MatrixMulBatched function */ /*call _MatrixMulBatched function */
_MatrixMulBatched(&a, transposedA, &b, transposedB, &c, alpha, beta, parallelRunner); _MatrixMulBatched(&a, transposedA, &b, transposedB, &c, alpha, 0, parallelRunner);
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_MATRIXMULBATCHED); XLink::MakeLink(&a, &b, &c, MATH_MATRIXMULBATCHED);
XLink::AddParamToHeadTrans(&c, transposedA); XLink::AddParamToHeadTrans(&c, transposedA);
XLink::AddParamToHeadTrans(&c, transposedB); XLink::AddParamToHeadTrans(&c, transposedB);
XLink::AddParamToHead(&c, alpha); XLink::AddParamToHead(&c, alpha);
XLink::AddParamToHead(&c, beta);
/* destroy variables */ /* destroy variables */
delete[] dimSize; delete[] dimSize;
......
...@@ -27,7 +27,7 @@ ...@@ -27,7 +27,7 @@
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* /*
matrix multiplication of the two tensors matrix multiplication of the two tensors c = trans(a) * trans(b) * alpha + c * beta
for each 2-dimensional data array in a (denoted as ai) and for each 2-dimensional data array in a (denoted as ai) and
each 2-dimensional data array in b (denoted as bi), we have each 2-dimensional data array in b (denoted as bi), we have
...@@ -35,10 +35,10 @@ ci = trans(ai) * trans(bi) * alpha + cm * beta ...@@ -35,10 +35,10 @@ ci = trans(ai) * trans(bi) * alpha + cm * beta
where trans() returns the transposed matrix if the flag is fired where trans() returns the transposed matrix if the flag is fired
*/ */
void _MatrixMulBatched(const XTensor * a, MATRIX_TRANS_TYPE transposedA, const XTensor * b, MATRIX_TRANS_TYPE transposedB, void _MatrixMulBatched(const XTensor * a, MATRIX_TRANS_TYPE transposedA, const XTensor * b, MATRIX_TRANS_TYPE transposedB,
XTensor * c, DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0, XPRunner * parallelRunner = NULL); XTensor * c, DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0, XPRunner * parallelRunner = NULL);
/* /*
matrix multiplication of the two tensors (return a XTensor structure) matrix multiplication of the two tensors (return a XTensor structure) c = trans(a) * trans(b) * alpha
make a new tensor to keep the result and return it make a new tensor to keep the result and return it
for each 2-dimensional data array in a (denoted as ai) and for each 2-dimensional data array in a (denoted as ai) and
...@@ -47,7 +47,7 @@ ci = trans(ai) * trans(bi) * alpha + cm * beta ...@@ -47,7 +47,7 @@ ci = trans(ai) * trans(bi) * alpha + cm * beta
where trans() returns the transposed matrix if the flag is fired where trans() returns the transposed matrix if the flag is fired
*/ */
XTensor MatrixMulBatched(const XTensor &a, MATRIX_TRANS_TYPE transposedA, const XTensor &b, MATRIX_TRANS_TYPE transposedB, XTensor MatrixMulBatched(const XTensor &a, MATRIX_TRANS_TYPE transposedA, const XTensor &b, MATRIX_TRANS_TYPE transposedB,
DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0, XPRunner * parallelRunner = NULL); DTYPE alpha = (DTYPE)1.0, XPRunner * parallelRunner = NULL);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
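MatrixMulBatched follows the same pattern: beta is gone from the front end and the leading dimensions of the result now follow a.dimSize directly. A sketch, assuming a is batch * n * k and b is batch * k * m so that each 2-D slice can be multiplied:

XTensor c = MatrixMulBatched(a, X_NOTRANS, b, X_NOTRANS);      /* c_i = a_i * b_i, result is batch * n * m */
XTensor d = MatrixMulBatched(a, X_TRANS, b, X_NOTRANS, 0.5F);  /* d_i = trans(a_i) * b_i * 0.5, provided the transposed shapes still match */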
...@@ -142,16 +142,15 @@ void _MultiplyMe(XTensor * a, const XTensor * b, DTYPE alpha, int leadingDim) ...@@ -142,16 +142,15 @@ void _MultiplyMe(XTensor * a, const XTensor * b, DTYPE alpha, int leadingDim)
element-wise product of two tensors (return a XTensor structure) element-wise product of two tensors (return a XTensor structure)
make a new tensor c to keep the result and return it make a new tensor c to keep the result and return it
c(i) = a(i)*b(i) + \alpha * c(i) c(i) = a(i)*b(i)
where i is the index of the item where i is the index of the item
>> a - tensor a >> a - tensor a
>> b - tensor b >> b - tensor b
>> alpha - the coefficient
>> leadingDim - the dimension along which we perform broadcasting >> leadingDim - the dimension along which we perform broadcasting
<< return - the product of the tensors << return - the product of the tensors
*/ */
XTensor Multiply(const XTensor &a, const XTensor &b, DTYPE alpha, int leadingDim) XTensor Multiply(const XTensor &a, const XTensor &b, int leadingDim)
{ {
CheckNTErrors(a.dimSize[leadingDim] == b.dimSize[leadingDim], "TODO!"); CheckNTErrors(a.dimSize[leadingDim] == b.dimSize[leadingDim], "TODO!");
...@@ -159,11 +158,10 @@ XTensor Multiply(const XTensor &a, const XTensor &b, DTYPE alpha, int leadingDim ...@@ -159,11 +158,10 @@ XTensor Multiply(const XTensor &a, const XTensor &b, DTYPE alpha, int leadingDim
c.SetTMP(); c.SetTMP();
/* call _Multiply function */ /* call _Multiply function */
_Multiply(&a, &b, &c, alpha, leadingDim); _Multiply(&a, &b, &c, 0, leadingDim);
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_MULTIPLY); XLink::MakeLink(&a, &b, &c, MATH_MULTIPLY);
XLink::AddParamToHead(&c, alpha);
XLink::AddParamToHeadInt(&c, leadingDim); XLink::AddParamToHeadInt(&c, leadingDim);
return c; return c;
......
...@@ -44,10 +44,10 @@ void _MultiplyMe(XTensor * a, const XTensor * b, DTYPE alpha = 0, int leadingDim ...@@ -44,10 +44,10 @@ void _MultiplyMe(XTensor * a, const XTensor * b, DTYPE alpha = 0, int leadingDim
/* /*
element-wise product of two tensors (return a XTensor structure) element-wise product of two tensors (return a XTensor structure)
make a new tensor to keep the result and return it make a new tensor to keep the result and return it
c(i) = a(i)*b(i) + \alpha * c(i) c(i) = a(i)*b(i)
where i is the index of the element where i is the index of the element
*/ */
XTensor Multiply(const XTensor &a, const XTensor &b, DTYPE alpha = 0, int leadingDim = 0); XTensor Multiply(const XTensor &a, const XTensor &b, int leadingDim = 0);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
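With alpha removed, the front-end Multiply is a pure element-wise product. A sketch, assuming a and b are initialized tensors with matching sizes along the chosen dimension:

XTensor c = Multiply(a, b);     /* c(i) = a(i) * b(i) */
XTensor d = Multiply(a, b, 1);  /* leadingDim = 1: the dimension along which broadcasting is performed */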
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
*/ */
#include "../../XTensor.h" #include "../../XTensor.h"
#include "../../XName.h"
#include "Negate.h" #include "Negate.h"
#include "Negate.cuh" #include "Negate.cuh"
...@@ -27,21 +28,55 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -27,21 +28,55 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* /*
set every entry to its minus value set every entry to its minus value
>> a - the tensor we are processing >> a - input tensor we are processing
>> b - output tensor we are processing
*/ */
void _Negate(XTensor * a) void _Negate(const XTensor * a, XTensor * b)
{ {
#ifdef USE_CUDA #ifdef USE_CUDA
/* run it on GPUs */ /* run it on GPUs */
if (a->devID >= 0) { if (a->devID >= 0) {
_CudaNegate(a); _CudaNegate(a, b);
return; return;
} }
#endif #endif
CheckNTErrors((XTensor::IsIdentical(a, b)), "Input tensors should have the same type!");
CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!"); CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!");
DTYPE * d = (DTYPE*)a->data; DTYPE * d = (DTYPE*)a->data;
DTYPE * db = (DTYPE*)b->data;
for (int i = 0; i < a->unitNum; i++) for (int i = 0; i < a->unitNum; i++)
d[i] = -d[i]; db[i] = -d[i];
}
/*
set every entry to its minus value (do it on site)
keep the result in the input tensor a and return nothing
>> a - the tensor we are processing
*/
void _NegateMe(XTensor * a)
{
_Negate(a, a);
} }
/*
set every entry to its minus value (return a XTensor structure)
make a new tensor to keep the result and return it
>> a - input tensor we are processing
<< return - the minus value of input tensor
*/
XTensor Negate(const XTensor & a)
{
XTensor b(&a);
b.SetTMP();
/* call _Negate function */
_Negate(&a, &b);
/* tensor connections */
XLink::MakeLink(&a, NULL, &b, MATH_NEGATE);
return b;
}
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
...@@ -29,45 +29,49 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -29,45 +29,49 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA #ifdef USE_CUDA
/* /*
set each entry to its negative value (CUDA Kernel) set each entry to its negative value (CUDA Kernel)
>> d - pointer to the data array >> a - pointer to the input data array
>> b - pointer to the output data array
>> size - size of the data array >> size - size of the data array
*/ */
__global__ __global__
void KernelNegate(DTYPE * d, int size) void KernelNegate(DTYPE * a, DTYPE * b, int size)
{ {
int i = blockDim.x * blockIdx.x + threadIdx.x; int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size) if (i < size)
d[i] = -d[i]; b[i] = -a[i];
} }
/* /*
set each entry to its negative value (CUDA Kernel) set each entry to its negative value (CUDA Kernel)
This is for float16 computation This is for float16 computation
>> d - pointer to the data array >> a - pointer to the input data array
>> b - pointer to the output data array
>> size - size of the data array >> size - size of the data array
*/ */
__global__ __global__
void KernelNegate(__half * d, int size) void KernelNegate(__half * a, __half * b, int size)
{ {
int i = blockDim.x * blockIdx.x + threadIdx.x; int i = blockDim.x * blockIdx.x + threadIdx.x;
#if __CUDA_ARCH__ >= 530 || !defined(__CUDA_ARCH__) #if __CUDA_ARCH__ >= 530 || !defined(__CUDA_ARCH__)
if (i < size) if (i < size)
d[i] = __hsub(__float2half(0), d[i]); b[i] = __hsub(__float2half(0), a[i]);
#else #else
if (i < size) if (i < size)
d[i] = __float2half(-__half2float(d[i])); b[i] = __float2half(-__half2float(a[i]));
#endif #endif
} }
/* /*
set each entry to its negative value set each entry to its negative value
>> a - the tensor >> a - input tensor
>> b - output tensor
*/ */
extern "C" extern "C"
void _CudaNegate(XTensor * a) void _CudaNegate(const XTensor * a, XTensor * b)
{ {
CheckNTErrors((XTensor::IsIdentical(a, b)), "Input tensors should have the same type!");
CheckNTErrors((a->isSparse == false), "TODO!"); CheckNTErrors((a->isSparse == false), "TODO!");
int gridSize[3]; int gridSize[3];
...@@ -82,10 +86,10 @@ void _CudaNegate(XTensor * a) ...@@ -82,10 +86,10 @@ void _CudaNegate(XTensor * a)
ProtectCudaDev(a->devID, devIDBackup); ProtectCudaDev(a->devID, devIDBackup);
if (a->dataType == DEFAULT_DTYPE) { if (a->dataType == DEFAULT_DTYPE) {
KernelNegate << <blocks, threads >> >((DTYPE*)a->data, a->unitNum); KernelNegate << <blocks, threads >> >((DTYPE*)a->data, (DTYPE*)b->data, a->unitNum);
} }
else if (a->dataType == X_FLOAT16) { else if (a->dataType == X_FLOAT16) {
KernelNegate << <blocks, threads >> >((__half*)a->data, a->unitNum); KernelNegate << <blocks, threads >> >((__half*)a->data, (__half*)b->data, a->unitNum);
} }
else { else {
ShowNTErrors("TODO!"); ShowNTErrors("TODO!");
......
...@@ -30,15 +30,15 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -30,15 +30,15 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* set each entry to its negative value (CUDA Kernel) */ /* set each entry to its negative value (CUDA Kernel) */
__global__ __global__
void KernelNegate(DTYPE * d, int size); void KernelNegate(DTYPE * a, DTYPE * b, int size);
/* set each entry to its negative value (CUDA Kernel) with float16 data type */ /* set each entry to its negative value (CUDA Kernel) with float16 data type */
__global__ __global__
void KernelNegate(__half * d, int size); void KernelNegate(__half * a, __half * b, int size);
/* set each entry to its negative value */ /* set each entry to its negative value */
extern "C" extern "C"
void _CudaNegate(XTensor * a); void _CudaNegate(const XTensor * a, XTensor * b);
#endif // USE_CUDA #endif // USE_CUDA
......
...@@ -27,8 +27,19 @@ ...@@ -27,8 +27,19 @@
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* set every entry to its minus value */ /* set every entry to its minus value */
extern "C" void _Negate(const XTensor * a, XTensor * b);
void _Negate(XTensor * a);
/*
set every entry to its minus value (do it on site)
keep the result in the input tensor a and return nothing
*/
void _NegateMe(XTensor * a);
/*
set every entry to its minus value (return a XTensor structure)
make a new tensor to keep the result and return it
*/
XTensor Negate(const XTensor & a);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
*/ */
#include "../../XTensor.h" #include "../../XTensor.h"
#include "../../XName.h"
#include "Sign.h" #include "Sign.h"
#include "Sign.cuh" #include "Sign.cuh"
...@@ -27,27 +28,60 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -27,27 +28,60 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* /*
set every entry to its sign value set every entry to its sign value
>> a - the tensor we are processing >> a - input tensor we are processing
>> b - output tensor we are processing
*/ */
void _Sign(XTensor * a) void _Sign(const XTensor * a, XTensor * b)
{ {
#ifdef USE_CUDA #ifdef USE_CUDA
/* run it on GPUs */ /* run it on GPUs */
if (a->devID >= 0) { if (a->devID >= 0) {
_CudaSign(a); _CudaSign(a, b);
return; return;
} }
#endif #endif
CheckNTErrors((XTensor::IsIdentical(a, b)), "Input tensors should have the same type!");
CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!"); CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!");
DTYPE * d = (DTYPE*)a->data; DTYPE * d = (DTYPE*)a->data;
DTYPE * db = (DTYPE*)b->data;
for (int i = 0; i < a->unitNum; i++) { for (int i = 0; i < a->unitNum; i++) {
if (d[i] > 0) if (d[i] > 0)
d[i] = 1.0F; db[i] = 1.0F;
else if (d[i] == 0) else if (d[i] == 0)
d[i] = 0.0F; db[i] = 0.0F;
else else
d[i] = -1.0F; db[i] = -1.0F;
} }
} }
/*
set every entry to its sign value (do it on site)
keep the result in the input tensor a and return nothing
>> a - the tensor we are processing
*/
void _SignMe(XTensor * a)
{
_Sign(a, a);
}
/*
set every entry to its sign value (return a XTensor structure)
make a new tensor to keep the result and return it
>> a - input tensor we are processing
<< return - the sign value of the input tensor
*/
XTensor Sign(const XTensor & a)
{
XTensor b(&a);
b.SetTMP();
/* call _Sign function */
_Sign(&a, &b);
/* tensor connections */
XLink::MakeLink(&a, NULL, &b, MATH_SIGN);
return b;
}
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
...@@ -29,43 +29,47 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -29,43 +29,47 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA #ifdef USE_CUDA
/* /*
set each entry to its sign value (CUDA Kernel) set each entry to its sign value (CUDA Kernel)
>> d - pointer to the data array >> a - pointer to input data array
>> b - pointer to output data array
>> size - size of the data array >> size - size of the data array
*/ */
__global__ __global__
void KernelSign(DTYPE * d, int size) void KernelSign(DTYPE * a, DTYPE * b, int size)
{ {
int i = blockDim.x * blockIdx.x + threadIdx.x; int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size) { if (i < size) {
if (d[i] > 0) if (a[i] > 0)
d[i] = 1.0F; b[i] = 1.0F;
else if (d[i] == 0) else if (a[i] == 0)
d[i] = 0.0F; b[i] = 0.0F;
else else
d[i] = -1.0F; b[i] = -1.0F;
} }
} }
/* /*
set each entry to its sign value (CUDA Kernel) set each entry to its sign value with float16 data type value (CUDA Kernel)
This is for float16 computation This is for float16 computation
>> d - pointer to the data array >> a - pointer to input data array
>> b - pointer to output data array
>> size - size of the data array >> size - size of the data array
*/ */
__global__ __global__
void KernelSign(__half * d, int size) void KernelSign(__half * a, __half * b, int size)
{ {
return; return;
} }
/* /*
set each entry to its with float16 data type value set each entry to its sign value
>> a - the tensor >> a - input tensor we are processing
>> b - output tensor we are processing
*/ */
extern "C" extern "C"
void _CudaSign(XTensor * a) void _CudaSign(const XTensor * a, XTensor * b)
{ {
CheckNTErrors((XTensor::IsIdentical(a, b)), "Input tensors should have the same type!");
CheckNTErrors((a->isSparse == false), "TODO!"); CheckNTErrors((a->isSparse == false), "TODO!");
int gridSize[3]; int gridSize[3];
...@@ -80,10 +84,10 @@ void _CudaSign(XTensor * a) ...@@ -80,10 +84,10 @@ void _CudaSign(XTensor * a)
ProtectCudaDev(a->devID, devIDBackup); ProtectCudaDev(a->devID, devIDBackup);
if (a->dataType == DEFAULT_DTYPE) { if (a->dataType == DEFAULT_DTYPE) {
KernelSign << <blocks, threads >> >((DTYPE*)a->data, a->unitNum); KernelSign << <blocks, threads >> >((DTYPE*)a->data, (DTYPE*)b->data, a->unitNum);
} }
else if (a->dataType == X_FLOAT16) { else if (a->dataType == X_FLOAT16) {
KernelSign << <blocks, threads >> >((__half*)a->data, a->unitNum); KernelSign << <blocks, threads >> >((__half*)a->data, (__half*)b->data, a->unitNum);
} }
else { else {
ShowNTErrors("TODO!"); ShowNTErrors("TODO!");
......
...@@ -30,15 +30,15 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -30,15 +30,15 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* set each entry to its sign value (CUDA Kernel) */ /* set each entry to its sign value (CUDA Kernel) */
__global__ __global__
void KernelSign(DTYPE * d, int size); void KernelSign(DTYPE * a, DTYPE * b, int size);
/* set each entry to its sign value (CUDA Kernel) with float16 data type*/ /* set each entry to its sign value (CUDA Kernel) with float16 data type*/
__global__ __global__
void KernelSign(__half * d, int size); void KernelSign(__half * a, __half * b, int size);
/* set each entry to its sign value */ /* set each entry to its sign value */
extern "C" extern "C"
void _CudaSign(XTensor * a); void _CudaSign(const XTensor * a, XTensor * b);
#endif // USE_CUDA #endif // USE_CUDA
......
...@@ -27,8 +27,19 @@ ...@@ -27,8 +27,19 @@
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* set every entry to its sign value */ /* set every entry to its sign value */
extern "C" void _Sign(const XTensor * a, XTensor * b);
void _Sign(XTensor * a);
/*
set every entry to its sign value (do it on site)
keep the result in the input tensor a and return nothing
*/
void _SignMe(XTensor * a);
/*
set every entry to its sign value (return a XTensor structure)
make a new tensor to keep the result and return it
*/
XTensor Sign(const XTensor & a);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -37,8 +37,8 @@ c = trans(a) * trans(b) * \alpha + c * \beta ...@@ -37,8 +37,8 @@ c = trans(a) * trans(b) * \alpha + c * \beta
>> c - output matrix (2d tensor) >> c - output matrix (2d tensor)
*/ */
void _MatrixMULCPU(const XTensor * a, MATRIX_TRANS_TYPE transposedA, void _MatrixMULCPU(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
const XTensor * b, MATRIX_TRANS_TYPE transposedB, const XTensor * b, MATRIX_TRANS_TYPE transposedB,
XTensor * c, DTYPE alpha, DTYPE beta) XTensor * c, DTYPE alpha, DTYPE beta)
{ {
CheckNTErrors((a && b && c), "Empty input tensors!"); CheckNTErrors((a && b && c), "Empty input tensors!");
CheckNTErrors((a->order == 2 && b->order == 2 && c->order == 2), CheckNTErrors((a->order == 2 && b->order == 2 && c->order == 2),
......
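For reference, a hedged sketch of a plain (non-transposed) call computing c = a * b with alpha = 1 and beta = 0. X_NOTRANS is assumed to be the MATRIX_TRANS_TYPE value for "no transposition", and InitTensor2D / _SetDataRand are assumed helpers; shapes are illustrative only.
XTensor a, b, c;
InitTensor2D(&a, 2, 3);
InitTensor2D(&b, 3, 4);
InitTensor2D(&c, 2, 4);
_SetDataRand(&a, -1.0F, 1.0F);
_SetDataRand(&b, -1.0F, 1.0F);
c.SetZeroAll();
/* c = a * b, i.e. alpha = 1, beta = 0 (the declared defaults) */
_MatrixMULCPU(&a, X_NOTRANS, &b, X_NOTRANS, &c, 1.0F, 0.0F);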
...@@ -32,11 +32,11 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -32,11 +32,11 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
matrix multiplication via cuda version BLAS matrix multiplication via cuda version BLAS
*/ */
void _CudaBLASMatrixMUL(cublasHandle_t * handle, void _CudaBLASMatrixMUL(cublasHandle_t * handle,
const void * a, MATRIX_TRANS_TYPE transposedA, TENSOR_DATA_TYPE dataTypeA, const void * a, MATRIX_TRANS_TYPE transposedA, TENSOR_DATA_TYPE dataTypeA,
const void * b, MATRIX_TRANS_TYPE transposedB, TENSOR_DATA_TYPE dataTypeB, const void * b, MATRIX_TRANS_TYPE transposedB, TENSOR_DATA_TYPE dataTypeB,
void * c, TENSOR_DATA_TYPE dataTypeC, void * c, TENSOR_DATA_TYPE dataTypeC,
int na, int ma, int nb, int mb, int nc, int mc, int na, int ma, int nb, int mb, int nc, int mc,
DTYPE alpha, DTYPE beta) DTYPE alpha, DTYPE beta)
{ {
/* /*
matrix-matrix multiplication matrix-matrix multiplication
...@@ -89,11 +89,11 @@ void _CudaBLASMatrixMUL(cublasHandle_t * handle, ...@@ -89,11 +89,11 @@ void _CudaBLASMatrixMUL(cublasHandle_t * handle,
matrix multiplication via cuda version BLAS matrix multiplication via cuda version BLAS
*/ */
void _CudaBLASMatrixMULBatched(cublasHandle_t * handle, void _CudaBLASMatrixMULBatched(cublasHandle_t * handle,
const void ** a, MATRIX_TRANS_TYPE transposedA, TENSOR_DATA_TYPE dataTypeA, const void ** a, MATRIX_TRANS_TYPE transposedA, TENSOR_DATA_TYPE dataTypeA,
const void ** b, MATRIX_TRANS_TYPE transposedB, TENSOR_DATA_TYPE dataTypeB, const void ** b, MATRIX_TRANS_TYPE transposedB, TENSOR_DATA_TYPE dataTypeB,
void ** c, TENSOR_DATA_TYPE dataTypeC, void ** c, TENSOR_DATA_TYPE dataTypeC,
int count, int na, int ma, int nb, int mb, int nc, int mc, int count, int na, int ma, int nb, int mb, int nc, int mc,
DTYPE alpha, DTYPE beta) DTYPE alpha, DTYPE beta)
{ {
/* /*
matrix-matrix multiplication matrix-matrix multiplication
...@@ -145,11 +145,11 @@ void _CudaBLASMatrixMULBatched(cublasHandle_t * handle, ...@@ -145,11 +145,11 @@ void _CudaBLASMatrixMULBatched(cublasHandle_t * handle,
/* matrix multiplication in batch and strided mode via cuda version BLAS */ /* matrix multiplication in batch and strided mode via cuda version BLAS */
extern "C" extern "C"
void _CudaBLASMatrixMULBatchedStrided(cublasHandle_t * handle, void _CudaBLASMatrixMULBatchedStrided(cublasHandle_t * handle,
const void * a, MATRIX_TRANS_TYPE transposedA, TENSOR_DATA_TYPE dataTypeA, long long int strideA, const void * a, MATRIX_TRANS_TYPE transposedA, TENSOR_DATA_TYPE dataTypeA, long long int strideA,
const void * b, MATRIX_TRANS_TYPE transposedB, TENSOR_DATA_TYPE dataTypeB, long long int strideB, const void * b, MATRIX_TRANS_TYPE transposedB, TENSOR_DATA_TYPE dataTypeB, long long int strideB,
void * c, TENSOR_DATA_TYPE dataTypeC, long long int strideC, void * c, TENSOR_DATA_TYPE dataTypeC, long long int strideC,
int count, int na, int ma, int nb, int mb, int nc, int mc, int count, int na, int ma, int nb, int mb, int nc, int mc,
DTYPE alpha, DTYPE beta) DTYPE alpha, DTYPE beta)
{ {
/* /*
matrix-matrix multiplication matrix-matrix multiplication
...@@ -202,10 +202,10 @@ void _CudaBLASMatrixMULBatchedStrided(cublasHandle_t * handle, ...@@ -202,10 +202,10 @@ void _CudaBLASMatrixMULBatchedStrided(cublasHandle_t * handle,
matrix multiplication via cuda version BLAS matrix multiplication via cuda version BLAS
*/ */
void _CudaBLASMatrixMULList(cublasHandle_t * handle, void _CudaBLASMatrixMULList(cublasHandle_t * handle,
const XList * a, MATRIX_TRANS_TYPE transposedA, const XList * a, MATRIX_TRANS_TYPE transposedA,
const XList * b, MATRIX_TRANS_TYPE transposedB, const XList * b, MATRIX_TRANS_TYPE transposedB,
XList * c, XList * c,
int count, DTYPE alpha, DTYPE beta) int count, DTYPE alpha, DTYPE beta)
{ {
CheckNTErrors((a && b && c), "Empty input lists!"); CheckNTErrors((a && b && c), "Empty input lists!");
CheckNTErrors((a->count == b->count && a->count == c->count), "Input lists must be of the same size!"); CheckNTErrors((a->count == b->count && a->count == c->count), "Input lists must be of the same size!");
...@@ -298,12 +298,12 @@ void _CudaBLASMatrixMULList(cublasHandle_t * handle, ...@@ -298,12 +298,12 @@ void _CudaBLASMatrixMULList(cublasHandle_t * handle,
cudaMemcpy(cpGPU, cp, sizeof(DTYPE*) * c->count, cudaMemcpyHostToDevice); cudaMemcpy(cpGPU, cp, sizeof(DTYPE*) * c->count, cudaMemcpyHostToDevice);
_CudaBLASMatrixMULBatched(handle, _CudaBLASMatrixMULBatched(handle,
(const void**)apGPU, transposedA, a0->dataType, (const void**)apGPU, transposedA, a0->dataType,
(const void**)bpGPU, transposedB, b0->dataType, (const void**)bpGPU, transposedB, b0->dataType,
(void**)cpGPU, c0->dataType, a->count, (void**)cpGPU, c0->dataType, a->count,
a0->dimSize[0], a0->dimSize[1], a0->dimSize[0], a0->dimSize[1],
b0->dimSize[0], b0->dimSize[1], b0->dimSize[0], b0->dimSize[1],
c0->dimSize[0], c0->dimSize[1], alpha, beta); c0->dimSize[0], c0->dimSize[1], alpha, beta);
delete[] ap; delete[] ap;
delete[] bp; delete[] bp;
delete[] cp; delete[] cp;
......
...@@ -28,38 +28,42 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -28,38 +28,42 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* matrix multiplication (BLAS) */ /* matrix multiplication (BLAS) */
extern "C" extern "C"
void _MatrixMULCPU(const XTensor * a, MATRIX_TRANS_TYPE transposedA, const XTensor * b, MATRIX_TRANS_TYPE transposedB, XTensor * c, DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0); void _MatrixMULCPU(const XTensor * a, MATRIX_TRANS_TYPE transposedA, const XTensor * b, MATRIX_TRANS_TYPE transposedB,
XTensor * c, DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0);
#ifdef USE_CUDA #ifdef USE_CUDA
/* matrix multiplication via cuda version BLAS */ /* matrix multiplication via cuda version BLAS */
extern "C" extern "C"
void _CudaBLASMatrixMUL(cublasHandle_t * handle, void _CudaBLASMatrixMUL(cublasHandle_t * handle,
const void * a, MATRIX_TRANS_TYPE transposedA, TENSOR_DATA_TYPE dataTypeA, const void * a, MATRIX_TRANS_TYPE transposedA, TENSOR_DATA_TYPE dataTypeA,
const void * b, MATRIX_TRANS_TYPE transposedB, TENSOR_DATA_TYPE dataTypeB, const void * b, MATRIX_TRANS_TYPE transposedB, TENSOR_DATA_TYPE dataTypeB,
void * c, TENSOR_DATA_TYPE dataTypeC, void * c, TENSOR_DATA_TYPE dataTypeC,
int na, int ma, int nb, int mb, int nc, int mc, DTYPE alpha = (DTYPE)1.0, DTYPE beta = 1.0); int na, int ma, int nb, int mb, int nc, int mc, DTYPE alpha = (DTYPE)1.0, DTYPE beta = 1.0);
/* matrix multiplication in batch mode via cuda version BLAS */ /* matrix multiplication in batch mode via cuda version BLAS */
extern "C" extern "C"
void _CudaBLASMatrixMULBatched(cublasHandle_t * handle, void _CudaBLASMatrixMULBatched(cublasHandle_t * handle,
const void ** a, MATRIX_TRANS_TYPE transposedA, TENSOR_DATA_TYPE dataTypeA, const void ** a, MATRIX_TRANS_TYPE transposedA, TENSOR_DATA_TYPE dataTypeA,
const void ** b, MATRIX_TRANS_TYPE transposedB, TENSOR_DATA_TYPE dataTypeB, const void ** b, MATRIX_TRANS_TYPE transposedB, TENSOR_DATA_TYPE dataTypeB,
void ** c, TENSOR_DATA_TYPE dataTypeC, void ** c, TENSOR_DATA_TYPE dataTypeC,
int count, int na, int ma, int nb, int mb, int nc, int mc, DTYPE alpha = (DTYPE)1.0, DTYPE beta = 1.0); int count, int na, int ma, int nb, int mb, int nc, int mc,
DTYPE alpha = (DTYPE)1.0, DTYPE beta = 1.0);
/* matrix multiplication in batch and strided mode via cuda version BLAS */ /* matrix multiplication in batch and strided mode via cuda version BLAS */
extern "C" extern "C"
void _CudaBLASMatrixMULBatchedStrided(cublasHandle_t * handle, void _CudaBLASMatrixMULBatchedStrided(cublasHandle_t * handle,
const void * a, MATRIX_TRANS_TYPE transposedA, TENSOR_DATA_TYPE dataTypeA, long long int strideA, const void * a, MATRIX_TRANS_TYPE transposedA, TENSOR_DATA_TYPE dataTypeA, long long int strideA,
const void * b, MATRIX_TRANS_TYPE transposedB, TENSOR_DATA_TYPE dataTypeB, long long int strideB, const void * b, MATRIX_TRANS_TYPE transposedB, TENSOR_DATA_TYPE dataTypeB, long long int strideB,
void * c, TENSOR_DATA_TYPE dataTypeC, long long int strideC, void * c, TENSOR_DATA_TYPE dataTypeC, long long int strideC,
int count, int na, int ma, int nb, int mb, int nc, int mc, DTYPE alpha = (DTYPE)1.0, DTYPE beta = 1.0); int count, int na, int ma, int nb, int mb, int nc, int mc,
DTYPE alpha = (DTYPE)1.0, DTYPE beta = 1.0);
/* matrix multiplication in batch mode via cuda version BLAS */ /* matrix multiplication in batch mode via cuda version BLAS */
extern "C" extern "C"
void _CudaBLASMatrixMULList(cublasHandle_t * handle, const XList * a, MATRIX_TRANS_TYPE transposedA, const XList * b, MATRIX_TRANS_TYPE transposedB, XList * c, void _CudaBLASMatrixMULList(cublasHandle_t * handle, const XList * a, MATRIX_TRANS_TYPE transposedA,
int count, DTYPE alpha = (DTYPE)1.0, DTYPE beta = 1.0); const XList * b, MATRIX_TRANS_TYPE transposedB, XList * c,
int count, DTYPE alpha = (DTYPE)1.0, DTYPE beta = 1.0);
#endif #endif
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -96,7 +96,6 @@ XTensor SelectRange(const XTensor &a, int dim, int low, int high) ...@@ -96,7 +96,6 @@ XTensor SelectRange(const XTensor &a, int dim, int low, int high)
int order = a.order; int order = a.order;
int * dimSize = new int[order]; int * dimSize = new int[order];
CheckNTErrors(&a != NULL, "Empty input tensors!");
CheckNTErrors(dim >= 0 && dim < a.order, "The input dimension is out of bounds!"); CheckNTErrors(dim >= 0 && dim < a.order, "The input dimension is out of bounds!");
CheckNTErrors(low < high, "Illegal range specified!"); CheckNTErrors(low < high, "Illegal range specified!");
...@@ -110,8 +109,8 @@ XTensor SelectRange(const XTensor &a, int dim, int low, int high) ...@@ -110,8 +109,8 @@ XTensor SelectRange(const XTensor &a, int dim, int low, int high)
dimSize[i] = a.dimSize[i]; dimSize[i] = a.dimSize[i];
} }
XTensor c = NewTensor(order, dimSize, a.dataType, a.denseRatio, a.devID, a.mem); float dr = (!a.isSparse) ? 1.0F : a.denseRatio;
c.SetZeroAll(); XTensor c(order, dimSize, a.dataType, dr, a.devID, a.mem);
c.SetTMP(); c.SetTMP();
/* call _SelectRange function */ /* call _SelectRange function */
......
...@@ -21,6 +21,8 @@ ...@@ -21,6 +21,8 @@
*/ */
#include "SetData.h" #include "SetData.h"
#include "SetData.cuh"
#include "../../XUtility.h"
#include "../movement/CopyValues.h" #include "../movement/CopyValues.h"
#if !defined( WIN32 ) && !defined( _WIN32 ) #if !defined( WIN32 ) && !defined( _WIN32 )
...@@ -35,13 +37,151 @@ ...@@ -35,13 +37,151 @@
namespace nts{ // namespace nts(NiuTrans.Tensor) namespace nts{ // namespace nts(NiuTrans.Tensor)
/*
generate data items with a fixed value p
>> tensor - the tensor whose data array would be initialized
>> valuePointer - pointer to the value used to initialize the tensor
*/
void _SetDataFixed(XTensor * tensor, void * valuePointer)
{
int num = tensor->unitNum;
if(tensor->dataType == X_INT){
int p = *(int*)valuePointer;
if(tensor->devID < 0){
int * d = (int*)tensor->data;
if(num % 4 == 0){
for(int i = 0; i < num; i += 4){
d[i] = p;
d[i + 1] = p;
d[i + 2] = p;
d[i + 3] = p;
}
}
else{
for(int i = 0; i < num; i++)
d[i] = p;
}
}
else{
#ifdef USE_CUDA
CudaSetDataFixedInt(tensor, p);
#endif
}
}
else if(tensor->dataType == X_FLOAT){
float p = *(float*)valuePointer;
if(tensor->devID < 0){
float * d = (float*)tensor->data;
if(num % 4 == 0){
for(int i = 0; i < num; i += 4){
d[i] = p;
d[i + 1] = p;
d[i + 2] = p;
d[i + 3] = p;
}
}
else{
for(int i = 0; i < num; i++)
d[i] = p;
}
}
else{
#ifdef USE_CUDA
CudaSetDataFixedFloat(tensor, p);
#endif
}
}
else if(tensor->dataType == X_DOUBLE){
double p = *(double*)valuePointer;
if(tensor->devID < 0){
double * d = (double*)tensor->data;
if(num % 4 == 0){
for(int i = 0; i < num; i += 4){
d[i] = p;
d[i + 1] = p;
d[i + 2] = p;
d[i + 3] = p;
}
}
else{
for(int i = 0; i < num; i++)
d[i] = p;
}
}
else{
#ifdef USE_CUDA
CudaSetDataFixedDouble(tensor, p);
#endif
}
}
else{
ShowNTErrors("TODO");
}
}
/*
generate data items with a fixed value p (in default type)
>> tensor - the tensor whose data array would be initialized
>> p - number in default type
*/
void SetDataFixed(XTensor &tensor, DTYPE p)
{
_SetDataFixed(&tensor, &p);
}
/*
generate data items with a fixed value p (in integer)
>> tensor - the tensor whose data array would be initialized
>> p - an int-valued number
*/
void _SetDataFixedInt(XTensor * tensor, int p)
{
CheckNTErrors(tensor->dataType == X_INT, "the tensor must be in X_INT");
if(p == 0)
tensor->SetZeroAll();
else
_SetDataFixed(tensor, &p);
}
/*
generate data items with a fixed value p (in float)
>> tensor - the tensor whose data array would be initialized
>> p - a float-valued number
*/
void _SetDataFixedFloat(XTensor * tensor, float p)
{
CheckNTErrors(tensor->dataType == X_FLOAT, "the tensor must be in X_FLOAT");
if(p == 0)
tensor->SetZeroAll();
else
_SetDataFixed(tensor, &p);
}
/*
generate data items with a fixed value p (in double)
>> tensor - the tensor whose data array would be initialized
>> p - a double-valued number
*/
void _SetDataFixedDouble(XTensor * tensor, double p)
{
CheckNTErrors(tensor->dataType == X_DOUBLE, "the tensor must be in X_DOUBLE");
if(p == 0)
tensor->SetZeroAll();
else
_SetDataFixed(tensor, &p);
}
/* /*
generate data items with a uniform distribution in [low,high] generate data items with a uniform distribution in [low,high]
>> tensor - the tensor whose data array would be initialized >> tensor - the tensor whose data array would be initialized
>> low - lower value of the range >> low - lower value of the range
>> high - higher value of the range >> high - higher value of the range
*/ */
void SetDataRand(XTensor * tensor, DTYPE low, DTYPE high) void _SetDataRand(XTensor * tensor, DTYPE low, DTYPE high)
{ {
if(tensor == NULL) if(tensor == NULL)
return; return;
...@@ -76,7 +216,7 @@ void SetDataRand(XTensor * tensor, DTYPE low, DTYPE high) ...@@ -76,7 +216,7 @@ void SetDataRand(XTensor * tensor, DTYPE low, DTYPE high)
*/ */
else{ else{
XTensor * t2 = NewTensor(tensor->order, tensor->dimSize, tensor->dataType, tensor->denseRatio, -1); XTensor * t2 = NewTensor(tensor->order, tensor->dimSize, tensor->dataType, tensor->denseRatio, -1);
SetDataRand(t2, low, high); _SetDataRand(t2, low, high);
_CopyValues(t2, tensor); _CopyValues(t2, tensor);
delete t2; delete t2;
} }
......
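A short sketch of how the new initialization entry points fit together (illustrative only; InitTensor2D is an assumed helper and the default data type is assumed to be X_FLOAT). SetDataFixed dispatches on tensor->dataType, while the typed _SetDataFixed* helpers check the type and short-circuit p == 0 to SetZeroAll:
XTensor t;
InitTensor2D(&t, 4, 8);            /* dense tensor, assumed X_FLOAT by default */
SetDataFixed(t, 3.14F);            /* every entry becomes 3.14 (float branch) */
_SetDataFixedFloat(&t, 0.0F);      /* p == 0 falls back to SetZeroAll() */
_SetDataRand(&t, -0.1F, 0.1F);     /* uniform values in [-0.1, 0.1] */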
/*
* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-07-18
* I'm surprised that I did not write this file till today.
*/
#include "SetData.cuh"
#include "../../XDevice.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
set an integer data array with a fixed value p (in int)
>> d - pointer to the data array
>> size - size of the array
>> p - the initial value
*/
__global__
void KernelSetDataFixedInt(int * d, int size, int p)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size)
d[i] = p;
}
/*
generate data items with a fixed value p (in int)
>> tensor - the tensor for initialization
>> p - the initial value
*/
void CudaSetDataFixedInt(XTensor * tensor, int p)
{
CheckNTErrors(tensor->dataType == X_INT, "the tensor must be in X_INT!");
int gridSize[3];
int blockSize[3];
GDevs.GetCudaThread(tensor->devID, tensor->unitNum, gridSize, blockSize);
dim3 blocks(gridSize[0]);
dim3 threads(blockSize[0]);
int devIDBackup;
ProtectCudaDev(tensor->devID, devIDBackup);
KernelSetDataFixedInt <<<blocks, threads >>>((int*)tensor->data, tensor->unitNum, p);
BacktoCudaDev(tensor->devID, devIDBackup);
}
/*
set a float data array with a fixed value p (in float)
>> d - pointer to the data array
>> size - size of the array
>> p - the initial value
*/
__global__
void KernelSetDataFixedFloat(float * d, int size, float p)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size)
d[i] = p;
}
/*
generate data items with a fixed value p (in float)
>> tensor - the tensor for initialization
>> p - the initial value
*/
void CudaSetDataFixedFloat(XTensor * tensor, float p)
{
CheckNTErrors(tensor->dataType == X_FLOAT, "the tensor must be in X_FLOAT!");
int gridSize[3];
int blockSize[3];
GDevs.GetCudaThread(tensor->devID, tensor->unitNum, gridSize, blockSize);
dim3 blocks(gridSize[0]);
dim3 threads(blockSize[0]);
int devIDBackup;
ProtectCudaDev(tensor->devID, devIDBackup);
KernelSetDataFixedFloat <<<blocks, threads >>>((float*)tensor->data, tensor->unitNum, p);
BacktoCudaDev(tensor->devID, devIDBackup);
}
/*
set a double data array with a fixed value p (in double)
>> d - pointer to the data array
>> size - size of the array
>> p - the initial value
*/
__global__
void KernelSetDataFixedDouble(double * d, int size, double p)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size)
d[i] = p;
}
/*
generate data items with a fixed value p (in double)
>> tensor - the tensor for initialization
>> p - the initial value
*/
void CudaSetDataFixedDouble(XTensor * tensor, double p)
{
CheckNTErrors(tensor->dataType == X_DOUBLE, "the tensor must be in X_DOUBLE!");
int gridSize[3];
int blockSize[3];
GDevs.GetCudaThread(tensor->devID, tensor->unitNum, gridSize, blockSize);
dim3 blocks(gridSize[0]);
dim3 threads(blockSize[0]);
int devIDBackup;
ProtectCudaDev(tensor->devID, devIDBackup);
KernelSetDataFixedDouble <<<blocks, threads >>>((double*)tensor->data, tensor->unitNum, p);
BacktoCudaDev(tensor->devID, devIDBackup);
}
} // namespace nts(NiuTrans.Tensor)
/*
* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-07-18
* I'm surprised that I did not write this file till today.
*/
#ifndef __SETDATA_CUH__
#define __SETDATA_CUH__
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* generate data items with a fixed value p (in int) */
void CudaSetDataFixedInt(XTensor * tensor, int p);
/* generate data items with a fixed value p (in float) */
void CudaSetDataFixedFloat(XTensor * tensor, float p);
/* generate data items with a fixed value p (in double) */
void CudaSetDataFixedDouble(XTensor * tensor, double p);
} // namespace nts(NiuTrans.Tensor)
#endif // __SETDATA_CUH__
\ No newline at end of file
...@@ -28,28 +28,25 @@ ...@@ -28,28 +28,25 @@
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* generate data items with a fixed value p */ /* generate data items with a fixed value p */
extern "C" void _SetDataFixed(XTensor * tensor, void * valuePointer);
void SetDataFixed(XTensor * tensor, void * valuePointer);
/* generate data items with a fixed value p (in default type) */
void SetDataFixed(XTensor &tensor, DTYPE p);
/* generate data items with a fixed value p (in int) */ /* generate data items with a fixed value p (in int) */
extern "C" void _SetDataFixedInt(XTensor * tensor, int p);
void SetDataFixedInt(XTensor * tensor, int p);
/* generate data items with a fixed value p (in float) */ /* generate data items with a fixed value p (in float) */
extern "C" void _SetDataFixedFloat(XTensor * tensor, float p);
void SetDataFixedFloat(XTensor * tensor, float p);
/* generate data items with a fixed value p (in double) */ /* generate data items with a fixed value p (in double) */
extern "C" void _SetDataFixedDouble(XTensor * tensor, double p);
void SetDataFixedDouble(XTensor * tensor, double p);
/* generate data items with a uniform distribution in [low,high] */ /* generate data items with a uniform distribution in [low,high] */
extern "C" void _SetDataRand(XTensor * tensor, DTYPE low, DTYPE high);
void SetDataRand(XTensor * tensor, DTYPE low, DTYPE high);
/* generate data items with a normal distribution with specified mean and standard deviation */ /* generate data items with a normal distribution with specified mean and standard deviation */
extern "C" void _SetDataRandN(XTensor * tensor, DTYPE mean, DTYPE standardDeviation);
void SetDataRandN(XTensor * tensor, DTYPE mean, DTYPE standardDeviation);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
*/ */
#include "../../XTensor.h" #include "../../XTensor.h"
#include "../../XName.h"
#include "Log.h" #include "Log.h"
#include "Log.cuh" #include "Log.cuh"
#include <math.h> #include <math.h>
...@@ -27,22 +28,55 @@ ...@@ -27,22 +28,55 @@
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* /*
set every entry to its log value set every entry to its log value
>> a - the tensor we are processing >> a - input tensor we are processing
>> b - output tensor we are processing
*/ */
void _Log(XTensor * a) void _Log(const XTensor * a, XTensor * b)
{ {
#ifdef USE_CUDA #ifdef USE_CUDA
/* run it on GPUs */ /* run it on GPUs */
if (a->devID >= 0) { if (a->devID >= 0) {
_CudaLog(a); _CudaLog(a, b);
return; return;
} }
#endif #endif
CheckNTErrors((XTensor::IsIdentical(a, b)), "Input tensors should have the same type!");
CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!"); CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!");
DTYPE * d = (DTYPE*)a->data; DTYPE * d = (DTYPE*)a->data;
DTYPE * db = (DTYPE*)b->data;
for (int i = 0; i < a->unitNum; i++) for (int i = 0; i < a->unitNum; i++)
d[i] = (DTYPE)log(d[i]); db[i] = (DTYPE)log(d[i]);
}
/*
set every entry to its log value (do it on site)
keep the result in the input tensor a and return nothing
>> a - the tensor we are processing
*/
void _LogMe(XTensor * a)
{
_Log(a, a);
}
/*
set every entry to its log value (return a XTensor structure)
make a new tensor to keep the result and return it
>> a - input tensor we are processing
<< return - the log value of the input tensor
*/
XTensor Log(const XTensor & a)
{
XTensor b(&a);
b.SetTMP();
/* call _Log function */
_Log(&a, &b);
/* tensor connections */
XLink::MakeLink(&a, NULL, &b, MATH_LOG);
return b;
} }
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
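A minimal sketch of the three Log entry points (illustrative; InitTensor2D and SetDataFixed are assumed helpers):
XTensor x, y;
InitTensor2D(&x, 2, 2);
SetDataFixed(x, 1.0F);
y = Log(x);                        /* element-wise log, here all zeros; MATH_LOG is recorded */
_LogMe(&x);                        /* in-place variant, calls _Log(x, x) */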
...@@ -29,37 +29,41 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -29,37 +29,41 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA #ifdef USE_CUDA
/* /*
set each entry to its log value (CUDA Kernel) set each entry to its log value (CUDA Kernel)
>> d - pointer to the data array >> a - pointer to input data array
>> b - pointer to output data array
>> size - size of the data array >> size - size of the data array
*/ */
__global__ __global__
void KernelLog(DTYPE * d, int size) void KernelLog(DTYPE * a, DTYPE * b, int size)
{ {
int i = blockDim.x * blockIdx.x + threadIdx.x; int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size) if (i < size)
d[i] = log(d[i]); b[i] = log(a[i]);
} }
/* /*
set each entry to its log value (CUDA Kernel) set each entry to its log value (CUDA Kernel)
This is for float16 computation This is for float16 computation
>> d - pointer to the data array >> a - pointer to input data array
>> b - pointer to output data array
>> size - size of the data array >> size - size of the data array
*/ */
__global__ __global__
void KernelLog(__half * d, int size) void KernelLog(__half * a, __half * b, int size)
{ {
return; return;
} }
/* /*
set each entry to its log value set each entry to its log value
>> a - the tensor >> a - input tensor
>> b - output tensor
*/ */
extern "C" extern "C"
void _CudaLog(XTensor * a) void _CudaLog(const XTensor * a, XTensor * b)
{ {
CheckNTErrors((XTensor::IsIdentical(a, b)), "Input tensors should have the same type!");
CheckNTErrors((a->isSparse == false), "TODO!"); CheckNTErrors((a->isSparse == false), "TODO!");
int gridSize[3]; int gridSize[3];
...@@ -74,10 +78,10 @@ void _CudaLog(XTensor * a) ...@@ -74,10 +78,10 @@ void _CudaLog(XTensor * a)
ProtectCudaDev(a->devID, devIDBackup); ProtectCudaDev(a->devID, devIDBackup);
if (a->dataType == DEFAULT_DTYPE) { if (a->dataType == DEFAULT_DTYPE) {
KernelLog << <blocks, threads >> >((DTYPE*)a->data, a->unitNum); KernelLog << <blocks, threads >> >((DTYPE*)a->data, (DTYPE*)b->data, a->unitNum);
} }
else if (a->dataType == X_FLOAT16) { else if (a->dataType == X_FLOAT16) {
KernelLog << <blocks, threads >> >((__half*)a->data, a->unitNum); KernelLog << <blocks, threads >> >((__half*)a->data, (__half*)b->data, a->unitNum);
} }
else { else {
ShowNTErrors("TODO!"); ShowNTErrors("TODO!");
......
...@@ -30,15 +30,15 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -30,15 +30,15 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* set each entry to its log value (CUDA Kernel) */ /* set each entry to its log value (CUDA Kernel) */
__global__ __global__
void KernelLog(DTYPE * d, int size); void KernelLog(DTYPE * a, DTYPE * b, int size);
/* set each entry to its log value (CUDA Kernel) with float16 data type*/ /* set each entry to its log value (CUDA Kernel) with float16 data type*/
__global__ __global__
void KernelLog(__half * d, int size); void KernelLog(__half * a, __half * b, int size);
/* set each entry to its log value */ /* set each entry to its log value */
extern "C" extern "C"
void _CudaLog(XTensor * a); void _CudaLog(const XTensor * a, XTensor * b);
#endif // USE_CUDA #endif // USE_CUDA
......
...@@ -27,8 +27,19 @@ ...@@ -27,8 +27,19 @@
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* set every entry to its log value */ /* set every entry to its log value */
extern "C" void _Log(const XTensor * a, XTensor * b);
void _Log(XTensor * a);
/*
set every entry to its log value (do it on site)
keep the result in the input tensor a and return nothing
*/
void _LogMe(XTensor * a);
/*
set every entry to its log value (return a XTensor structure)
make a new tensor to keep the result and return it
*/
XTensor Log(const XTensor & a);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
#include <math.h> #include <math.h>
#include "../../XTensor.h" #include "../../XTensor.h"
#include "../../XName.h"
#include "Power.h" #include "Power.h"
#include "Power.cuh" #include "Power.cuh"
...@@ -28,38 +29,73 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -28,38 +29,73 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* /*
get the power(a, p) get the power(a, p)
>> a - the tensor >> a - input tensor
>> p - as it is >> b - output tensor
>> p - parameter
*/ */
void _Power(XTensor * a, DTYPE p) void _Power(const XTensor * a, XTensor * b, DTYPE p)
{ {
#ifdef USE_CUDA #ifdef USE_CUDA
/* run it on GPUs */ /* run it on GPUs */
if (a->devID >= 0) { if (a->devID >= 0) {
_CudaPower(a, p); _CudaPower(a, b, p);
return; return;
} }
#endif #endif
CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!"); CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!");
DTYPE * d = (DTYPE*)a->data; DTYPE * aData = (DTYPE*)a->data;
DTYPE * bData = (DTYPE*)b->data;
if (p == 0) { if (p == 0) {
for (int i = 0; i < a->unitNum; i++) for (int i = 0; i < a->unitNum; i++)
d[i] = (DTYPE)1.0; bData[i] = (DTYPE)1.0;
} }
else if (p == (DTYPE)0.5) { else if (p == (DTYPE)0.5) {
for (int i = 0; i < a->unitNum; i++) for (int i = 0; i < a->unitNum; i++)
d[i] = (DTYPE)sqrt(d[i]); bData[i] = (DTYPE)sqrt(aData[i]);
} }
else if (p == (DTYPE)2.0) { else if (p == (DTYPE)2.0) {
for (int i = 0; i < a->unitNum; i++) for (int i = 0; i < a->unitNum; i++)
d[i] = d[i] * d[i]; bData[i] = aData[i] * aData[i];
} }
else { else {
for (int i = 0; i < a->unitNum; i++) for (int i = 0; i < a->unitNum; i++)
d[i] = (DTYPE)pow(d[i], p); bData[i] = (DTYPE)pow(aData[i], p);
} }
} }
/*
get the power(a, p) (do it on site)
keep the result in the input tensor a and return nothing
>> a - the tensor
>> p - parameter
*/
void _PowerMe(XTensor * a, DTYPE p)
{
_Power(a, a, p);
}
/*
get the power(a, p) (return a XTensor structure)
make a new tensor to keep the result and return it
>> a - input tensor
>> p - parameter
<< return - the power value of the input tensor
*/
XTensor Power(const XTensor & a, DTYPE p)
{
XTensor b(&a);
b.SetTMP();
/* call _Power function */
_Power(&a, &b, p);
/* tensor connections */
XLink::MakeLink(&a, NULL, &b, MATH_POWER);
XLink::AddParamToHead(&b, p);
return b;
}
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
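A small usage sketch of the new Power interface (illustrative; positive inputs are used so that the p = 0.5 branch is well defined, and InitTensor2D / _SetDataRand are assumed helpers):
XTensor x, sq, root;
InitTensor2D(&x, 3, 3);
_SetDataRand(&x, 1.0F, 2.0F);      /* keep entries positive */
sq = Power(x, 2.0F);               /* element-wise square */
root = Power(x, 0.5F);             /* element-wise square root */
_PowerMe(&x, 3.0F);                /* overwrite x with x^3 */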
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
#include "../../XDevice.h" #include "../../XDevice.h"
#include "../../XTensor.h" #include "../../XTensor.h"
#include "../movement/CopyValues.cuh"
#include "Power.h" #include "Power.h"
#include "Power.cuh" #include "Power.cuh"
...@@ -30,74 +31,80 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -30,74 +31,80 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* /*
set all entries to its root (CUDA Kernel) set all entries to its root (CUDA Kernel)
>> d - data array >> a - input data array
>> b - output data array
>> size - size of the data array >> size - size of the data array
*/ */
__global__ __global__
void KernelSqrtV2(DTYPE * d, int size) void KernelSqrtV2(DTYPE * a, DTYPE * b, int size)
{ {
int i = blockDim.x * blockIdx.x + threadIdx.x; int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size) if (i < size)
d[i] = sqrt(d[i]); b[i] = sqrt(a[i]);
} }
/* /*
set all entries to its root (CUDA Kernel) set all entries to its root (CUDA Kernel)
>> d - data array >> a - input data array
>> b - output data array
>> size - size of the data array >> size - size of the data array
*/ */
__global__ __global__
void KernelSqrtV2(__half * d, int size) void KernelSqrtV2(__half * a, __half * b, int size)
{ {
int i = blockDim.x * blockIdx.x + threadIdx.x; int i = blockDim.x * blockIdx.x + threadIdx.x;
#if __CUDA_ARCH__ >= 530 || !defined(__CUDA_ARCH__) #if __CUDA_ARCH__ >= 530 || !defined(__CUDA_ARCH__)
if (i < size) if (i < size)
d[i] = hsqrt(d[i]); b[i] = hsqrt(a[i]);
#else #else
if (i < size) if (i < size)
d[i] = __float2half(sqrt(__half2float(d[i]))); b[i] = __float2half(sqrt(__half2float(a[i])));
#endif #endif
} }
/* /*
get power(d[i], p) get power(d[i], p)
>> d - data array >> a - input data array
>> b - output data array
>> p - power >> p - power
>> size - size of the data array >> size - size of the data array
*/ */
__global__ __global__
void KernelPower(DTYPE * d, DTYPE p, int size) void KernelPower(DTYPE * a, DTYPE * b, DTYPE p, int size)
{ {
int i = blockDim.x * blockIdx.x + threadIdx.x; int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size) if (i < size)
d[i] = pow(d[i], p); b[i] = pow(a[i], p);
} }
/* /*
get power(d[i], p) get power(d[i], p)
>> d - data array >> a - input data array
>> b - output data array
>> p - power >> p - power
>> size - size of the data array >> size - size of the data array
*/ */
__global__ __global__
void KernelPower(__half * d, __half p, int size) void KernelPower(__half * a, __half * b, __half p, int size)
{ {
#if __CUDA_ARCH__ >= 530 || !defined(__CUDA_ARCH__) #if __CUDA_ARCH__ >= 530 || !defined(__CUDA_ARCH__)
#else #else
int i = blockDim.x * blockIdx.x + threadIdx.x; int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size) if (i < size)
d[i] = __float2half(pow(__half2float(d[i]), __half2float(p))); b[i] = __float2half(pow(__half2float(a[i]), __half2float(p)));
#endif #endif
} }
/* get the power of the entries */ /* get the power of the entries */
extern "C" extern "C"
void _CudaPower(XTensor * a, DTYPE p) void _CudaPower(const XTensor * a, XTensor * b, DTYPE p)
{ {
CheckNTErrors((XTensor::IsIdentical(a, b)), "Input tensors should have the same type!");
int gridSize[3]; int gridSize[3];
int blockSize[3]; int blockSize[3];
...@@ -111,15 +118,18 @@ void _CudaPower(XTensor * a, DTYPE p) ...@@ -111,15 +118,18 @@ void _CudaPower(XTensor * a, DTYPE p)
if (a->dataType == DEFAULT_DTYPE) { if (a->dataType == DEFAULT_DTYPE) {
if (p == (DTYPE)0.5) { if (p == (DTYPE)0.5) {
KernelSqrtV2 << <blocks, threads >> >((DTYPE*)a->data, a->unitNum); KernelSqrtV2 << <blocks, threads >> >((DTYPE*)a->data, (DTYPE*)b->data, a->unitNum);
}
else if (p == (DTYPE)1.0) {
_CudaCopyValues(a, b);
} }
else if (p != (DTYPE)1.0) { else if (p != (DTYPE)1.0) {
KernelPower << <blocks, threads >> >((DTYPE*)a->data, p, a->unitNum); KernelPower << <blocks, threads >> >((DTYPE*)a->data, (DTYPE*)b->data, p, a->unitNum);
} }
} }
else if (a->dataType == X_FLOAT16) { else if (a->dataType == X_FLOAT16) {
if (p == (DTYPE)0.5) { if (p == (DTYPE)0.5) {
KernelSqrtV2 << <blocks, threads >> >((__half*)a->data, a->unitNum); KernelSqrtV2 << <blocks, threads >> >((__half*)a->data, (__half*)b->data, a->unitNum);
} }
else if (p != (DTYPE)1.0) { else if (p != (DTYPE)1.0) {
ShowNTErrors("TODO!"); ShowNTErrors("TODO!");
......
...@@ -30,15 +30,15 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -30,15 +30,15 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* set all entries to its root (CUDA Kernel) */ /* set all entries to its root (CUDA Kernel) */
__global__ __global__
void KernelSqrtV2(DTYPE * d, int size); void KernelSqrtV2(DTYPE * a, DTYPE * b, int size);
/* set all entries to its root (CUDA Kernel) */ /* set all entries to its root (CUDA Kernel) */
__global__ __global__
void KernelSqrtV2(__half * d, int size); void KernelSqrtV2(__half * a, __half * b, int size);
/* get the power of the entries */ /* get the power of the entries */
extern "C" extern "C"
void _CudaPower(XTensor * a, DTYPE p); void _CudaPower(const XTensor * a, XTensor * b, DTYPE p);
#endif // USE_CUDA #endif // USE_CUDA
......
...@@ -27,8 +27,19 @@ ...@@ -27,8 +27,19 @@
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* get the power(x, y) */ /* get the power(x, y) */
extern "C" void _Power(const XTensor * a, XTensor * b, DTYPE p);
void _Power(XTensor * a, DTYPE p);
/*
get the power(x, y) (do it on site)
keep the result in the input tensor a and return nothing
*/
void _PowerMe(XTensor * a, DTYPE p);
/*
get the power(x, y) (return a XTensor structure)
make a new tensor to keep the result and return it
*/
XTensor Power(const XTensor & a, DTYPE p);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -110,8 +110,7 @@ make a new tensor to keep the result and return it ...@@ -110,8 +110,7 @@ make a new tensor to keep the result and return it
*/ */
XTensor CopyIndexed(const XTensor &s, int dim, int * srcIndex, int indexSize, int * tgtIndex, int copyNum) XTensor CopyIndexed(const XTensor &s, int dim, int * srcIndex, int indexSize, int * tgtIndex, int copyNum)
{ {
CheckNTErrors(&s, "Empty input tensor!"); CheckNTErrors(dim >= 0 && dim < s.order, "A too larget dimension specified!");
CheckNTErrors((dim >= 0 && dim < s.order), "A too larget dimension specified!");
int order = s.order; int order = s.order;
int * dimSize = new int[order]; int * dimSize = new int[order];
...@@ -123,16 +122,13 @@ XTensor CopyIndexed(const XTensor &s, int dim, int * srcIndex, int indexSize, in ...@@ -123,16 +122,13 @@ XTensor CopyIndexed(const XTensor &s, int dim, int * srcIndex, int indexSize, in
dimSize[i] = s.dimSize[i]; dimSize[i] = s.dimSize[i];
} }
XTensor t = NewTensor(order, dimSize, s.dataType, s.denseRatio, s.devID, s.mem); float dr = (!s.isSparse) ? 1.0F : s.denseRatio;
t.SetZeroAll(); XTensor t(order, dimSize, s.dataType, dr, s.devID, s.mem);
t.SetTMP(); t.SetTMP();
/* call _CopyIndexed function */ /* call _CopyIndexed function */
_CopyIndexed(&s, &t, dim, srcIndex, indexSize, tgtIndex, copyNum); _CopyIndexed(&s, &t, dim, srcIndex, indexSize, tgtIndex, copyNum);
/* destroy variables */
delete[] dimSize;
/* tensor connection */ /* tensor connection */
XLink::MakeLink(&s, NULL, &t, MOVEMENT_COPYINDEXED); XLink::MakeLink(&s, NULL, &t, MOVEMENT_COPYINDEXED);
XLink::AddParamToHeadInt(&t, dim); XLink::AddParamToHeadInt(&t, dim);
...@@ -140,7 +136,10 @@ XTensor CopyIndexed(const XTensor &s, int dim, int * srcIndex, int indexSize, in ...@@ -140,7 +136,10 @@ XTensor CopyIndexed(const XTensor &s, int dim, int * srcIndex, int indexSize, in
XLink::AddParamToHeadInt(&t, indexSize); XLink::AddParamToHeadInt(&t, indexSize);
XLink::AddParamToHeadPointer(&t, tgtIndex); XLink::AddParamToHeadPointer(&t, tgtIndex);
XLink::AddParamToHeadInt(&t, copyNum); XLink::AddParamToHeadInt(&t, copyNum);
/* destroy variables */
delete[] dimSize;
return t; return t;
} }
......
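A hedged sketch of the returning CopyIndexed overload (illustrative; the output size along dim is assumed to be indexSize * copyNum, and InitTensor2D / _SetDataRand are assumed helpers):
XTensor s, t;
InitTensor2D(&s, 4, 3);
_SetDataRand(&s, -1.0F, 1.0F);
int srcIndex[] = {0, 2};           /* copy columns 0 and 2 of s ...   */
int tgtIndex[] = {0, 1};           /* ... into columns 0 and 1 of t   */
t = CopyIndexed(s, 1, srcIndex, 2, tgtIndex, 1);   /* t is expected to be 4 x 2 */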
...@@ -101,32 +101,31 @@ make a new tensor to keep the result and return it ...@@ -101,32 +101,31 @@ make a new tensor to keep the result and return it
*/ */
XTensor ReduceMax(const XTensor &input, int dim) XTensor ReduceMax(const XTensor &input, int dim)
{ {
CheckNTErrors(&input, "Empty input or output tensors!"); CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!");
CheckNTErrors((dim >= 0 && dim < input.order), "Illegal dimension to reduce!");
int order = input.order - 1; int order = input.order - 1;
int * dimSize = new int[order]; int * dimSize = new int[order];
for(int i = 0; i < input.order; i++){ for(int i = 0; i < order; i++){
if(i < dim) if(i < dim)
dimSize[i] = input.dimSize[i]; dimSize[i] = input.dimSize[i];
else if(i > dim) else if(i >= dim)
dimSize[i] = input.dimSize[i + 1]; dimSize[i] = input.dimSize[i + 1];
} }
XTensor output = NewTensor(order, dimSize, input.dataType, input.denseRatio, input.devID, input.mem); float dr = (!input.isSparse) ? 1.0F : input.denseRatio;
output.SetZeroAll(); XTensor output(order, dimSize, input.dataType, dr, input.devID, input.mem);
output.SetTMP(); output.SetTMP();
/* call _ReduceMax function */ /* call _ReduceMax function */
_ReduceMax(&input, &output, dim); _ReduceMax(&input, &output, dim);
/* destroy variables */
delete[] dimSize;
/* tensor connection */ /* tensor connection */
XLink::MakeLink(&input, NULL, &output, REDUCE_REDUCEMAX); XLink::MakeLink(&input, NULL, &output, REDUCE_REDUCEMAX);
XLink::AddParamToHeadInt(&output, dim); XLink::AddParamToHeadInt(&output, dim);
/* destroy variables */
delete[] dimSize;
return output; return output;
} }
......
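A minimal sketch of the returning ReduceMax (illustrative; reducing an n x m tensor along dimension 1 yields an order-1 tensor of size n, and InitTensor2D / _SetDataRand are assumed helpers):
XTensor input, maxAlongDim1;
InitTensor2D(&input, 2, 4);
_SetDataRand(&input, 0.0F, 1.0F);
maxAlongDim1 = ReduceMax(input, 1);   /* order-1 tensor with 2 entries, REDUCE_REDUCEMAX recorded */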
...@@ -58,20 +58,19 @@ For a 1-dimensional data array a, mean = (1/n) * sum_i input_i ...@@ -58,20 +58,19 @@ For a 1-dimensional data array a, mean = (1/n) * sum_i input_i
*/ */
XTensor ReduceMean(const XTensor &input, int dim) XTensor ReduceMean(const XTensor &input, int dim)
{ {
CheckNTErrors(&input, "Empty input or output tensors!"); CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!");
CheckNTErrors((dim >= 0 && dim < input.order), "Illegal dimension to reduce!");
int order = input.order - 1; int order = input.order - 1;
int * dimSize = new int[order]; int * dimSize = new int[order];
for(int i = 0; i < input.order; i++){ for(int i = 0; i < order; i++){
if(i < dim) if(i < dim)
dimSize[i] = input.dimSize[i]; dimSize[i] = input.dimSize[i];
else if(i > dim) else if(i >= dim)
dimSize[i] = input.dimSize[i + 1]; dimSize[i] = input.dimSize[i + 1];
} }
XTensor output = NewTensor(order, dimSize, input.dataType, input.denseRatio, input.devID, input.mem); float dr = (!input.isSparse) ? 1.0F : input.denseRatio;
output.SetZeroAll(); XTensor output(order, dimSize, input.dataType, dr, input.devID, input.mem);
output.SetTMP(); output.SetTMP();
/* call _ReduceMean function */ /* call _ReduceMean function */
...@@ -87,4 +86,4 @@ XTensor ReduceMean(const XTensor &input, int dim) ...@@ -87,4 +86,4 @@ XTensor ReduceMean(const XTensor &input, int dim)
return output; return output;
} }
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
...@@ -214,20 +214,19 @@ sum = \sum_i exp((a_i - shift)^power) if isExp == true ...@@ -214,20 +214,19 @@ sum = \sum_i exp((a_i - shift)^power) if isExp == true
*/ */
XTensor ReduceSum(const XTensor &input, int dim, const XTensor &shift, DTYPE power, bool isExp) XTensor ReduceSum(const XTensor &input, int dim, const XTensor &shift, DTYPE power, bool isExp)
{ {
CheckNTErrors(&input, "Empty input or output tensors!"); CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!");
CheckNTErrors((dim >= 0 && dim < input.order), "Illegal dimension to reduce!");
int order = input.order - 1; int order = input.order - 1;
int * dimSize = new int[order]; int * dimSize = new int[order];
for(int i = 0; i < input.order; i++){ for(int i = 0; i < order; i++){
if(i < dim) if(i < dim)
dimSize[i] = input.dimSize[i]; dimSize[i] = input.dimSize[i];
else if(i > dim) else if(i >= dim)
dimSize[i] = input.dimSize[i + 1]; dimSize[i] = input.dimSize[i + 1];
} }
XTensor output = NewTensor(order, dimSize, input.dataType, input.denseRatio, input.devID, input.mem); float dr = (!input.isSparse) ? 1.0F : input.denseRatio;
output.SetZeroAll(); XTensor output(order, dimSize, input.dataType, dr, input.devID, input.mem);
output.SetTMP(); output.SetTMP();
/* call _ReduceSum function */ /* call _ReduceSum function */
...@@ -237,6 +236,53 @@ XTensor ReduceSum(const XTensor &input, int dim, const XTensor &shift, DTYPE pow ...@@ -237,6 +236,53 @@ XTensor ReduceSum(const XTensor &input, int dim, const XTensor &shift, DTYPE pow
XLink::MakeLink(&input, &shift, &output, REDUCE_REDUCESUM); XLink::MakeLink(&input, &shift, &output, REDUCE_REDUCESUM);
XLink::AddParamToHeadInt(&output, dim); XLink::AddParamToHeadInt(&output, dim);
XLink::AddParamToHead(&output, power); XLink::AddParamToHead(&output, power);
XLink::AddParamToHeadBool(&output, isExp);
/* destroy variables */
delete[] dimSize;
return output;
}
/*
sum the items along a dimension of the tensor (return a XTensor structure)
make a new tensor to keep the result and return it
For a 1-dimensional data array a,
sum = \sum_i (a_i)^power if isExp == false
sum = \sum_i exp((a_i)^power) if isExp == true
>> input - the input tensor
>> dim - the dimension where the reduction is performed on
>> power - we perform pow(item_i, power) on each item in the array
>> isExp - specify if the exp() is performed
<< return - the sum along a dimension of the tensor
*/
XTensor ReduceSum(const XTensor &input, int dim, DTYPE power, bool isExp)
{
CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!");
int order = input.order - 1;
int * dimSize = new int[order];
for(int i = 0; i < order; i++){
if(i < dim)
dimSize[i] = input.dimSize[i];
else if(i >= dim)
dimSize[i] = input.dimSize[i + 1];
}
float dr = (!input.isSparse) ? 1.0F : input.denseRatio;
XTensor output(order, dimSize, input.dataType, dr, input.devID, input.mem);
output.SetTMP();
/* call _ReduceSum function */
_ReduceSum(&input, &output, dim, NULL, power, isExp);
/* tensor connection */
XLink::MakeLink(&input, NULL, &output, REDUCE_REDUCESUM);
XLink::AddParamToHeadInt(&output, dim);
XLink::AddParamToHead(&output, power);
XLink::AddParamToHeadBool(&output, isExp);
/* destroy variables */ /* destroy variables */
delete[] dimSize; delete[] dimSize;
......
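A sketch contrasting the existing shifted ReduceSum with the new shift-free overload, which passes NULL for the shift internally (illustrative; InitTensor2D and _SetDataRand are assumed helpers):
XTensor input, s1, s2, s3;
InitTensor2D(&input, 2, 4);
_SetDataRand(&input, 0.0F, 1.0F);
s1 = ReduceSum(input, 1);              /* plain sum along dimension 1        */
s2 = ReduceSum(input, 1, 2.0F);        /* sum of squared entries (power = 2) */
s3 = ReduceSum(input, 1, 1.0F, true);  /* sum of exp(a_i) (isExp = true)     */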
...@@ -43,7 +43,16 @@ For a 1-dimensional data array a, ...@@ -43,7 +43,16 @@ For a 1-dimensional data array a,
sum = \sum_i (a_i - shift) if isExp == false sum = \sum_i (a_i - shift) if isExp == false
sum = \sum_i exp(a_i - shift) if isExp == true sum = \sum_i exp(a_i - shift) if isExp == true
*/ */
XTensor ReduceSum(const XTensor &input, int dim, const XTensor &shift = NULL, DTYPE power = (DTYPE)1.0F, bool isExp = false); XTensor ReduceSum(const XTensor &input, int dim, const XTensor &shift, DTYPE power = (DTYPE)1.0F, bool isExp = false);
/*
sum the items along a dimension of the tensor (return a XTensor structure)
make a new tensor to keep the result and return it
For a 1-dimensional data array a,
sum = \sum_i (a_i) if isExp == false
sum = \sum_i exp(a_i) if isExp == true
*/
XTensor ReduceSum(const XTensor &input, int dim, DTYPE power = (DTYPE)1.0F, bool isExp = false);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -54,25 +54,24 @@ For a 1-dimensional data array a, sum = \sum_i (a_i - shift)^2 ...@@ -54,25 +54,24 @@ For a 1-dimensional data array a, sum = \sum_i (a_i - shift)^2
*/ */
XTensor ReduceSumSquared(const XTensor &input, int dim, const XTensor &shift) XTensor ReduceSumSquared(const XTensor &input, int dim, const XTensor &shift)
{ {
CheckNTErrors(&input, "Empty input or output tensors!"); CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!");
CheckNTErrors((dim >= 0 && dim < input.order), "Illegal dimension to reduce!");
int order = input.order - 1; int order = input.order - 1;
int * dimSize = new int[order]; int * dimSize = new int[order];
for(int i = 0; i < input.order; i++){ for(int i = 0; i < order; i++){
if(i < dim) if(i < dim)
dimSize[i] = input.dimSize[i]; dimSize[i] = input.dimSize[i];
else if(i > dim) else if(i >= dim)
dimSize[i] = input.dimSize[i + 1]; dimSize[i] = input.dimSize[i + 1];
} }
XTensor output = NewTensor(order, dimSize, input.dataType, input.denseRatio, input.devID, input.mem); float dr = (!input.isSparse) ? 1.0F : input.denseRatio;
output.SetZeroAll(); XTensor output(order, dimSize, input.dataType, dr, input.devID, input.mem);
output.SetTMP(); output.SetTMP();
/* call _ReduceSumSquared function */ /* call _ReduceSumSquared function */
_ReduceSumSquared(&input, &output, dim, &shift); _ReduceSumSquared(&input, &output, dim, &shift);
/* tensor connection */ /* tensor connection */
XLink::MakeLink(&input, &shift, &output, REDUCE_REDUCESUMSQUARED); XLink::MakeLink(&input, &shift, &output, REDUCE_REDUCESUMSQUARED);
XLink::AddParamToHeadInt(&output, dim); XLink::AddParamToHeadInt(&output, dim);
...@@ -83,4 +82,4 @@ XTensor ReduceSumSquared(const XTensor &input, int dim, const XTensor &shift) ...@@ -83,4 +82,4 @@ XTensor ReduceSumSquared(const XTensor &input, int dim, const XTensor &shift)
return output; return output;
} }
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../../XName.h"
#include "../math/ScaleAndShift.h" #include "../math/ScaleAndShift.h"
#include "ReduceSum.h" #include "ReduceSum.h"
#include "ReduceVariance.h" #include "ReduceVariance.h"
...@@ -56,24 +57,27 @@ For a 1-dimensional data array a, variance = 1/n * \sum_i (a_i - mean)^2 ...@@ -56,24 +57,27 @@ For a 1-dimensional data array a, variance = 1/n * \sum_i (a_i - mean)^2
*/ */
XTensor ReduceVariance(const XTensor &input, int dim, const XTensor &mean) XTensor ReduceVariance(const XTensor &input, int dim, const XTensor &mean)
{ {
CheckNTErrors(&input, "Empty input or output tensors!"); CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!");
CheckNTErrors((dim >= 0 && dim < input.order), "Illegal dimension to reduce!");
int order = input.order - 1; int order = input.order - 1;
int * dimSize = new int[order]; int * dimSize = new int[order];
for(int i = 0; i < input.order; i++){ for(int i = 0; i < order; i++){
if(i < dim) if(i < dim)
dimSize[i] = input.dimSize[i]; dimSize[i] = input.dimSize[i];
else if(i > dim) else if(i >= dim)
dimSize[i] = input.dimSize[i + 1]; dimSize[i] = input.dimSize[i + 1];
} }
XTensor output = NewTensor(order, dimSize, input.dataType, input.denseRatio, input.devID, input.mem); float dr = (!input.isSparse) ? 1.0F : input.denseRatio;
output.SetZeroAll(); XTensor output(order, dimSize, input.dataType, dr, input.devID, input.mem);
output.SetTMP(); output.SetTMP();
/* call _ReduceVariance function */ /* call _ReduceVariance function */
_ReduceVariance(&input, &output, dim, &mean); _ReduceVariance(&input, &output, dim, &mean);
/* tensor connection */
XLink::MakeLink(&input, &mean, &output, REDUCE_REDUCEVARIANCE);
XLink::AddParamToHeadInt(&output, dim);
/* destroy variables */ /* destroy variables */
delete[] dimSize; delete[] dimSize;
......
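A minimal sketch showing how ReduceMean and ReduceVariance are typically chained, since the variance takes the mean tensor as its shift (illustrative shapes; InitTensor2D and _SetDataRand are assumed helpers):
XTensor input, mean, var;
InitTensor2D(&input, 2, 8);
_SetDataRand(&input, -1.0F, 1.0F);
mean = ReduceMean(input, 1);           /* mean over dimension 1                  */
var = ReduceVariance(input, 1, mean);  /* 1/n * \sum_i (a_i - mean)^2 over dim 1 */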
...@@ -68,8 +68,7 @@ or "Merge" by means of the tensor shapes ...@@ -68,8 +68,7 @@ or "Merge" by means of the tensor shapes
*/ */
XTensor Concatenate(const XList &smalls, int dim) XTensor Concatenate(const XList &smalls, int dim)
{ {
CheckNTErrors(&smalls != NULL, "Invalid list!"); CheckNTErrors(smalls.count > 0, "Empty list!");
CheckNTErrors((smalls.count > 0), "Empty list!");
CheckNTErrors(dim >= 0, "Illegal dimension to concatenate!"); CheckNTErrors(dim >= 0, "Illegal dimension to concatenate!");
bool uniform = true; bool uniform = true;
...@@ -80,40 +79,35 @@ XTensor Concatenate(const XList &smalls, int dim) ...@@ -80,40 +79,35 @@ XTensor Concatenate(const XList &smalls, int dim)
if (!XTensor::IsIdentical(a, b)) if (!XTensor::IsIdentical(a, b))
uniform = false; uniform = false;
} }
XTensor * tensor = (XTensor*)smalls.GetItem(0);
int order = tensor->order;
int * dimSize = new int[order];
int * dimSize;
if (uniform) { if (uniform) {
XTensor * tensor = (XTensor*)smalls.GetItem(0);
int order = tensor->order;
dimSize = new int[order];
for (int i = 0; i < tensor->order; i++) { for (int i = 0; i < tensor->order; i++) {
if (i != dim) if (i != dim)
dimSize[i] = tensor->dimSize[i]; dimSize[i] = tensor->dimSize[i];
else else
dimSize[i] = tensor->dimSize[dim] * smalls.count; dimSize[i] = tensor->dimSize[dim] * smalls.count;
} }
XTensor big = XTensor(order, dimSize, tensor->dataType, tensor->denseRatio, tensor->devID, tensor->mem);
float dr = (!tensor->isSparse) ? 1.0F : tensor->denseRatio;
big.SetZeroAll(); XTensor big(order, dimSize, tensor->dataType, dr, tensor->devID, tensor->mem);
big.SetTMP(); big.SetTMP();
/* call _Merge function */ /* call _Merge function */
_Merge(&smalls, &big, dim); _Merge(&smalls, &big, dim);
///* tensor connection */ /* tensor connection */
//XLink::MakeLink(&smalls, &big, SHAPE_CONCATENATE); XLink::MakeLink(&smalls, &big, SHAPE_MERGE);
//XLink::AddParamToHead(&big, dim); XLink::AddParamToHeadInt(&big, dim);
/* destroy variables */
delete dimSize;
/* destroy variables */
delete[] dimSize;
return big; return big;
} }
else { else {
XTensor * tensor = (XTensor*)smalls.GetItem(0);
int order = tensor->order;
dimSize = new int[order];
for (int i = 0; i < tensor->order; i++) for (int i = 0; i < tensor->order; i++)
if (i != dim) if (i != dim)
dimSize[i] = tensor->dimSize[i]; dimSize[i] = tensor->dimSize[i];
...@@ -125,15 +119,19 @@ XTensor Concatenate(const XList &smalls, int dim) ...@@ -125,15 +119,19 @@ XTensor Concatenate(const XList &smalls, int dim)
} }
dimSize[dim] = catDimSize; dimSize[dim] = catDimSize;
XTensor big = NewTensor(order, dimSize, tensor->dataType, tensor->denseRatio, tensor->devID, tensor->mem); float dr = (!tensor->isSparse) ? 1.0F : tensor->denseRatio;
big.SetZeroAll(); XTensor big(order, dimSize, tensor->dataType, dr, tensor->devID, tensor->mem);
big.SetTMP(); big.SetTMP();
/* call _ConcatenateSolely function */ /* call _ConcatenateSolely function */
_ConcatenateSolely(&smalls, &big, dim); _ConcatenateSolely(&smalls, &big, dim);
/* tensor connection */
XLink::MakeLink(&smalls, &big, SHAPE_CONCATENATE);
XLink::AddParamToHeadInt(&big, dim);
/* destroy variables */ /* destroy variables */
delete dimSize; delete[] dimSize;
return big; return big;
} }
@@ -168,12 +166,76 @@ make a new tensor to keep the result and return it.
 */
XTensor Concatenate(const XTensor &smallA, const XTensor &smallB, int dim)
{
+   CheckNTErrors(dim >= 0, "Illegal dimension to concatenate!");

    XList smalls(2);
    smalls.Add(&smallA);
    smalls.Add(&smallB);

-   /* call Concatenate function */
-   return Concatenate(smalls, dim);
+   bool uniform = true;
+   for (int i = 1; i < smalls.count; i++) {
+       XTensor * a = (XTensor*)smalls.Get(i - 1);
+       XTensor * b = (XTensor*)smalls.Get(i);
+       CheckNTErrors((a && b), "Empty input tensors!");
+       if (!XTensor::IsIdentical(a, b))
+           uniform = false;
+   }
+
+   XTensor * tensor = (XTensor*)smalls.Get(0);
+   int order = tensor->order;
+   int * dimSize = new int[order];
+
+   if (uniform) {
+       for (int i = 0; i < tensor->order; i++) {
+           if (i != dim)
+               dimSize[i] = tensor->dimSize[i];
+           else
+               dimSize[i] = tensor->dimSize[dim] * smalls.count;
+       }
+
+       float dr = (!tensor->isSparse) ? 1.0F : tensor->denseRatio;
+       XTensor big(order, dimSize, tensor->dataType, dr, tensor->devID, tensor->mem);
+       big.SetTMP();
+
+       /* call _Merge function */
+       _Merge(&smalls, &big, dim);
+
+       /* tensor connection */
+       XLink::MakeLink(&smalls, &big, SHAPE_MERGE);
+       XLink::AddParamToHeadInt(&big, dim);
+
+       /* destroy variables */
+       delete[] dimSize;
+
+       return big;
+   }
+   else {
+       for (int i = 0; i < tensor->order; i++)
+           if (i != dim)
+               dimSize[i] = tensor->dimSize[i];
+
+       int catDimSize = 0;
+       for (int i = 0; i < smalls.count; i++) {
+           XTensor * tensor = (XTensor*)smalls.Get(i);
+           catDimSize += tensor->dimSize[dim];
+       }
+       dimSize[dim] = catDimSize;
+
+       float dr = (!tensor->isSparse) ? 1.0F : tensor->denseRatio;
+       XTensor big(order, dimSize, tensor->dataType, dr, tensor->devID, tensor->mem);
+       big.SetTMP();
+
+       /* call _ConcatenateSolely function */
+       _ConcatenateSolely(&smalls, &big, dim);
+
+       /* tensor connection */
+       XLink::MakeLink(&smalls, &big, SHAPE_CONCATENATE);
+       XLink::AddParamToHeadInt(&big, dim);
+
+       /* destroy variables */
+       delete[] dimSize;
+
+       return big;
+   }
}
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
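For reference, a minimal usage sketch of the two-tensor Concatenate added above (illustrative only, not part of the commit; the tensor shapes are assumptions). Identically shaped inputs go through the merge path.

/* illustrative sketch, not part of the diff */
XTensor x, y;
InitTensor2D(&x, 2, 3);
InitTensor2D(&y, 2, 3);
x.SetZeroAll();
y.SetZeroAll();

XTensor z = Concatenate(x, y, 1);   /* identical 2 x 3 inputs -> z is 2 x 6 */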
@@ -36,7 +36,7 @@ concatenate a list of tensors along a given dimension
 */
void _ConcatenateSolely(const XList * smalls, XTensor * big, int dim)
{
-   CheckNTErrors((big->order > dim && dim >= 0), "Illegal dimension to concatenate!");
+   CheckNTErrors(big->order > dim && dim >= 0, "Illegal dimension to concatenate!");

    int catDimSize = 0;
    int dimRDI = big->order - dim - 1;
...
@@ -36,7 +36,7 @@ set target data block index for the data movement in merge
>> mem - the memory pool
 */
void _MakeMergeBlockIndex(int * blockIndex, int blockNum, int blockNumInMerge,
                          int splitSizeInGrid, int gridSize, int gridNum, XMem * mem)
{
    if (mem != NULL && mem->devID >= 0) {
#ifdef USE_CUDA
...
@@ -40,7 +40,7 @@ set target data block index for the data movement in split (device code)
 */
__global__
void KernelMakeMergeBlockIndex(int * blockIndex, int blockNum, int blockNumInMerge,
                               int splitSizeInGrid, int gridSize, int gridNum)
{
    /* block index */
    int i = blockDim.x * blockIdx.x + threadIdx.x;
@@ -71,8 +71,8 @@ set target data block index for the data movement in split
 */
extern "C"
void _CudaMakeMergeBlockIndex(int devID,
                              int * blockIndex, int blockNum, int blockNumInMerge,
                              int splitSizeInGrid, int gridSize, int gridNum)
{
    int cudaGrids[3];
    int cudaBlocks[3];
...
@@ -30,9 +30,8 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* set target data block index for the data movement in split */
extern "C"
-void _CudaMakeMergeBlockIndex(int devID,
-                              int * blockIndex, int blockNum, int blockNumInMerge,
-                              int splitSizeInGrid, int gridSize, int gridNum);
+void _CudaMakeMergeBlockIndex(int devID, int * blockIndex, int blockNum, int blockNumInMerge,
+                              int splitSizeInGrid, int gridSize, int gridNum);

#endif // USE_CUDA
...
@@ -29,7 +29,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* set target data block index for the data movement in merge */
extern "C"
void _MakeMergeBlockIndex(int * blockIndex, int blockNum, int blockNumInMerge,
                          int splitSizeInGrid, int gridSize, int gridNum, XMem * mem);

} // namespace nts(NiuTrans.Tensor)
...
@@ -161,8 +161,7 @@ e.g., (N/3, M, 3) -> (N, M)
 */
XTensor Merge(const XTensor &s, int whereToMerge, int leadingDim)
{
-   CheckNTErrors(&s != NULL, "Invalid tensors!");
-   CheckNTErrors((leadingDim < whereToMerge), "Invalid leading dimension!");
+   CheckNTErrors(leadingDim < whereToMerge, "Invalid leading dimension!");

    if (leadingDim < 0)
        leadingDim = 0;
@@ -180,13 +179,18 @@ XTensor Merge(const XTensor &s, int whereToMerge, int leadingDim)
        }
    }

-   XTensor t = NewTensor(order, dimSize, s.dataType, s.denseRatio, s.devID, s.mem);
-   t.SetZeroAll();
+   float dr = (!s.isSparse) ? 1.0F : s.denseRatio;
+   XTensor t(order, dimSize, s.dataType, dr, s.devID, s.mem);
    t.SetTMP();

    /* call _Merge function */
    _Merge(&s, &t, whereToMerge, leadingDim);

+   /* tensor connections */
+   XLink::MakeLink(&s, NULL, &t, SHAPE_MERGE);
+   XLink::AddParamToHeadInt(&t, whereToMerge);
+   XLink::AddParamToHeadInt(&t, leadingDim);

    /* destroy variables */
    delete[] dimSize;
@@ -327,13 +331,58 @@ XTensor Merge(const XList &smalls, int whereToMerge)
            dimSize[i] = tensor->dimSize[whereToMerge] * smalls.count;
    }

-   XTensor big = NewTensor(order, dimSize, tensor->dataType, tensor->denseRatio, tensor->devID, tensor->mem);
-   big.SetZeroAll();
+   float dr = (!tensor->isSparse) ? 1.0F : tensor->denseRatio;
+   XTensor big(order, dimSize, tensor->dataType, dr, tensor->devID, tensor->mem);
    big.SetTMP();

    /* call _Merge function */
    _Merge(&smalls, &big, whereToMerge);

+   /* tensor connections */
+   XLink::MakeLink(&smalls, &big, SHAPE_MERGE_LIST);
+   XLink::AddParamToHeadInt(&big, whereToMerge);

    /* destroy variables */
    delete[] dimSize;

+   return big;
+}
+
+/*
+merge two tensors into a big tensor (return a XTensor structure)
+>> smallA - the first of the two tensors to merge
+>> smallB - the second of the two tensors to merge
+>> whereToMerge - the merging operation is along with which dimension
+<< return - the big tensor merged by small tensors
+*/
+XTensor Merge(const XTensor &smallA, const XTensor &smallB, int whereToMerge)
+{
+   CheckNTErrors(XTensor::IsIdentical(&smallA, &smallB),
+                 "The two tensors must be of the same size!");
+
+   int order = smallA.order;
+   int * dimSize = new int[order];
+
+   for (int i = 0; i < smallA.order; i++) {
+       if (i != whereToMerge)
+           dimSize[i] = smallA.dimSize[i];
+       else
+           dimSize[i] = smallA.dimSize[whereToMerge] * 2;
+   }
+
+   float dr = (!smallA.isSparse) ? 1.0F : smallA.denseRatio;
+   XTensor big(order, dimSize, smallA.dataType, dr, smallA.devID, smallA.mem);
+   big.SetTMP();
+
+   XList smalls(2);
+   smalls.Add(&smallA);
+   smalls.Add(&smallB);
+
+   /* call _Merge function */
+   _Merge(&smalls, &big, whereToMerge);
+
+   /* tensor connections */
+   XLink::MakeLink(&smalls, &big, SHAPE_MERGE_LIST);
+   XLink::AddParamToHeadInt(&big, whereToMerge);
+
+   /* destroy variables */
+   delete[] dimSize;
...
@@ -29,22 +29,19 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* transform a tensor by merging it along a dimension, e.g., (M, N/3, 3) -> (M, N) */
void _Merge(const XTensor * s, XTensor * t, int whereToMerge, int leadingDim = -1);

-/*
-transform a tensor by merging it along a dimension (return a XTensor structure).
-make a new tensor to keep the result and return it.
-e.g., (M, N/3, 3) -> (M, N)
-*/
+/* transform a tensor by merging it along a dimension (return a XTensor structure)
+   e.g., (M, N/3, 3) -> (M, N) */
XTensor Merge(const XTensor &s, int whereToMerge, int leadingDim = -1);

/* merge small tensors into a big tensor */
void _Merge(const XList * smalls, XTensor * big, int whereToMerge);

-/*
-merge small tensors into a big tensor (return a XTensor structure).
-make a new tensor to keep the result and return it.
-*/
+/* merge small tensors into a big tensor (return a XTensor structure) */
XTensor Merge(const XList &smalls, int whereToMerge);

+/* merge two tensors into a big tensor (return a XTensor structure) */
+XTensor Merge(const XTensor &smallA, const XTensor &smallB, int whereToMerge);

} // namespace nts(NiuTrans.Tensor)

#endif // __MERGE_H__
\ No newline at end of file
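A minimal usage sketch of the Merge entry points declared above (illustrative only, not part of the commit; tensor names and shapes are assumptions):

/* illustrative sketch, not part of the diff */
XTensor a, b;
InitTensor2D(&a, 2, 4);
InitTensor2D(&b, 2, 4);
a.SetZeroAll();
b.SetZeroAll();

XTensor m1 = Merge(a, b, 1);       /* two-tensor overload: m1 is 2 x 8 */

XList parts(2);
parts.Add(&a);
parts.Add(&b);
XTensor m2 = Merge(parts, 1);      /* list overload: also 2 x 8 */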
@@ -19,10 +19,12 @@
 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
 */

-#include "../../XTensor.h"
-#include "../../XUtility.h"
#include "Split.h"
#include "MakeSplitBlockIndex.h"
+#include "../../XName.h"
+#include "../../XTensor.h"
+#include "../../XUtility.h"
#include "../movement/CopyBlocksOnSite.h"

namespace nts { // namespace nts(NiuTrans.Tensor)
@@ -146,20 +148,25 @@ XTensor Split(const XTensor &s, int whereToSplit, int splitNum)
    int order = s.order + 1;
    int * dimSize = new int[order];

+   dimSize[0] = splitNum;
    for (int i = 0; i < s.order; i++) {
        if (i == whereToSplit)
-           dimSize[i] = s.dimSize[i] / splitNum;
+           dimSize[i+1] = s.dimSize[i] / splitNum;
        else
-           dimSize[i] = s.dimSize[i];
+           dimSize[i+1] = s.dimSize[i];
    }
-   dimSize[-1] = splitNum;

-   XTensor t = NewTensor(order, dimSize, s.dataType, s.denseRatio, s.devID, s.mem);
-   t.SetZeroAll();
+   float dr = (!s.isSparse) ? 1.0F : s.denseRatio;
+   XTensor t(order, dimSize, s.dataType, dr, s.devID, s.mem);
    t.SetTMP();

    /* call _Split function */
    _Split(&s, &t, whereToSplit, splitNum);

+   /* tensor connections */
+   XLink::MakeLink(&s, NULL, &t, SHAPE_SPLIT);
+   XLink::AddParamToHeadInt(&t, whereToSplit);
+   XLink::AddParamToHeadInt(&t, splitNum);

    /* destroy variables */
    delete[] dimSize;
@@ -168,7 +175,7 @@ XTensor Split(const XTensor &s, int whereToSplit, int splitNum)
}
/*
-split a big tensor into small tensors.
+split a big tensor into small tensors
>> big - the source tensor
>> smalls - the list that keeps the resulting tensors (for return)
@@ -274,43 +281,29 @@ void _Split(const XTensor * big, XList * smalls, int whereToSplit, int splitNum)
}
/*
-split a big tensor into small tensors (return a XList structure).
-make a new list to keep the result and return it.
+split a big tensor into small tensors
>> big - the source tensor
+>> smalls - the list that keeps the resulting tensors (for return)
+   NOTE that all the "small" tensors have already been placed in the list in advance.
>> whereToSplit - which dimension of the tensor is to split
>> splitNum - how many splits
-<< return - a list of small tensors by splitting a big tensor
*/
-XList SplitList(const XTensor &big, int whereToSplit, int splitNum)
+void Split(const XTensor &big, XList &smalls, int whereToSplit, int splitNum)
{
-   CheckNTErrors(&big, "Invalid tensors!");
-
-   XList smalls = XList(splitNum);
-
-   int order = big.order;
-   int * dimSize = new int[order];
-
-   for (int i = 0; i < big.order; i++) {
-       if (i != whereToSplit)
-           dimSize[i] = big.dimSize[i];
-       else
-           dimSize[i] = big.dimSize[i] / splitNum;
-   }
-
-   for (int i = 0; i < splitNum; i++) {
-       XTensor tensor = NewTensor(order, dimSize, big.dataType, big.denseRatio, big.devID, big.mem);
-       tensor.SetZeroAll();
-       tensor.SetTMP();
-       smalls.Add(&tensor);
-   }

    /* call _Split function */
    _Split(&big, &smalls, whereToSplit, splitNum);

-   /* destroy variables */
-   delete[] dimSize;
-
-   return smalls;
+   /* tensor connections */
+   for(int i = 0; i < smalls.count; i++){
+       XTensor * s = (XTensor*)smalls.Get(i);
+       XLink::MakeLink(&big, NULL, s, SHAPE_SPLIT_LIST);
+       XLink::AddParamToHeadInt(s, whereToSplit);
+
+       /* it is tricky here that we keep the id of each
+          block, rather than the total number of splits */
+       XLink::AddParamToHeadInt(s, i);
+   }
}
} // namespace nts(NiuTrans.Tensor)
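To close, a minimal usage sketch of the two Split entry points shown above (illustrative only, not part of the commit; shapes are assumptions). The overload that returns a tensor adds a leading dimension of size splitNum; the list-based overload fills small tensors that the caller has already created and added to the list.

/* illustrative sketch, not part of the diff */
XTensor whole;
InitTensor2D(&whole, 2, 6);
whole.SetZeroAll();

XTensor piece = Split(whole, 1, 3);     /* 2 x 6 -> 3 x 2 x 2 (leading dim = splitNum) */

XTensor s0, s1, s2;
InitTensor2D(&s0, 2, 2);
InitTensor2D(&s1, 2, 2);
InitTensor2D(&s2, 2, 2);

XList pieces(3);
pieces.Add(&s0);
pieces.Add(&s1);
pieces.Add(&s2);

Split(whole, pieces, 1, 3);             /* the three 2 x 2 tensors receive the slices */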