Commit 414ff54f by liyinqiao

1. update with master; 2. class core src

parent 0887fae1
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-04
*/
#include <stdio.h>
#include "XLink.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
/* storage size (in bytes) reserved for each parameter slot; AddParam()
   writes every parameter into a fixed-size slot of this width */
int XLink::paramSize = 64;
/* constructor: start from an empty hyperedge with no head, tails or parameters */
XLink::XLink()
{
    tailNum = 0;
    paramNum = 0;
    head = NULL;
    tails = NULL;
    params = NULL;
    type[0] = '\0';
}
/* destructor: release the tail array and the raw parameter buffer */
XLink::~XLink()
{
    delete[] tails;
    char * buf = (char*)params;
    delete[] buf;
}
/* reset the link to the empty state, releasing any memory it owns */
void XLink::Reset()
{
    delete[] tails;
    tails = NULL;
    char * buf = (char*)params;
    delete[] buf;
    params = NULL;
    head = NULL;
    tailNum = 0;
    paramNum = 0;
    type[0] = '\0';
}
/*
set edge type name
>> typeName - type name in string
*/
void XLink::SetType(const char * typeName)
{
type[0] = 0;
if(typeName == NULL)
return;
strcpy(type, typeName);
}
/*
set head
>> h - pointer to the head tensor (the output of the operation);
       the link does not take ownership of the tensor
*/
void XLink::SetHead(XTensor * h)
{
head = h;
}
/*
add a tail
grows the tail array by one slot and appends t at the end
>> t - pointer to the tail tensor
*/
void XLink::AddTail(XTensor * t)
{
    XTensor ** enlarged = new XTensor*[tailNum + 1];
    for(int i = 0; i < tailNum; i++)
        enlarged[i] = tails[i];
    enlarged[tailNum] = t;
    delete[] tails;
    tails = enlarged;
    tailNum += 1;
}
/*
add two tails in one time
grows the tail array by two slots and appends t1 then t2
>> t1 - pointer to the tail tensor
>> t2 - pointer to another tail tensor
*/
void XLink::AddTwoTails(XTensor * t1, XTensor * t2)
{
    XTensor ** enlarged = new XTensor*[tailNum + 2];
    for(int i = 0; i < tailNum; i++)
        enlarged[i] = tails[i];
    enlarged[tailNum] = t1;
    enlarged[tailNum + 1] = t2;
    delete[] tails;
    tails = enlarged;
    tailNum += 2;
}
/*
add a parameter
>> param - parameter in default type
   NOTE(review): assumes sizeof(DTYPE) <= paramSize — confirm where DTYPE is defined
*/
void XLink::AddParam(DTYPE param)
{
    void * ps = params;
    /* bug fix: the old code allocated only (paramNum + 1) bytes, but each
       parameter occupies a paramSize-byte slot, so the memcpy/store below
       overflowed the buffer; allocate (paramNum + 1) * paramSize bytes */
    params = new char[(paramNum + 1) * paramSize];
    memcpy(params, ps, paramNum * paramSize);
    DTYPE * p = (DTYPE*)((char*)params + paramNum * paramSize);
    *p = param;
    paramNum++;
    delete[] (char*)ps;
}
/*
add a parameter
>> param - pointer to the parameter
>> size - size of the parameter in bytes
   NOTE(review): assumes size <= paramSize — confirm at the call sites
*/
void XLink::AddParam(void * param, int size)
{
    void * ps = params;
    /* bug fix: the old code allocated only (paramNum + 1) bytes, but each
       parameter occupies a paramSize-byte slot, so the copies below
       overflowed the buffer; allocate (paramNum + 1) * paramSize bytes */
    params = new char[(paramNum + 1) * paramSize];
    memcpy(params, ps, paramNum * paramSize);
    char * p = (char*)params + paramNum * paramSize;
    memcpy(p, param, size);
    paramNum++;
    delete[] (char*)ps;
}
/*
create a hyperedge with two input tensors and an output tensor
>> t1 - a tail tensor
>> t2 - another tail tensor (may be NULL for unary operations)
>> h - head tensor
>> typeName - name of edge type
*/
void XLink::MakeLink(XTensor * t1, XTensor * t2, XTensor * h, const char * typeName)
{
    /* bug fix: the guard was inverted (h != NULL), which returned for every
       valid head and dereferenced a NULL head otherwise */
    if(h == NULL)
        return;

    /* forward: record which tensors were used to produce h */
    XLink &income = h->income;
    income.Reset();
    income.SetHead(h);

    if(t1 != NULL && t2 != NULL)
        income.AddTwoTails(t1, t2);
    else if(t1 != NULL)
        income.AddTail(t1);
    else{
        ShowNTErrors("TODO!");
    }

    income.SetType(typeName);

    /* backward for t1: h is reachable from t1 */
    if(t1 != NULL){
        XLink &outgo = t1->outgo;
        /* NOTE(review): this asserts outgo.head != t1; verify the intended
           invariant — it looks like the head of the outgoing link may be
           expected to be set elsewhere */
        CheckNTErrors(outgo.head != t1, "Wrong head of the hyperedge!");
        outgo.AddTail(h);
    }

    /* backward for t2 */
    if(t2 != NULL){
        XLink &outgo = t2->outgo;
        CheckNTErrors(outgo.head != t2, "Wrong head of the hyperedge!");
        outgo.AddTail(h);
    }
}
/*
create a hyper edge with a list of tensors and an output tensor
>> list - a list of input tensors
>> h - head tensor
>> typeName - name of edge type
*/
void XLink::MakeLink(XList * list, XTensor * h, const char * typeName)
{
    /* consistency with the two-tensor MakeLink: silently ignore calls
       without a head or without an input list instead of dereferencing NULL */
    if(h == NULL || list == NULL)
        return;

    /* forward: record which tensors were used to produce h */
    XLink &income = h->income;
    income.Reset();
    income.SetHead(h);
    income.SetType(typeName);

    for(int i = 0; i < list->count; i++){
        XTensor * t = (XTensor*)list->GetItem(i);
        income.AddTail(t);
    }

    /* backward: h is reachable from each input tensor */
    for(int i = 0; i < list->count; i++){
        XTensor * t = (XTensor*)list->GetItem(i);
        XLink &outgo = t->outgo;
        CheckNTErrors(outgo.head != t, "Wrong head of the hyperedge!");
        outgo.AddTail(h);
    }
}
/*
add a parameter (in the default data type) to the incoming link of a head tensor
>> h - head
>> param - parameter we want to introduce
*/
void XLink::AddParamToHead(XTensor * h, DTYPE param)
{
    /* bug fix: the guard was inverted (h != NULL), so valid heads returned
       early and a NULL head would be dereferenced */
    if(h == NULL)
        return;
    h->income.AddParam(param);
}
/*
add an integer parameter to the incoming link of a head tensor
>> h - head
>> param - parameter we want to introduce
*/
void XLink::AddParamToHeadInt(XTensor * h, int param)
{
    /* bug fix: the guard was inverted (h != NULL), so valid heads returned
       early and a NULL head would be dereferenced */
    if(h == NULL)
        return;
    h->income.AddParam(&param, sizeof(int));
}
} // namespace nts(NiuTrans.Tensor)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-04
*/
#include <stdio.h>
#include "XGlobal.h"
#include "XTensor.h"
#ifndef __XLINK_H__
#define __XLINK_H__
#include "XGlobal.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
/* cross reference */
struct XTensor;
#define MAX_OP_NAME_LENGTH 16
/*
This defines the link among tensors in networks. XLink can be
cast as a hyperedge in a graph. when we compute on tensors, we actually create a
network where nodes are tensors and edges the connections among them. Each connection is
a hyperedge whose head is the output tensor and tails are input tensors. E.g,
c = a + b
represents a network with three nodes (a, b and c) and a hyperedge that links a and b (tails) to c (head).
+ (=c)
/ \
a b
for c, we have a incoming edge (a, b) -> c
for a, we also have a edge c -> a in the reverse order (in a view of acyclic directed graphs)
*/
struct XLink
{
/* head of the hyperedge, i.e., the output tensor of the operation */
XTensor * head;
/* tails of the hyperedge, i.e., the input tensors of the operation */
XTensor ** tails;
/* number of tails */
int tailNum;
/* parameters used. e.g., c = a * b * \alpha
scalar \alpha is the parameter; stored as a raw buffer of
paramNum fixed-size slots of paramSize bytes each */
void * params;
/* number of parameters */
int paramNum;
/* size (in bytes) of each parameter slot; shared by all links */
static int paramSize;
/* name of the hyperedge type. e.g., sum, mul ... */
char type[MAX_OP_NAME_LENGTH];
/* constructor */
XLink();
/* destructor */
~XLink();
/* reset to the empty state, releasing any owned memory */
void Reset();
/* set edge type name */
void SetType(const char * typeName);
/* set head */
void SetHead(XTensor * h);
/* add a tail */
void AddTail(XTensor * t);
/* add two tails in one time */
void AddTwoTails(XTensor * t1, XTensor * t2);
/* add a parameter in the default data type (DTYPE) */
void AddParam(DTYPE param);
/* add a parameter of a given size in bytes */
void AddParam(void * param, int size);
/* create a hyper edge with two input tensors and a output tensor */
static
void MakeLink(XTensor * t1, XTensor * t2, XTensor * h, const char * typeName);
/* create a hyper edge with a list of tensors and a output tensor */
static
void MakeLink(XList * list, XTensor * h, const char * typeName);
/* add a parameter to the incoming link of a head tensor */
static
void AddParamToHead(XTensor * h, DTYPE param);
/* add an integer parameter to the incoming link of a head tensor */
static
void AddParamToHeadInt(XTensor * h, int param);
};
} // namespace nts(NiuTrans.Tensor)
#endif // __XLINK_H__
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
*
* We define various names here
*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-05
* It was really HOT these days. I can't imagine what a hot day here in Shenyang!
*/
#ifndef __XNAME_H__
#define __XNAME_H__
namespace nts { // namespace nts(NiuTrans.Tensor)
/* names of the math operations; each tensor operation tags the hyperedge
   it creates with one of these strings (see XLink::SetType) */
#define MATH_MATMUL "M_MATMUL"
#define MATH_CONCATENATESOLY "M_CONCATENATESOLY"
#define MATH_COPYVALUES "M_COPYVALUES"
#define MATH_MATRIXMUL "M_MATRIXMUL"
#define MATH_MATRIXMUL2D "M_MATRIXMUL2D"
#define MATH_MATRIXMULBATCHED "M_MATRIXMULBATCHED"
#define MATH_MERGE "M_MERGE"
#define MATH_MULTIPLY "M_MULTIPLY"
#define MATH_REDUCEMAX "M_REDUCEMAX"
#define MATH_REDUCESUM "M_REDUCESUM"
#define MATH_SELECTRANGE "M_SELECTRANGE"
#define MATH_SORT "M_SORT"
#define MATH_SUM "M_SUM"
#define MATH_TOPK "M_TOPK"
#define MATH_UNSQUEEZE "M_UNSQUEEZE"
} // namespace nts(NiuTrans.Tensor)
#endif // __XNAME_H__
\ No newline at end of file
...@@ -38,7 +38,7 @@ ...@@ -38,7 +38,7 @@
#include "XMem.h" #include "XMem.h"
#include "XHeap.h" #include "XHeap.h"
#include "XBLAS.h" #include "XBLAS.h"
#include "core/MergeBlockLists.h" #include "core/shape/MergeBlockLists.h"
#ifdef USE_CUDA #ifdef USE_CUDA
...@@ -47,8 +47,8 @@ ...@@ -47,8 +47,8 @@
#include <cublas_v2.h> #include <cublas_v2.h>
#include <cuda.h> #include <cuda.h>
#include <curand.h> #include <curand.h>
#include "core/FlushToMem.cuh" #include "core/utilities/FlushToMem.cuh"
#include "core/SetAscendingOrder.cuh" #include "core/utilities/SetAscendingOrder.cuh"
#endif #endif
......
...@@ -21,7 +21,7 @@ ...@@ -21,7 +21,7 @@
* *
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2017-07-31 * $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2017-07-31
* I'm working while most of the students are enjoying their holidays :( * I'm working while most of the students are enjoying their holidays :(
* $Update by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2017-11-18 bug fixes * $Updated by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2017-11-18 bug fixes
* *
*/ */
...@@ -36,10 +36,14 @@ ...@@ -36,10 +36,14 @@
#include "XList.h" #include "XList.h"
#include "XDataType.h" #include "XDataType.h"
#include "XMem.h" #include "XMem.h"
#include "XLink.h"
/* the nts (NiuTrans.Tensor) namespace */ /* the nts (NiuTrans.Tensor) namespace */
namespace nts{ namespace nts{
/* cross reference */
struct XLink;
/* define the maximum number of dimensions in a tensor */ /* define the maximum number of dimensions in a tensor */
#define MAX_TENSOR_DIM_NUM 6 #define MAX_TENSOR_DIM_NUM 6
#define USE_BATCHED_STRIDED_MAT_MUL #define USE_BATCHED_STRIDED_MAT_MUL
...@@ -47,9 +51,7 @@ namespace nts{ ...@@ -47,9 +51,7 @@ namespace nts{
#define MIN_TENSOR_SPLIT_LIST_NUM 1024 #define MIN_TENSOR_SPLIT_LIST_NUM 1024
#define MIN_TENSOR_CAT_NUM 8 #define MIN_TENSOR_CAT_NUM 8
/* /* computation flags */
computation flags
*/
#define UNSAFE_BUT_FAST_MEM #define UNSAFE_BUT_FAST_MEM
#define FAST_MATRIX #define FAST_MATRIX
...@@ -59,7 +61,6 @@ is the parent class of XMatrix. ...@@ -59,7 +61,6 @@ is the parent class of XMatrix.
*/ */
struct XTensor struct XTensor
{ {
public:
/* memory pool */ /* memory pool */
XMem * mem; XMem * mem;
...@@ -130,10 +131,23 @@ public: ...@@ -130,10 +131,23 @@ public:
/* indicates whether the tensor is initialized or not */ /* indicates whether the tensor is initialized or not */
bool isInit; bool isInit;
/******************************************************************* /*
XTensor untilities the link used to form networks. Note that when we compute on tensors, we actually create a
*/ network where nodes are tensors and edges the connections among them. Each connection is
public: a hyperedge whose head is the output tensor and tails are input tensors. E.g,
c = a + b
represents a network with three nodes (a, b and c) and a hyperedge that links a and b (tails) to c (head).
Here "income" keeps which nodes (tensors) are used to form the current node (tensor).
*/
XLink income;
/* It keeps which nodes (tensors) we go to from the current node (tensor). */
XLink outgo;
/********************
XTensor untilities
********************/
/* constructor */ /* constructor */
XTensor(); XTensor();
......
...@@ -403,7 +403,7 @@ int ToCPUInt(int devID, void * value) ...@@ -403,7 +403,7 @@ int ToCPUInt(int devID, void * value)
} }
} }
/* set the value that is kept on a device */ /* assign a number to a variable that is kept on a specified device */
bool SetToDevice(int devID, void * p, DTYPE value) bool SetToDevice(int devID, void * p, DTYPE value)
{ {
if(p == NULL) if(p == NULL)
...@@ -412,7 +412,7 @@ bool SetToDevice(int devID, void * p, DTYPE value) ...@@ -412,7 +412,7 @@ bool SetToDevice(int devID, void * p, DTYPE value)
if(devID < 0) if(devID < 0)
*(DTYPE*)p = value; *(DTYPE*)p = value;
else{ else{
XMemCopy(p, devID, &value, -1, sizeof(DTYPE*)); XMemCopy(p, devID, &value, -1, sizeof(DTYPE));
} }
return true; return true;
......
...@@ -26,47 +26,49 @@ ...@@ -26,47 +26,49 @@
#include "../XTensor.h" #include "../XTensor.h"
#include "Concatenate.h" #include "shape/Concatenate.h"
#include "ConcatenateSolely.h" #include "shape/ConcatenateSolely.h"
#include "CopyBlocks.h" #include "movement/CopyBlocks.h"
#include "CopyBlocksInGrid.h" #include "movement/CopyBlocksInGrid.h"
#include "CopyBlocksOnSite.h" #include "movement/CopyBlocksOnSite.h"
#include "CopyData2D.h" #include "movement/CopyData2D.h"
#include "CopyIndexed.h" #include "movement/CopyIndexed.h"
#include "CopyInGrid.h" #include "movement/CopyInGrid.h"
#include "CopyValues.h" #include "movement/CopyValues.h"
#include "FlushToMem.h" #include "utilities/FlushToMem.h"
#include "MakeMergeBlockIndex.h" #include "shape/MakeMergeBlockIndex.h"
#include "MakeSplitBlockIndex.h" #include "shape/MakeSplitBlockIndex.h"
#include "MatrixMul.h" #include "arithmetic/MatrixMul.h"
#include "MatrixMul2D.h" #include "arithmetic/MatrixMul2D.h"
#include "MatrixMul2DMultiTheading.h" #include "arithmetic/MatrixMul2DMultiTheading.h"
#include "MatrixMul2DParallel.h" #include "arithmetic/MatrixMul2DParallel.h"
#include "MatrixMulBatched.h" #include "arithmetic/MatrixMulBatched.h"
#include "MatrixMULBatchedCPU.h" #include "arithmetic/MatrixMULBatchedCPU.h"
#include "Merge.h" #include "shape/Merge.h"
#include "MergeBlockLists.h" #include "shape/MergeBlockLists.h"
#include "MultiplyElementWise.h" #include "arithmetic/Multiply.h"
#include "Negate.h" #include "arithmetic/Negate.h"
#include "Normalize.h" #include "math/Normalize.h"
#include "Power.h" #include "shape/Permute.h"
#include "ReduceMax.h" #include "math/Power.h"
#include "ReduceMean.h" #include "reduce/ReduceMax.h"
#include "ReduceStandardVariance.h" #include "reduce/ReduceMean.h"
#include "ReduceSum.h" #include "reduce/ReduceStandardVariance.h"
#include "ReduceSumSquared.h" #include "reduce/ReduceSum.h"
#include "ReduceVariance.h" #include "reduce/ReduceSumSquared.h"
#include "ScaleAndShift.h" #include "reduce/ReduceVariance.h"
#include "Select.h" #include "math/ScaleAndShift.h"
#include "SetData.h" #include "getandset/Select.h"
#include "Sort.h" #include "getandset/SetData.h"
#include "Split.h" #include "sort/Sort.h"
#include "Sum.h" #include "shape/Split.h"
#include "SumByColumnTV.h" #include "arithmetic/Sum.h"
#include "SumByColumnVT.h" #include "arithmetic/SumByColumnTV.h"
#include "TopK.h" #include "arithmetic/SumByColumnVT.h"
#include "Unsqueeze.h" #include "sort/TopK.h"
#include "XMatrixSegment.h" #include "shape/Transpose.h"
#include "XTensorBLAS.h" #include "shape/Unsqueeze.h"
#include "utilities/XMatrixSegment.h"
#include "arithmetic/XTensorBLAS.h"
#endif // __CHEADER_H__ #endif // __CHEADER_H__
\ No newline at end of file
...@@ -37,41 +37,33 @@ public: ...@@ -37,41 +37,33 @@ public:
concatenate a list of tensors along a given dimension concatenate a list of tensors along a given dimension
Note that this is actually a wrapper that selects "ConcatenateSolely" Note that this is actually a wrapper that selects "ConcatenateSolely"
or "Merge" by means of the tensor shapes */ or "Merge" by means of the tensor shapes */
extern "C"
void Concatenate(XList * smalls, XTensor * big, int dim); void Concatenate(XList * smalls, XTensor * big, int dim);
/* concatenate two tensors along a given dimension */ /* concatenate two tensors along a given dimension */
extern "C"
void Concatenate(XTensor * smallA, XTensor * smallB, XTensor * big, int dim); void Concatenate(XTensor * smallA, XTensor * smallB, XTensor * big, int dim);
/* concatenate a list of tensors along a given dimension */ /* concatenate a list of tensors along a given dimension */
extern "C"
static static
void ConcatenateSolely(XList * smalls, XTensor * big, int dim); void ConcatenateSolely(XList * smalls, XTensor * big, int dim);
/* copy selected sub-tensors */ /* copy selected sub-tensors */
extern "C"
static static
bool CopyIndexed(XTensor * s, XTensor * t, int dim, int * srcIndex, int indexSize, int * tgtIndex, int copyNum); bool CopyIndexed(XTensor * s, XTensor * t, int dim, int * srcIndex, int indexSize, int * tgtIndex, int copyNum);
/* copy a number of blocks in grid */ /* copy a number of blocks in grid */
extern "C"
static static
void CopyInGrid(XTensor * s, XTensor * t, int * index, int blockDim, int blockNumInGrid, bool isIndexOnDev = false); void CopyInGrid(XTensor * s, XTensor * t, int * index, int blockDim, int blockNumInGrid, bool isIndexOnDev = false);
/* copy s to t */ /* copy s to t */
extern "C"
static static
bool CopyValues(XTensor * s, XTensor * t, XStream * stream = NULL); bool CopyValues(XTensor * s, XTensor * t, XStream * stream = NULL);
/* set target data block index for the data movement in merge */ /* set target data block index for the data movement in merge */
extern "C"
static static
void MakeMergeBlockIndex(int * blockIndex, int blockNum, int blockNumInMerge, void MakeMergeBlockIndex(int * blockIndex, int blockNum, int blockNumInMerge,
int splitSizeInGrid, int gridSize, int gridNum, XMem * mem); int splitSizeInGrid, int gridSize, int gridNum, XMem * mem);
/* set target data block index for the data movement in split */ /* set target data block index for the data movement in split */
extern "C"
static static
void MakeSplitBlockIndex(int * blockIndex, int splitNum, int blockSplitSize, int blockNum, XMem * mem); void MakeSplitBlockIndex(int * blockIndex, int splitNum, int blockSplitSize, int blockNum, XMem * mem);
...@@ -86,7 +78,6 @@ public: ...@@ -86,7 +78,6 @@ public:
tensor of the result C. C should be a tensor of z * x * n * m. Obviously C = A * B performs tensor of the result C. C should be a tensor of z * x * n * m. Obviously C = A * B performs
normal matrix multiplication if A = y * z and B = x * y. normal matrix multiplication if A = y * z and B = x * y.
*/ */
extern "C"
static static
void MatrixMul(XTensor * a, MATRIX_TRANS_TYPE transposedA, XTensor * b, MATRIX_TRANS_TYPE transposedB, XTensor * c, void MatrixMul(XTensor * a, MATRIX_TRANS_TYPE transposedA, XTensor * b, MATRIX_TRANS_TYPE transposedB, XTensor * c,
DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0, XPRunner * parallelRunner = NULL); DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0, XPRunner * parallelRunner = NULL);
...@@ -96,7 +87,6 @@ public: ...@@ -96,7 +87,6 @@ public:
c = trans(a) * trans(b) * alpha + c * beta c = trans(a) * trans(b) * alpha + c * beta
where trans() return the transposed matrix if the flag is fired where trans() return the transposed matrix if the flag is fired
*/ */
extern "C"
static static
void MatrixMul2D(XTensor * a, MATRIX_TRANS_TYPE transposedA, XTensor * b, MATRIX_TRANS_TYPE transposedB, XTensor * c, void MatrixMul2D(XTensor * a, MATRIX_TRANS_TYPE transposedA, XTensor * b, MATRIX_TRANS_TYPE transposedB, XTensor * c,
DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0, XPRunner * parallelRunner = NULL, XStream * stream = NULL); DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0, XPRunner * parallelRunner = NULL, XStream * stream = NULL);
...@@ -105,7 +95,6 @@ public: ...@@ -105,7 +95,6 @@ public:
matrix multiplication for a block (x1,y1) - (x2,y2) matrix multiplication for a block (x1,y1) - (x2,y2)
where (x1,y1) is the upper-left corner and (x2,y2) is the bottom-right corner where (x1,y1) is the upper-left corner and (x2,y2) is the bottom-right corner
*/ */
extern "C"
static static
void MatrixMul2DMultiTheading(XList * args); void MatrixMul2DMultiTheading(XList * args);
...@@ -114,7 +103,6 @@ public: ...@@ -114,7 +103,6 @@ public:
c = trans(a) * trans(b) * alpha + c * beta c = trans(a) * trans(b) * alpha + c * beta
where trans() return the transposed matrix if the flag is fired where trans() return the transposed matrix if the flag is fired
*/ */
extern "C"
static static
void MatrixMul2DParallel(XTensor * a, MATRIX_TRANS_TYPE transposedA, XTensor * b, MATRIX_TRANS_TYPE transposedB, XTensor * c, void MatrixMul2DParallel(XTensor * a, MATRIX_TRANS_TYPE transposedA, XTensor * b, MATRIX_TRANS_TYPE transposedB, XTensor * c,
DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0, XPRunner * parallelRunner = NULL); DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0, XPRunner * parallelRunner = NULL);
...@@ -126,36 +114,29 @@ public: ...@@ -126,36 +114,29 @@ public:
ci = trans(ai) * trans(bi) * alpha + cm * beta ci = trans(ai) * trans(bi) * alpha + cm * beta
where trans() returns the transposed matrix if the flag is fired where trans() returns the transposed matrix if the flag is fired
*/ */
extern "C"
static static
void MatrixMulBatched(XTensor * a, MATRIX_TRANS_TYPE transposedA, XTensor * b, MATRIX_TRANS_TYPE transposedB, XTensor * c, void MatrixMulBatched(XTensor * a, MATRIX_TRANS_TYPE transposedA, XTensor * b, MATRIX_TRANS_TYPE transposedB, XTensor * c,
DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0, XPRunner * parallelRunner = NULL); DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0, XPRunner * parallelRunner = NULL);
/* matrix multiplication in batch mode (CPU code) */ /* matrix multiplication in batch mode (CPU code) */
extern "C"
static static
void MatrixMULBatchedCPU(XList * a, MATRIX_TRANS_TYPE transposedA, XList * b, MATRIX_TRANS_TYPE transposedB, XList * c, void MatrixMULBatchedCPU(XList * a, MATRIX_TRANS_TYPE transposedA, XList * b, MATRIX_TRANS_TYPE transposedB, XList * c,
DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0); DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0);
/* transform a tensor by merging it alone with a dimension, e.g., (M, N/3, 3) -> (M, N) */ /* transform a tensor by merging it alone with a dimension, e.g., (M, N/3, 3) -> (M, N) */
extern "C"
void Merge(XTensor * s, XTensor * t, int whereToMerge, int leadingDim = -1); void Merge(XTensor * s, XTensor * t, int whereToMerge, int leadingDim = -1);
/* merge small tensors into a big tensor */ /* merge small tensors into a big tensor */
extern "C"
void Merge(XList * smalls, XTensor * big, int whereToMerge); void Merge(XList * smalls, XTensor * big, int whereToMerge);
/* merge data by blocks */ /* merge data by blocks */
extern "C"
void MergeBlockLists(XList * sourceList, int * blockSizes, int blockNum, void * target, XMem * myMem); void MergeBlockLists(XList * sourceList, int * blockSizes, int blockNum, void * target, XMem * myMem);
/* element-wise product of two tensors */ /* element-wise product of two tensors */
extern "C"
static static
void MultiplyElementWise(XTensor * a, XTensor * b, XTensor * c, int leadingDim, DTYPE alpha = 0); void MultiplyElementWise(XTensor * a, XTensor * b, XTensor * c, int leadingDim, DTYPE alpha = 0);
/* set every entry to its minus value */ /* set every entry to its minus value */
extern "C"
void Negate(XTensor * a); void Negate(XTensor * a);
/* /*
...@@ -163,16 +144,13 @@ public: ...@@ -163,16 +144,13 @@ public:
y = a * (x-mean)/sqrt(variance+\epsilon) + b y = a * (x-mean)/sqrt(variance+\epsilon) + b
where a and b are the scalar and bias respectively, and \epsilon is the adjustment parameter. where a and b are the scalar and bias respectively, and \epsilon is the adjustment parameter.
*/ */
extern "C"
static static
void Normalize(XTensor * input, XTensor * output, int dim, XTensor * mean, XTensor * var, XTensor * a, XTensor * b, DTYPE epsilon); void Normalize(XTensor * input, XTensor * output, int dim, XTensor * mean, XTensor * var, XTensor * a, XTensor * b, DTYPE epsilon);
/* get the power(x, y) */ /* get the power(x, y) */
extern "C"
void Power(XTensor * a, DTYPE p); void Power(XTensor * a, DTYPE p);
/* get the max value of the items along a dimension of the tensor. */ /* get the max value of the items along a dimension of the tensor. */
extern "C"
static static
void ReduceMax(XTensor * input, XTensor * output, int dim); void ReduceMax(XTensor * input, XTensor * output, int dim);
...@@ -180,7 +158,6 @@ public: ...@@ -180,7 +158,6 @@ public:
get the mean value along a dimension of the tensor. For a 1-dimensional data array a, get the mean value along a dimension of the tensor. For a 1-dimensional data array a,
mean = (1/n) * sum_i input_i mean = (1/n) * sum_i input_i
*/ */
extern "C"
static static
void ReduceMean(XTensor * input, XTensor * output, int dim); void ReduceMean(XTensor * input, XTensor * output, int dim);
...@@ -188,7 +165,6 @@ public: ...@@ -188,7 +165,6 @@ public:
standard variance of the items along a dimension of the tensor. For a 1-dimensional data array a, standard variance of the items along a dimension of the tensor. For a 1-dimensional data array a,
variance = (1/n * \sum_i (a_i - mean)^2)^0.5 variance = (1/n * \sum_i (a_i - mean)^2)^0.5
*/ */
extern "C"
static static
void ReduceStandardVariance(XTensor * input, XTensor * output, int dim, XTensor * mean); void ReduceStandardVariance(XTensor * input, XTensor * output, int dim, XTensor * mean);
...@@ -197,7 +173,6 @@ public: ...@@ -197,7 +173,6 @@ public:
sum = \sum_i (a_i - shift) if isExp == false sum = \sum_i (a_i - shift) if isExp == false
sum = \sum_i exp(a_i - shift) if isExp == true sum = \sum_i exp(a_i - shift) if isExp == true
*/ */
extern "C"
static static
void ReduceSum(XTensor * input, XTensor * output, int dim, XTensor * shift = NULL, DTYPE power = (DTYPE)1.0F, bool isExp = false); void ReduceSum(XTensor * input, XTensor * output, int dim, XTensor * shift = NULL, DTYPE power = (DTYPE)1.0F, bool isExp = false);
...@@ -205,7 +180,6 @@ public: ...@@ -205,7 +180,6 @@ public:
squared sum of the items along a dimension of the tensor. For a 1-dimensional data array a, squared sum of the items along a dimension of the tensor. For a 1-dimensional data array a,
sum = \sum_i (a_i - shift)^2 sum = \sum_i (a_i - shift)^2
*/ */
extern "C"
static static
void ReduceSumSquared(XTensor * input, XTensor * output, int dim, XTensor * shift); void ReduceSumSquared(XTensor * input, XTensor * output, int dim, XTensor * shift);
...@@ -213,73 +187,59 @@ public: ...@@ -213,73 +187,59 @@ public:
variance of the items along a dimension of the tensor. For a 1-dimensional data array a, variance of the items along a dimension of the tensor. For a 1-dimensional data array a,
variance = 1/n * \sum_i (a_i - mean)^2 variance = 1/n * \sum_i (a_i - mean)^2
*/ */
extern "C"
static static
void ReduceVariance(XTensor * input, XTensor * output, int dim, XTensor * mean); void ReduceVariance(XTensor * input, XTensor * output, int dim, XTensor * mean);
/* scale and shift all tensor entires */ /* scale and shift all tensor entires */
extern "C"
static static
void ScaleAndShift(XTensor * a, DTYPE scale, DTYPE shift); void ScaleAndShift(XTensor * a, DTYPE scale, DTYPE shift);
/* transform a tensor by splitting it, e.g., (M, N) -> (M, N/3, 3) */ /* transform a tensor by splitting it, e.g., (M, N) -> (M, N/3, 3) */
extern "C"
void Split(XTensor * s, XTensor * t, int whereToSplit, int splitNum); void Split(XTensor * s, XTensor * t, int whereToSplit, int splitNum);
/* split a big tensor into small tensors */ /* split a big tensor into small tensors */
extern "C"
void Split(XTensor * big, XList * smalls, int whereToSplit, int splitNum); void Split(XTensor * big, XList * smalls, int whereToSplit, int splitNum);
/* tensor summation c = a + b * \beta */ /* tensor summation c = a + b * \beta */
extern "C"
static static
void Sum(XTensor * a, XTensor * b, XTensor * c = NULL, DTYPE beta = (DTYPE)1.0); void Sum(XTensor * a, XTensor * b, XTensor * c = NULL, DTYPE beta = (DTYPE)1.0);
/* sum of a tensor and a (column) vector */ /* sum of a tensor and a (column) vector */
extern "C"
static static
void SumByColumnTV(XTensor * a, XTensor * b, XTensor * c = NULL, DTYPE beta = (DTYPE)1.0); void SumByColumnTV(XTensor * a, XTensor * b, XTensor * c = NULL, DTYPE beta = (DTYPE)1.0);
/* sum of a (column) vector and a tensor */ /* sum of a (column) vector and a tensor */
extern "C"
static static
void SumByColumnVT(XTensor * a, XTensor * b, XTensor * c = NULL, DTYPE beta = (DTYPE)1.0); void SumByColumnVT(XTensor * a, XTensor * b, XTensor * c = NULL, DTYPE beta = (DTYPE)1.0);
/* get the top-k items along a given dimension */ /* get the top-k items along a given dimension */
extern "C"
static static
void TopK(XTensor * a, XTensor * b, XTensor * index, int dim, int k); void TopK(XTensor * a, XTensor * b, XTensor * index, int dim, int k);
/* insert a dimension by copying the blocks for x times (where x is the size of the inerted dimension) */ /* insert a dimension by copying the blocks for x times (where x is the size of the inerted dimension) */
extern "C"
void Unsqueeze(XTensor * a, XTensor * b, int dim, int dSize); void Unsqueeze(XTensor * a, XTensor * b, int dim, int dSize);
/* segmentation and parallel processing for 2d tensors (i.e., matrices) */ /* segmentation and parallel processing for 2d tensors (i.e., matrices) */
/* segment a 2d tensor (i.e., matrix) into blocks and run jobs in parallel */ /* segment a 2d tensor (i.e., matrix) into blocks and run jobs in parallel */
extern "C"
static static
void RunParallel2D(XPRunner * parallelRunner, void * job, int opNum, int rowNum, int colNum, int argNum, ...); void RunParallel2D(XPRunner * parallelRunner, void * job, int opNum, int rowNum, int colNum, int argNum, ...);
/* segment a block into sub-blocks */ /* segment a block into sub-blocks */
extern "C"
static static
int SegmentTensor2D(int rowNum, int colNum, int blockNum, int * blockIndex); int SegmentTensor2D(int rowNum, int colNum, int blockNum, int * blockIndex);
/* segment a block into sub-blocks */ /* segment a block into sub-blocks */
extern "C"
static static
int SegmentTensor2DInRows(int rowNum, int colNum, int blockNum, int * blockIndex); int SegmentTensor2DInRows(int rowNum, int colNum, int blockNum, int * blockIndex);
/* matrix multiplication (BLAS) */ /* matrix multiplication (BLAS) */
extern "C"
static static
void MatrixMULCPU(XTensor * a, MATRIX_TRANS_TYPE transposedA, XTensor * b, MATRIX_TRANS_TYPE transposedB, XTensor * c, DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0); void MatrixMULCPU(XTensor * a, MATRIX_TRANS_TYPE transposedA, XTensor * b, MATRIX_TRANS_TYPE transposedB, XTensor * c, DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0);
#ifdef USE_CUDA #ifdef USE_CUDA
/* matrix multiplication via cuda version BLAS */ /* matrix multiplication via cuda version BLAS */
extern "C"
static static
void CudaBLASMatrixMUL(cublasHandle_t * handle, void CudaBLASMatrixMUL(cublasHandle_t * handle,
void * a, MATRIX_TRANS_TYPE transposedA, TENSOR_DATA_TYPE dataTypeA, void * a, MATRIX_TRANS_TYPE transposedA, TENSOR_DATA_TYPE dataTypeA,
...@@ -288,7 +248,6 @@ public: ...@@ -288,7 +248,6 @@ public:
int na, int ma, int nb, int mb, int nc, int mc, DTYPE alpha = (DTYPE)1.0, DTYPE beta = 1.0); int na, int ma, int nb, int mb, int nc, int mc, DTYPE alpha = (DTYPE)1.0, DTYPE beta = 1.0);
/* matrix multiplication in batch mode via cuda version BLAS */ /* matrix multiplication in batch mode via cuda version BLAS */
extern "C"
static static
void CudaBLASMatrixMULBatched(cublasHandle_t * handle, void CudaBLASMatrixMULBatched(cublasHandle_t * handle,
const void ** a, MATRIX_TRANS_TYPE transposedA, TENSOR_DATA_TYPE dataTypeA, const void ** a, MATRIX_TRANS_TYPE transposedA, TENSOR_DATA_TYPE dataTypeA,
...@@ -297,7 +256,6 @@ public: ...@@ -297,7 +256,6 @@ public:
int count, int na, int ma, int nb, int mb, int nc, int mc, DTYPE alpha = (DTYPE)1.0, DTYPE beta = 1.0); int count, int na, int ma, int nb, int mb, int nc, int mc, DTYPE alpha = (DTYPE)1.0, DTYPE beta = 1.0);
/* matrix multiplication in batch and strided mode via cuda version BLAS */ /* matrix multiplication in batch and strided mode via cuda version BLAS */
extern "C"
static static
void CudaBLASMatrixMULBatchedStrided(cublasHandle_t * handle, void CudaBLASMatrixMULBatchedStrided(cublasHandle_t * handle,
const void * a, MATRIX_TRANS_TYPE transposedA, TENSOR_DATA_TYPE dataTypeA, long long int strideA, const void * a, MATRIX_TRANS_TYPE transposedA, TENSOR_DATA_TYPE dataTypeA, long long int strideA,
...@@ -306,7 +264,6 @@ public: ...@@ -306,7 +264,6 @@ public:
int count, int na, int ma, int nb, int mb, int nc, int mc, DTYPE alpha = (DTYPE)1.0, DTYPE beta = 1.0); int count, int na, int ma, int nb, int mb, int nc, int mc, DTYPE alpha = (DTYPE)1.0, DTYPE beta = 1.0);
/* matrix multiplication in batch mode via cuda version BLAS */ /* matrix multiplication in batch mode via cuda version BLAS */
extern "C"
static static
void CudaBLASMatrixMULList(cublasHandle_t * handle, XList * a, MATRIX_TRANS_TYPE transposedA, XList * b, MATRIX_TRANS_TYPE transposedB, XList * c, void CudaBLASMatrixMULList(cublasHandle_t * handle, XList * a, MATRIX_TRANS_TYPE transposedA, XList * b, MATRIX_TRANS_TYPE transposedB, XList * c,
int count, DTYPE alpha = (DTYPE)1.0, DTYPE beta = 1.0); int count, DTYPE alpha = (DTYPE)1.0, DTYPE beta = 1.0);
......
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XTensor.h" #include "../../XTensor.h"
#include "MatrixMULBatchedCPU.h" #include "MatrixMULBatchedCPU.h"
#include "MatrixMul2D.h" #include "MatrixMul2D.h"
#include "XTensorBLAS.h" #include "XTensorBLAS.h"
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __MATRIXMULBATCHEDCPU_H__ #ifndef __MATRIXMULBATCHEDCPU_H__
#define __MATRIXMULBATCHEDCPU_H__ #define __MATRIXMULBATCHEDCPU_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -19,8 +19,9 @@ ...@@ -19,8 +19,9 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XTensor.h" #include "../../XTensor.h"
#include "../XDevice.h" #include "../../XDevice.h"
#include "../../XName.h"
#include "MatrixMul.h" #include "MatrixMul.h"
#include "MatrixMul2D.h" #include "MatrixMul2D.h"
#include "MatrixMULBatchedCPU.h" #include "MatrixMULBatchedCPU.h"
...@@ -58,6 +59,12 @@ void MatrixMul(XTensor * a, MATRIX_TRANS_TYPE transposedA, ...@@ -58,6 +59,12 @@ void MatrixMul(XTensor * a, MATRIX_TRANS_TYPE transposedA,
CheckNTErrors((a->order >= 2 && b->order >= 2 && c->order >= 2), CheckNTErrors((a->order >= 2 && b->order >= 2 && c->order >= 2),
"Input tensors must have a order > 2!"); "Input tensors must have a order > 2!");
/* make tensor connections */
XLink::MakeLink(a, b, c, MATH_MATRIXMUL);
XLink::AddParamToHeadInt(c, transposedA);
XLink::AddParamToHeadInt(c, transposedB);
XLink::AddParamToHead(c, alpha);
XLink::AddParamToHead(c, beta);
int an = transposedA == X_TRANS ? a->dimSizeRDI[0] : a->dimSizeRDI[1]; int an = transposedA == X_TRANS ? a->dimSizeRDI[0] : a->dimSizeRDI[1];
int am = transposedA == X_TRANS ? a->dimSizeRDI[1] : a->dimSizeRDI[0]; int am = transposedA == X_TRANS ? a->dimSizeRDI[1] : a->dimSizeRDI[0];
int bn = transposedB == X_TRANS ? b->dimSizeRDI[0] : b->dimSizeRDI[1]; int bn = transposedB == X_TRANS ? b->dimSizeRDI[0] : b->dimSizeRDI[1];
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __MATRIXMUL_H__ #ifndef __MATRIXMUL_H__
#define __MATRIXMUL_H__ #define __MATRIXMUL_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -19,7 +19,8 @@ ...@@ -19,7 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XTensor.h" #include "../../XTensor.h"
#include "../../XName.h"
#include "MatrixMul2D.h" #include "MatrixMul2D.h"
#include "MatrixMul2D.cuh" #include "MatrixMul2D.cuh"
#include "MatrixMul2DParallel.h" #include "MatrixMul2DParallel.h"
...@@ -51,6 +52,13 @@ void MatrixMul2D(XTensor * a, MATRIX_TRANS_TYPE transposedA, ...@@ -51,6 +52,13 @@ void MatrixMul2D(XTensor * a, MATRIX_TRANS_TYPE transposedA,
CheckNTErrors((a->order == 2 && b->order == 2 && c->order == 2), CheckNTErrors((a->order == 2 && b->order == 2 && c->order == 2),
"Input tensors must have a order = 2!"); "Input tensors must have a order = 2!");
/* make tensor connections */
XLink::MakeLink(a, b, c, MATH_MATRIXMUL2D);
XLink::AddParamToHeadInt(c, transposedA);
XLink::AddParamToHeadInt(c, transposedB);
XLink::AddParamToHead(c, alpha);
XLink::AddParamToHead(c, beta);
int an = a->dimSize[0], am = a->dimSize[1]; int an = a->dimSize[0], am = a->dimSize[1];
int bn = b->dimSize[0], bm = b->dimSize[1]; int bn = b->dimSize[0], bm = b->dimSize[1];
int cn = c->dimSize[0], cm = c->dimSize[1]; int cn = c->dimSize[0], cm = c->dimSize[1];
......
...@@ -19,8 +19,8 @@ ...@@ -19,8 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XDevice.h" #include "../../XDevice.h"
#include "../XTensor.h" #include "../../XTensor.h"
#include "MatrixMul2D.h" #include "MatrixMul2D.h"
#include "MatrixMul2D.cuh" #include "MatrixMul2D.cuh"
#include "XTensorBLAS.h" #include "XTensorBLAS.h"
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __MATRIXMUL2D_H__ #ifndef __MATRIXMUL2D_H__
#define __MATRIXMUL2D_H__ #define __MATRIXMUL2D_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XTensor.h" #include "../../XTensor.h"
#include "MatrixMul2DMultiTheading.h" #include "MatrixMul2DMultiTheading.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __MATRIXMUL2DMULTITHEADING_H__ #ifndef __MATRIXMUL2DMULTITHEADING_H__
#define __MATRIXMUL2DMULTITHEADING_H__ #define __MATRIXMUL2DMULTITHEADING_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -19,10 +19,10 @@ ...@@ -19,10 +19,10 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XTensor.h" #include "../../XTensor.h"
#include "MatrixMul2DParallel.h" #include "MatrixMul2DParallel.h"
#include "MatrixMul2DMultiTheading.h" #include "MatrixMul2DMultiTheading.h"
#include "XMatrixSegment.h" #include "../utilities/XMatrixSegment.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __MATRIXMUL2DPARALLEL_H__ #ifndef __MATRIXMUL2DPARALLEL_H__
#define __MATRIXMUL2DPARALLEL_H__ #define __MATRIXMUL2DPARALLEL_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -19,8 +19,9 @@ ...@@ -19,8 +19,9 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XTensor.h" #include "../../XTensor.h"
#include "../XDevice.h" #include "../../XDevice.h"
#include "../../XName.h"
#include "MatrixMulBatched.h" #include "MatrixMulBatched.h"
#include "MatrixMULBatchedCPU.h" #include "MatrixMULBatchedCPU.h"
#include "XTensorBLAS.h" #include "XTensorBLAS.h"
...@@ -53,6 +54,12 @@ void MatrixMulBatched(XTensor * a, MATRIX_TRANS_TYPE transposedA, ...@@ -53,6 +54,12 @@ void MatrixMulBatched(XTensor * a, MATRIX_TRANS_TYPE transposedA,
CheckNTErrors((a->order >= 2 && b->order >= 2 && c->order >= 2), CheckNTErrors((a->order >= 2 && b->order >= 2 && c->order >= 2),
"Input tensors must have a order > 2!"); "Input tensors must have a order > 2!");
/* make tensor connections */
XLink::MakeLink(a, b, c, MATH_MATRIXMULBATCHED);
XLink::AddParamToHeadInt(c, transposedA);
XLink::AddParamToHeadInt(c, transposedB);
XLink::AddParamToHead(c, alpha);
XLink::AddParamToHead(c, beta);
int an = transposedA == X_TRANS ? a->dimSizeRDI[0] : a->dimSizeRDI[1]; int an = transposedA == X_TRANS ? a->dimSizeRDI[0] : a->dimSizeRDI[1];
int am = transposedA == X_TRANS ? a->dimSizeRDI[1] : a->dimSizeRDI[0]; int am = transposedA == X_TRANS ? a->dimSizeRDI[1] : a->dimSizeRDI[0];
int bn = transposedB == X_TRANS ? b->dimSizeRDI[0] : b->dimSizeRDI[1]; int bn = transposedB == X_TRANS ? b->dimSizeRDI[0] : b->dimSizeRDI[1];
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __MATRIXMULBATCHED_H__ #ifndef __MATRIXMULBATCHED_H__
#define __MATRIXMULBATCHED_H__ #define __MATRIXMULBATCHED_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -19,9 +19,10 @@ ...@@ -19,9 +19,10 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XTensor.h" #include "../../XTensor.h"
#include "MultiplyElementWise.h" #include "../../XName.h"
#include "MultiplyElementWise.cuh" #include "Multiply.h"
#include "Multiply.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
...@@ -35,16 +36,21 @@ where i is the index of the item ...@@ -35,16 +36,21 @@ where i is the index of the item
>> alpha - the coefficient >> alpha - the coefficient
>> >>
*/ */
void MultiplyElementWise(XTensor * a, XTensor * b, XTensor * c, int leadingDim, DTYPE alpha) void Multiply(XTensor * a, XTensor * b, XTensor * c, int leadingDim, DTYPE alpha)
{ {
int leadingDimRDI = a->order - leadingDim - 1; int leadingDimRDI = a->order - leadingDim - 1;
CheckNTErrors((a->unitNum <= c->unitNum && b->unitNum <= c->unitNum), CheckNTErrors((a->unitNum <= c->unitNum && b->unitNum <= c->unitNum),
"Unmatched tensors in multiplication!"); "Unmatched tensors in multiplication!");
CheckNTErrors((a->order == b->order && a->order == c->order), "Unmatched tensors!"); CheckNTErrors((a->order == b->order && a->order == c->order), "Unmatched tensors!");
/* make tensor connections */
XLink::MakeLink(a, b, c, MATH_MULTIPLY);
XLink::AddParamToHeadInt(c, leadingDim);
XLink::AddParamToHead(c, alpha);
#ifdef USE_CUDA #ifdef USE_CUDA
if (a->devID >= 0 || b->devID >= 0 || c->devID >= 0) { if (a->devID >= 0 || b->devID >= 0 || c->devID >= 0) {
CudaMultiplyElementWise(a, b, c, leadingDim, alpha); CudaMultiply(a, b, c, leadingDim, alpha);
return; return;
} }
#endif #endif
......
...@@ -19,10 +19,10 @@ ...@@ -19,10 +19,10 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XDevice.h" #include "../../XDevice.h"
#include "../XTensor.h" #include "../../XTensor.h"
#include "MultiplyElementWise.h" #include "Multiply.h"
#include "MultiplyElementWise.cuh" #include "Multiply.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
...@@ -121,7 +121,7 @@ where i is the item index ...@@ -121,7 +121,7 @@ where i is the item index
>> alpha - the coefficient >> alpha - the coefficient
*/ */
extern "C" extern "C"
void CudaMultiplyElementWise(XTensor * a, XTensor * b, XTensor * c, int leadingDim, DTYPE alpha) void CudaMultiply(XTensor * a, XTensor * b, XTensor * c, int leadingDim, DTYPE alpha)
{ {
int leadingDimRDI = a->order - leadingDim - 1; int leadingDimRDI = a->order - leadingDim - 1;
CheckNTErrors((a->unitNum <= c->unitNum && b->unitNum <= c->unitNum), CheckNTErrors((a->unitNum <= c->unitNum && b->unitNum <= c->unitNum),
......
...@@ -19,10 +19,10 @@ ...@@ -19,10 +19,10 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#ifndef __MULTIPLYELEMENTWISE_CUH__ #ifndef __MULTIPLY_CUH__
#define __MULTIPLYELEMENTWISE_CUH__ #define __MULTIPLY_CUH__
#include "MultiplyElementWise.h" #include "Multiply.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
...@@ -42,11 +42,11 @@ void KernelMulElementWiseTensorDynamic(DTYPE * a, DTYPE * b, DTYPE * c, DTYPE al ...@@ -42,11 +42,11 @@ void KernelMulElementWiseTensorDynamic(DTYPE * a, DTYPE * b, DTYPE * c, DTYPE al
/* element-wise product of two tensors */ /* element-wise product of two tensors */
extern "C" extern "C"
void CudaMultiplyElementWise(XTensor * a, XTensor * b, XTensor * c, int leadingDim, DTYPE alpha); void CudaMultiply(XTensor * a, XTensor * b, XTensor * c, int leadingDim = 0, DTYPE alpha = 0);
#endif // USE_CUDA #endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
#endif // __MULTIPLYELEMENTWISE_CUH__ #endif // __MULTIPLY_CUH__
...@@ -19,17 +19,17 @@ ...@@ -19,17 +19,17 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#ifndef __MULTIPLYELEMENTWISE_H__ #ifndef __MULTIPLY_H__
#define __MULTIPLYELEMENTWISE_H__ #define __MULTIPLY_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* element-wise product of two tensors */ /* element-wise product of two tensors */
extern "C" extern "C"
void MultiplyElementWise(XTensor * a, XTensor * b, XTensor * c, int leadingDim, DTYPE alpha = 0); void Multiply(XTensor * a, XTensor * b, XTensor * c, int leadingDim = 0, DTYPE alpha = 0);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
#endif // __MULTIPLYELEMENTWISE_H__ #endif // __MULTIPLY_H__
\ No newline at end of file \ No newline at end of file
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XTensor.h" #include "../../XTensor.h"
#include "Negate.h" #include "Negate.h"
#include "Negate.cuh" #include "Negate.cuh"
......
...@@ -19,8 +19,8 @@ ...@@ -19,8 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XDevice.h" #include "../../XDevice.h"
#include "../XTensor.h" #include "../../XTensor.h"
#include "Negate.h" #include "Negate.h"
#include "Negate.cuh" #include "Negate.cuh"
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __NEGATE_H__ #ifndef __NEGATE_H__
#define __NEGATE_H__ #define __NEGATE_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -19,7 +19,8 @@ ...@@ -19,7 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XTensor.h" #include "../../XTensor.h"
#include "../../XName.h"
#include "Sum.h" #include "Sum.h"
#include "Sum.cuh" #include "Sum.cuh"
...@@ -37,15 +38,16 @@ void Sum(XTensor * a, XTensor * b, XTensor * c, DTYPE beta) ...@@ -37,15 +38,16 @@ void Sum(XTensor * a, XTensor * b, XTensor * c, DTYPE beta)
if (c == NULL) if (c == NULL)
c = a; c = a;
CheckNTErrors((a && b && c), CheckNTErrors(a && b && c, "Empty tensors in addition!");
"Empty tensors in addition!"); CheckNTErrors(a->unitNum == b->unitNum && a->unitNum == c->unitNum,
CheckNTErrors((a->unitNum == b->unitNum && a->unitNum == c->unitNum),
"Unmatched tensors in addition!"); "Unmatched tensors in addition!");
CheckNTErrors(a->dataType == b->dataType && a->dataType == c->dataType,
CheckNTErrors((a->dataType == b->dataType && a->dataType == c->dataType),
"Unmatched tensors in addition!"); "Unmatched tensors in addition!");
/* make tensor connections */
XLink::MakeLink(a, b, c, MATH_SUM);
XLink::AddParamToHead(c, beta);
if (a->devID >= 0 || b->devID >= 0 || c->devID >= 0) { if (a->devID >= 0 || b->devID >= 0 || c->devID >= 0) {
#ifdef USE_CUDA #ifdef USE_CUDA
......
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XDevice.h" #include "../../XDevice.h"
#include "Sum.cuh" #include "Sum.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __SUM_H__ #ifndef __SUM_H__
#define __SUM_H__ #define __SUM_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XTensor.h" #include "../../XTensor.h"
#include "SumByColumnTV.h" #include "SumByColumnTV.h"
#include "SumByColumnTV.cuh" #include "SumByColumnTV.cuh"
......
...@@ -19,8 +19,8 @@ ...@@ -19,8 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XDevice.h" #include "../../XDevice.h"
#include "../XTensor.h" #include "../../XTensor.h"
#include "SumByColumnTV.h" #include "SumByColumnTV.h"
#include "SumByColumnTV.cuh" #include "SumByColumnTV.cuh"
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __REDUCEMAX_CUH__ #ifndef __REDUCEMAX_CUH__
#define __REDUCEMAX_CUH__ #define __REDUCEMAX_CUH__
#include "ReduceMax.h" #include "../reduce/ReduceMax.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __SUMBYCOLUMNTV_H__ #ifndef __SUMBYCOLUMNTV_H__
#define __SUMBYCOLUMNTV_H__ #define __SUMBYCOLUMNTV_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XTensor.h" #include "../../XTensor.h"
#include "SumByColumnVT.h" #include "SumByColumnVT.h"
#include "SumByColumnVT.cuh" #include "SumByColumnVT.cuh"
......
...@@ -19,8 +19,8 @@ ...@@ -19,8 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XDevice.h" #include "../../XDevice.h"
#include "../XTensor.h" #include "../../XTensor.h"
#include "SumByColumnVT.h" #include "SumByColumnVT.h"
#include "SumByColumnVT.cuh" #include "SumByColumnVT.cuh"
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __SUMBYCOLUMNVT_H__ #ifndef __SUMBYCOLUMNVT_H__
#define __SUMBYCOLUMNVT_H__ #define __SUMBYCOLUMNVT_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -20,8 +20,8 @@ ...@@ -20,8 +20,8 @@
*/ */
#include "XTensorBLAS.h" #include "XTensorBLAS.h"
#include "../XTensor.h" #include "../../XTensor.h"
#include "../XBLAS.h" #include "../../XBLAS.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -19,16 +19,18 @@ ...@@ -19,16 +19,18 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XUtility.h" #include "../../XUtility.h"
#include "../XDevice.h" #include "../../XDevice.h"
#include "../XTensor.h" #include "../../XTensor.h"
#include "XTensorBLAS.h" #include "XTensorBLAS.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA #ifdef USE_CUDA
/* matrix multiplication via cuda version BLAS */ /*
matrix multiplication via cuda version BLAS
*/
void CudaBLASMatrixMUL(cublasHandle_t * handle, void CudaBLASMatrixMUL(cublasHandle_t * handle,
void * a, MATRIX_TRANS_TYPE transposedA, TENSOR_DATA_TYPE dataTypeA, void * a, MATRIX_TRANS_TYPE transposedA, TENSOR_DATA_TYPE dataTypeA,
void * b, MATRIX_TRANS_TYPE transposedB, TENSOR_DATA_TYPE dataTypeB, void * b, MATRIX_TRANS_TYPE transposedB, TENSOR_DATA_TYPE dataTypeB,
...@@ -83,7 +85,9 @@ void CudaBLASMatrixMUL(cublasHandle_t * handle, ...@@ -83,7 +85,9 @@ void CudaBLASMatrixMUL(cublasHandle_t * handle,
} }
} }
/* matrix multiplication via cuda version BLAS */ /*
matrix multiplication via cuda version BLAS
*/
void CudaBLASMatrixMULBatched(cublasHandle_t * handle, void CudaBLASMatrixMULBatched(cublasHandle_t * handle,
const void ** a, MATRIX_TRANS_TYPE transposedA, TENSOR_DATA_TYPE dataTypeA, const void ** a, MATRIX_TRANS_TYPE transposedA, TENSOR_DATA_TYPE dataTypeA,
const void ** b, MATRIX_TRANS_TYPE transposedB, TENSOR_DATA_TYPE dataTypeB, const void ** b, MATRIX_TRANS_TYPE transposedB, TENSOR_DATA_TYPE dataTypeB,
...@@ -139,6 +143,7 @@ void CudaBLASMatrixMULBatched(cublasHandle_t * handle, ...@@ -139,6 +143,7 @@ void CudaBLASMatrixMULBatched(cublasHandle_t * handle,
} }
/* matrix multiplication in batch and strided mode via cuda version BLAS */ /* matrix multiplication in batch and strided mode via cuda version BLAS */
extern "C"
void CudaBLASMatrixMULBatchedStrided(cublasHandle_t * handle, void CudaBLASMatrixMULBatchedStrided(cublasHandle_t * handle,
const void * a, MATRIX_TRANS_TYPE transposedA, TENSOR_DATA_TYPE dataTypeA, long long int strideA, const void * a, MATRIX_TRANS_TYPE transposedA, TENSOR_DATA_TYPE dataTypeA, long long int strideA,
const void * b, MATRIX_TRANS_TYPE transposedB, TENSOR_DATA_TYPE dataTypeB, long long int strideB, const void * b, MATRIX_TRANS_TYPE transposedB, TENSOR_DATA_TYPE dataTypeB, long long int strideB,
...@@ -193,7 +198,9 @@ void CudaBLASMatrixMULBatchedStrided(cublasHandle_t * handle, ...@@ -193,7 +198,9 @@ void CudaBLASMatrixMULBatchedStrided(cublasHandle_t * handle,
} }
} }
/* matrix multiplication via cuda version BLAS */ /*
matrix multiplication via cuda version BLAS
*/
void CudaBLASMatrixMULList(cublasHandle_t * handle, void CudaBLASMatrixMULList(cublasHandle_t * handle,
XList * a, MATRIX_TRANS_TYPE transposedA, XList * a, MATRIX_TRANS_TYPE transposedA,
XList * b, MATRIX_TRANS_TYPE transposedB, XList * b, MATRIX_TRANS_TYPE transposedB,
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __XTENSORBLAS_H__ #ifndef __XTENSORBLAS_H__
#define __XTENSORBLAS_H__ #define __XTENSORBLAS_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -19,8 +19,8 @@ ...@@ -19,8 +19,8 @@
* $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-06-14 * $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-06-14
*/ */
#include "../XTensor.h" #include "../../XTensor.h"
#include "../XDevice.h" #include "../../XDevice.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -19,8 +19,9 @@ ...@@ -19,8 +19,9 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-07-04 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-07-04
*/ */
#include "../../XUtility.h"
#include "../../XName.h"
#include "Select.h" #include "Select.h"
#include "../XUtility.h"
namespace nts{ // namespace nts(NiuTrans.Tensor) namespace nts{ // namespace nts(NiuTrans.Tensor)
...@@ -28,13 +29,13 @@ namespace nts{ // namespace nts(NiuTrans.Tensor) ...@@ -28,13 +29,13 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
generate a tensor with seleccted data in range[low,high] along the given dimension generate a tensor with seleccted data in range[low,high] along the given dimension
c = select(a) c = select(a)
>> a - input tensor >> a - input tensor
>> c - result tensor
>> dim - the dimension along with which we do the job >> dim - the dimension along with which we do the job
>> low - lower bound >> low - lower bound
>> high - higher bound. >> high - higher bound.
Note that range [1,3] means that we select 1 and 2. Note that range [1,3] means that we select 1 and 2.
>> c - result tensor
*/ */
void SelectRange(XTensor * a, int dim, int low, int high, XTensor * c) void SelectRange(XTensor * a, XTensor * c, int dim, int low, int high)
{ {
CheckNTErrors(a != NULL && c != NULL, "empty tensors!"); CheckNTErrors(a != NULL && c != NULL, "empty tensors!");
CheckNTErrors(a->order == c->order, "The input and output tensors must in the same order!"); CheckNTErrors(a->order == c->order, "The input and output tensors must in the same order!");
...@@ -54,8 +55,14 @@ void SelectRange(XTensor * a, int dim, int low, int high, XTensor * c) ...@@ -54,8 +55,14 @@ void SelectRange(XTensor * a, int dim, int low, int high, XTensor * c)
} }
} }
int dimRDI = a->order - dim - 1; /* make tensor connections */
XLink::MakeLink(a, NULL, c, MATH_SELECTRANGE);
XLink::AddParamToHeadInt(c, dim);
XLink::AddParamToHeadInt(c, low);
XLink::AddParamToHeadInt(c, high);
int stride = 1; int stride = 1;
int dimRDI = a->order - dim - 1;
for(int i = 0; i < dimRDI; i++) for(int i = 0; i < dimRDI; i++)
stride *= a->dimSizeRDI[i]; stride *= a->dimSizeRDI[i];
......
...@@ -22,18 +22,18 @@ ...@@ -22,18 +22,18 @@
#ifndef __SELECT_H__ #ifndef __SELECT_H__
#define __SELECT_H__ #define __SELECT_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts{ // namespace nts(NiuTrans.Tensor) namespace nts{ // namespace nts(NiuTrans.Tensor)
/* generate a tensor with seleccted data c = select(a) */ /* generate a tensor with seleccted data c = select(a) */
extern "C" extern "C"
void Select(XTensor * a, XTensor * indexCPU, XTensor * c); void Select(XTensor * a, XTensor * c, XTensor * indexCPU);
/* generate a tensor with seleccted data in range[low,high] along the given dimension /* generate a tensor with seleccted data in range[low,high] along the given dimension
c = select(a) */ c = select(a) */
extern "C" extern "C"
void SelectRange(XTensor * a, int dim, int low, int high, XTensor * c); void SelectRange(XTensor * a, XTensor * c, int dim, int low, int high);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -21,7 +21,7 @@ ...@@ -21,7 +21,7 @@
*/ */
#include "SetData.h" #include "SetData.h"
#include "CopyValues.h" #include "../movement/CopyValues.h"
#if !defined( WIN32 ) && !defined( _WIN32 ) #if !defined( WIN32 ) && !defined( _WIN32 )
#include "sys/time.h" #include "sys/time.h"
......
...@@ -23,7 +23,7 @@ ...@@ -23,7 +23,7 @@
#ifndef __SETDATA_H__ #ifndef __SETDATA_H__
#define __SETDATA_H__ #define __SETDATA_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -20,7 +20,7 @@ ...@@ -20,7 +20,7 @@
*/ */
#include <math.h> #include <math.h>
#include "../XTensor.h" #include "../../XTensor.h"
#include "Normalize.h" #include "Normalize.h"
#include "Normalize.cuh" #include "Normalize.cuh"
......
...@@ -19,8 +19,8 @@ ...@@ -19,8 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XDevice.h" #include "../../XDevice.h"
#include "../XTensor.h" #include "../../XTensor.h"
#include "Normalize.h" #include "Normalize.h"
#include "Normalize.cuh" #include "Normalize.cuh"
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __NORMALIZE_H__ #ifndef __NORMALIZE_H__
#define __NORMALIZE_H__ #define __NORMALIZE_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -20,7 +20,7 @@ ...@@ -20,7 +20,7 @@
*/ */
#include <math.h> #include <math.h>
#include "../XTensor.h" #include "../../XTensor.h"
#include "Power.h" #include "Power.h"
#include "Power.cuh" #include "Power.cuh"
......
...@@ -19,8 +19,8 @@ ...@@ -19,8 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XDevice.h" #include "../../XDevice.h"
#include "../XTensor.h" #include "../../XTensor.h"
#include "Power.h" #include "Power.h"
#include "Power.cuh" #include "Power.cuh"
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __POWER_H__ #ifndef __POWER_H__
#define __POWER_H__ #define __POWER_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -21,7 +21,7 @@ ...@@ -21,7 +21,7 @@
#include "ScaleAndShift.h" #include "ScaleAndShift.h"
#include "ScaleAndShift.cuh" #include "ScaleAndShift.cuh"
#include "../XDevice.h" #include "../../XDevice.h"
namespace nts{ // namespace nts(NiuTrans.Tensor) namespace nts{ // namespace nts(NiuTrans.Tensor)
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __SCALEANDSHIFT_CUH__ #ifndef __SCALEANDSHIFT_CUH__
#define __SCALEANDSHIFT_CUH__ #define __SCALEANDSHIFT_CUH__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts{ // namespace nts(NiuTrans.Tensor) namespace nts{ // namespace nts(NiuTrans.Tensor)
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __SCALEANDSHIFT_H__ #ifndef __SCALEANDSHIFT_H__
#define __SCALEANDSHIFT_H__ #define __SCALEANDSHIFT_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts{ // namespace nts(NiuTrans.Tensor) namespace nts{ // namespace nts(NiuTrans.Tensor)
......
...@@ -19,8 +19,8 @@ ...@@ -19,8 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XTensor.h" #include "../../XTensor.h"
#include "../XUtility.h" #include "../../XUtility.h"
#include "CopyBlocks.h" #include "CopyBlocks.h"
#include "CopyBlocksOnSite.h" #include "CopyBlocksOnSite.h"
#include "CopyBlocksSelected.cuh" #include "CopyBlocksSelected.cuh"
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __COPYBLOCKS_H__ #ifndef __COPYBLOCKS_H__
#define __COPYBLOCKS_H__ #define __COPYBLOCKS_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -19,9 +19,9 @@ ...@@ -19,9 +19,9 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XTensor.h" #include "../../XTensor.h"
#include "CopyBlocksInGrid.h" #include "CopyBlocksInGrid.h"
#include "../XUtility.h" #include "../../XUtility.h"
#include "CopyBlocksInGrid.cuh" #include "CopyBlocksInGrid.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -21,7 +21,7 @@ ...@@ -21,7 +21,7 @@
#include "CopyBlocksInGrid.h" #include "CopyBlocksInGrid.h"
#include "CopyBlocksInGrid.cuh" #include "CopyBlocksInGrid.cuh"
#include "../XDevice.h" #include "../../XDevice.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __COPYBLOCKSINGRID_CUH__ #ifndef __COPYBLOCKSINGRID_CUH__
#define __COPYBLOCKSINGRID_CUH__ #define __COPYBLOCKSINGRID_CUH__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __COPYBLOCKSINGRID_H__ #ifndef __COPYBLOCKSINGRID_H__
#define __COPYBLOCKSINGRID_H__ #define __COPYBLOCKSINGRID_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -19,8 +19,8 @@ ...@@ -19,8 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XTensor.h" #include "../../XTensor.h"
#include "../XUtility.h" #include "../../XUtility.h"
#include "CopyBlocksOnSite.h" #include "CopyBlocksOnSite.h"
#include "CopyBlocksOnSite.cuh" #include "CopyBlocksOnSite.cuh"
......
...@@ -21,7 +21,7 @@ ...@@ -21,7 +21,7 @@
#include "CopyBlocksOnSite.h" #include "CopyBlocksOnSite.h"
#include "CopyBlocksOnSite.cuh" #include "CopyBlocksOnSite.cuh"
#include "../XDevice.h" #include "../../XDevice.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __COPYBLOCKS_CUH__ #ifndef __COPYBLOCKS_CUH__
#define __COPYBLOCKS_CUH__ #define __COPYBLOCKS_CUH__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __COPYBLOCKSONSITE_H__ #ifndef __COPYBLOCKSONSITE_H__
#define __COPYBLOCKSONSITE_H__ #define __COPYBLOCKSONSITE_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -21,8 +21,8 @@ ...@@ -21,8 +21,8 @@
#include "CopyBlocks.h" #include "CopyBlocks.h"
#include "CopyBlocksSelected.cuh" #include "CopyBlocksSelected.cuh"
#include "../XUtility.h" #include "../../XUtility.h"
#include "../XDevice.h" #include "../../XDevice.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __COPYBLOCKSSELECTED_CUH__ #ifndef __COPYBLOCKSSELECTED_CUH__
#define __COPYBLOCKSSELECTED_CUH__ #define __COPYBLOCKSSELECTED_CUH__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -19,9 +19,9 @@ ...@@ -19,9 +19,9 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XTensor.h" #include "../../XTensor.h"
#include "CopyData2D.h" #include "CopyData2D.h"
#include "../XUtility.h" #include "../../XUtility.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __COPYDATA2D_H__ #ifndef __COPYDATA2D_H__
#define __COPYDATA2D_H__ #define __COPYDATA2D_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XTensor.h" #include "../../XTensor.h"
#include "CopyInGrid.h" #include "CopyInGrid.h"
#include "CopyBlocksInGrid.h" #include "CopyBlocksInGrid.h"
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __COPYINGRID_H__ #ifndef __COPYINGRID_H__
#define __COPYINGRID_H__ #define __COPYINGRID_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __COPYINDEXED_H__ #ifndef __COPYINDEXED_H__
#define __COPYINDEXED_H__ #define __COPYINDEXED_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../../XName.h"
#include "CopyValues.h" #include "CopyValues.h"
#include "CopyValues.cuh" #include "CopyValues.cuh"
...@@ -41,6 +42,9 @@ bool CopyValues(XTensor * s, XTensor * t, XStream * stream) ...@@ -41,6 +42,9 @@ bool CopyValues(XTensor * s, XTensor * t, XStream * stream)
CheckNTErrors((t->data != NULL), "Cannot copy to an empty data array!"); CheckNTErrors((t->data != NULL), "Cannot copy to an empty data array!");
CheckNTErrors((s->unitNum == t->unitNum), "Unmatched data item number!"); CheckNTErrors((s->unitNum == t->unitNum), "Unmatched data item number!");
/* make tensor connections */
XLink::MakeLink(s, NULL, t, MATH_COPYVALUES);
if ((s->dataType == X_FLOAT16 && t->dataType == X_FLOAT) || if ((s->dataType == X_FLOAT16 && t->dataType == X_FLOAT) ||
(s->dataType == X_FLOAT && t->dataType == X_FLOAT16)) { (s->dataType == X_FLOAT && t->dataType == X_FLOAT16)) {
CheckNTErrors(((s->devID < 0 && t->devID < 0) || s->devID == t->devID), CheckNTErrors(((s->devID < 0 && t->devID < 0) || s->devID == t->devID),
......
...@@ -21,8 +21,8 @@ ...@@ -21,8 +21,8 @@
#include "CopyValues.h" #include "CopyValues.h"
#include "CopyValues.cuh" #include "CopyValues.cuh"
#include "../XUtility.h" #include "../../XUtility.h"
#include "../XDevice.h" #include "../../XDevice.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __COPYVALUES_CUH__ #ifndef __COPYVALUES_CUH__
#define __COPYVALUES_CUH__ #define __COPYVALUES_CUH__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __COPYVALUES_H__ #ifndef __COPYVALUES_H__
#define __COPYVALUES_H__ #define __COPYVALUES_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -19,7 +19,8 @@ ...@@ -19,7 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XTensor.h" #include "../../XTensor.h"
#include "../../XName.h"
#include "ReduceMax.h" #include "ReduceMax.h"
#include "ReduceMax.cuh" #include "ReduceMax.cuh"
...@@ -52,6 +53,10 @@ void ReduceMax(XTensor * input, XTensor * output, int dim) ...@@ -52,6 +53,10 @@ void ReduceMax(XTensor * input, XTensor * output, int dim)
} }
} }
/* make tensor connections */
XLink::MakeLink(input, NULL, output, MATH_REDUCEMAX);
XLink::AddParamToHeadInt(output, dim);
if(input->devID >= 0){ if(input->devID >= 0){
#ifdef USE_CUDA #ifdef USE_CUDA
CudaReduceMax(input, output, dim); CudaReduceMax(input, output, dim);
......
...@@ -19,9 +19,9 @@ ...@@ -19,9 +19,9 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XDevice.h" #include "../../XDevice.h"
#include "../XTensor.h" #include "../../XTensor.h"
#include "../XUtility.h" #include "../../XUtility.h"
#include "ReduceMax.h" #include "ReduceMax.h"
#include "ReduceMax.cuh" #include "ReduceMax.cuh"
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __REDUCEMAX_H__ #ifndef __REDUCEMAX_H__
#define __REDUCEMAX_H__ #define __REDUCEMAX_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts{ // namespace nts(NiuTrans.Tensor) namespace nts{ // namespace nts(NiuTrans.Tensor)
......
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "ScaleAndShift.h" #include "../math/ScaleAndShift.h"
#include "ReduceSum.h" #include "ReduceSum.h"
#include "ReduceMean.h" #include "ReduceMean.h"
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __REDUCEMEAN_H__ #ifndef __REDUCEMEAN_H__
#define __REDUCEMEAN_H__ #define __REDUCEMEAN_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts{ // namespace nts(NiuTrans.Tensor) namespace nts{ // namespace nts(NiuTrans.Tensor)
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __REDUCESTANDARDVARIANCE_H__ #ifndef __REDUCESTANDARDVARIANCE_H__
#define __REDUCESTANDARDVARIANCE_H__ #define __REDUCESTANDARDVARIANCE_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include <math.h> #include <math.h>
#include "ReduceSum.h" #include "ReduceSum.h"
#include "ReduceSum.cuh" #include "ReduceSum.cuh"
#include "../../XName.h"
namespace nts{ // namespace nts(NiuTrans.Tensor) namespace nts{ // namespace nts(NiuTrans.Tensor)
...@@ -58,6 +59,12 @@ void ReduceSum(XTensor * input, XTensor * output, int dim, XTensor * shift, DTYP ...@@ -58,6 +59,12 @@ void ReduceSum(XTensor * input, XTensor * output, int dim, XTensor * shift, DTYP
} }
} }
/* make tensor connections */
XLink::MakeLink(input, shift, output, MATH_REDUCESUM);
XLink::AddParamToHeadInt(output, dim);
XLink::AddParamToHead(output, power);
XLink::AddParamToHeadInt(output, isExp);
if(input->devID >= 0){ if(input->devID >= 0){
#ifdef USE_CUDA #ifdef USE_CUDA
CudaReduceSum(input, output, dim, shift, power, isExp); CudaReduceSum(input, output, dim, shift, power, isExp);
......
...@@ -19,8 +19,8 @@ ...@@ -19,8 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XDevice.h" #include "../../XDevice.h"
#include "../XUtility.h" #include "../../XUtility.h"
#include "ReduceSum.cuh" #include "ReduceSum.cuh"
namespace nts{ // namespace nts(NiuTrans.Tensor) namespace nts{ // namespace nts(NiuTrans.Tensor)
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __REDUCESUM_H__ #ifndef __REDUCESUM_H__
#define __REDUCESUM_H__ #define __REDUCESUM_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts{ // namespace nts(NiuTrans.Tensor) namespace nts{ // namespace nts(NiuTrans.Tensor)
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __REDUCESUMSQUARED_H__ #ifndef __REDUCESUMSQUARED_H__
#define __REDUCESUMSQUARED_H__ #define __REDUCESUMSQUARED_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts{ // namespace nts(NiuTrans.Tensor) namespace nts{ // namespace nts(NiuTrans.Tensor)
......
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "ScaleAndShift.h" #include "../math/ScaleAndShift.h"
#include "ReduceSum.h" #include "ReduceSum.h"
#include "ReduceVariance.h" #include "ReduceVariance.h"
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __REDUCEVARIANCE_H__ #ifndef __REDUCEVARIANCE_H__
#define __REDUCEVARIANCE_H__ #define __REDUCEVARIANCE_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts{ // namespace nts(NiuTrans.Tensor) namespace nts{ // namespace nts(NiuTrans.Tensor)
......
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XTensor.h" #include "../../XTensor.h"
#include "Concatenate.h" #include "Concatenate.h"
#include "Merge.h" #include "Merge.h"
#include "ConcatenateSolely.h" #include "ConcatenateSolely.h"
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __CONCATENATE_H__ #ifndef __CONCATENATE_H__
#define __CONCATENATE_H__ #define __CONCATENATE_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -19,8 +19,9 @@ ...@@ -19,8 +19,9 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XTensor.h" #include "../../XTensor.h"
#include "../XUtility.h" #include "../../XUtility.h"
#include "../../XName.h"
#include "ConcatenateSolely.h" #include "ConcatenateSolely.h"
#include "MergeBlockLists.h" #include "MergeBlockLists.h"
...@@ -36,6 +37,10 @@ void ConcatenateSolely(XList * smalls, XTensor * big, int dim) ...@@ -36,6 +37,10 @@ void ConcatenateSolely(XList * smalls, XTensor * big, int dim)
{ {
CheckNTErrors((big->order > dim && dim >= 0), "Illegal dimension to concatenate!"); CheckNTErrors((big->order > dim && dim >= 0), "Illegal dimension to concatenate!");
/* make tensor connections */
XLink::MakeLink(smalls, big, MATH_CONCATENATESOLY);
XLink::AddParamToHeadInt(big, dim);
int catDimSize = 0; int catDimSize = 0;
int dimRDI = big->order - dim - 1; int dimRDI = big->order - dim - 1;
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __CONCATENATESOLELY_H__ #ifndef __CONCATENATESOLELY_H__
#define __CONCATENATESOLELY_H__ #define __CONCATENATESOLELY_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XTensor.h" #include "../../XTensor.h"
#include "MakeMergeBlockIndex.h" #include "MakeMergeBlockIndex.h"
#include "MakeMergeBlockIndex.cuh" #include "MakeMergeBlockIndex.cuh"
......
...@@ -19,8 +19,8 @@ ...@@ -19,8 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XDevice.h" #include "../../XDevice.h"
#include "../XTensor.h" #include "../../XTensor.h"
#include "MakeMergeBlockIndex.h" #include "MakeMergeBlockIndex.h"
#include "MakeMergeBlockIndex.cuh" #include "MakeMergeBlockIndex.cuh"
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __CUDAMAKEMERGEBLOCKINDEX_CUH__ #ifndef __CUDAMAKEMERGEBLOCKINDEX_CUH__
#define __CUDAMAKEMERGEBLOCKINDEX_CUH__ #define __CUDAMAKEMERGEBLOCKINDEX_CUH__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __MAKEMERGEBLOCKINDEX_H__ #ifndef __MAKEMERGEBLOCKINDEX_H__
#define __MAKEMERGEBLOCKINDEX_H__ #define __MAKEMERGEBLOCKINDEX_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XTensor.h" #include "../../XTensor.h"
#include "MakeSplitBlockIndex.h" #include "MakeSplitBlockIndex.h"
#include "MakeSplitBlockIndex.cuh" #include "MakeSplitBlockIndex.cuh"
......
...@@ -19,8 +19,8 @@ ...@@ -19,8 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XDevice.h" #include "../../XDevice.h"
#include "../XTensor.h" #include "../../XTensor.h"
#include "MakeSplitBlockIndex.h" #include "MakeSplitBlockIndex.h"
#include "MakeSplitBlockIndex.cuh" #include "MakeSplitBlockIndex.cuh"
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __MAKESPLITBLOCKINDEX_H__ #ifndef __MAKESPLITBLOCKINDEX_H__
#define __MAKESPLITBLOCKINDEX_H__ #define __MAKESPLITBLOCKINDEX_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -19,11 +19,12 @@ ...@@ -19,11 +19,12 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XTensor.h" #include "../../XTensor.h"
#include "../XUtility.h" #include "../../XUtility.h"
#include "../../XName.h"
#include "Merge.h" #include "Merge.h"
#include "MakeMergeBlockIndex.h" #include "MakeMergeBlockIndex.h"
#include "CopyBlocksOnSite.h" #include "../movement/CopyBlocksOnSite.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
...@@ -62,6 +63,11 @@ void Merge(XTensor * s, XTensor * t, int whereToMerge, int leadingDim) ...@@ -62,6 +63,11 @@ void Merge(XTensor * s, XTensor * t, int whereToMerge, int leadingDim)
} }
} }
/* make tensor connections */
XLink::MakeLink(s, NULL, t, MATH_MERGE);
XLink::AddParamToHeadInt(t, whereToMerge);
XLink::AddParamToHeadInt(t, leadingDim);
int blockSize = 1; int blockSize = 1;
int blockNum = 1; int blockNum = 1;
int gridSize = 1; int gridSize = 1;
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __MERGE_H__ #ifndef __MERGE_H__
#define __MERGE_H__ #define __MERGE_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -19,8 +19,8 @@ ...@@ -19,8 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XTensor.h" #include "../../XTensor.h"
#include "../XUtility.h" #include "../../XUtility.h"
#include "MergeBlockLists.h" #include "MergeBlockLists.h"
#include "MergeBlockLists.cuh" #include "MergeBlockLists.cuh"
......
...@@ -19,9 +19,9 @@ ...@@ -19,9 +19,9 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XDevice.h" #include "../../XDevice.h"
#include "../XUtility.h" #include "../../XUtility.h"
#include "../XTensor.h" #include "../../XTensor.h"
#include "MergeBlockLists.h" #include "MergeBlockLists.h"
#include "MergeBlockLists.cuh" #include "MergeBlockLists.cuh"
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __MERGEBLOCKLISTS_H__ #ifndef __MERGEBLOCKLISTS_H__
#define __MERGEBLOCKLISTS_H__ #define __MERGEBLOCKLISTS_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-07-05
*/
#ifndef __PERMUTE_H__
#define __PERMUTE_H__
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
#define permute _Permute_
/* generate the tensor with permuted dimensions: b = permuted(a) */
extern "C"
void Permute(XTensor * a, XTensor * b, int * dimPermute);
/* permute the tensor dimensions on site: a = permuted(a) */
extern "C"
void Permute_(XTensor * a, int * dimPermute);
/* make a tensor with permuted dimensions: b = permuted(a) and return its pointer */
extern "C"
XTensor * _Permute(XTensor *a, int * dimPermute);
/* make a tensor with permuted dimensions: b = permuted(a) and return its body */
extern "C"
XTensor& _Permute_(XTensor &a, int * dimPermute);
} // namespace nts(NiuTrans.Tensor)
#endif // __PERMUTE_H__
...@@ -19,11 +19,11 @@ ...@@ -19,11 +19,11 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XTensor.h" #include "../../XTensor.h"
#include "../XUtility.h" #include "../../XUtility.h"
#include "Split.h" #include "Split.h"
#include "MakeSplitBlockIndex.h" #include "MakeSplitBlockIndex.h"
#include "CopyBlocksOnSite.h" #include "../movement/CopyBlocksOnSite.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __SPLIT_H__ #ifndef __SPLIT_H__
#define __SPLIT_H__ #define __SPLIT_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-07-05
* It will rain tomorrow - end of the hot days :)
*/
#ifndef __TRANSPOSE_H__
#define __TRANSPOSE_H__
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
#define transpose _Transpose_
/* generate a transposed 1D/2D tensor: b = transposed(a) */
void Transpose(XTensor * a, XTensor * b);
/* transpose a 1D/2D tensor on site: a = transposed(a) */
void Transpose_(XTensor * a);
/* make a transposed 1D/2D tensor: b = transposed(a) and return its pointer */
XTensor * _Transpose(XTensor * a);
/* make a transposed 1D/2D tensor: b = transposed(a) and return its body */
XTensor & _Transpose_(XTensor & a);
} // namespace nts(NiuTrans.Tensor)
#endif // __TRANSPOSE_H__
...@@ -19,7 +19,8 @@ ...@@ -19,7 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XTensor.h" #include "../../XTensor.h"
#include "../../XName.h"
#include "Unsqueeze.h" #include "Unsqueeze.h"
#include "MergeBlockLists.h" #include "MergeBlockLists.h"
#include "Unsqueeze.cuh" #include "Unsqueeze.cuh"
...@@ -39,6 +40,11 @@ void Unsqueeze(XTensor * a, XTensor * b, int dim, int dSize) ...@@ -39,6 +40,11 @@ void Unsqueeze(XTensor * a, XTensor * b, int dim, int dSize)
CheckNTErrors((a->order == b->order - 1), "Unmatched tensors!"); CheckNTErrors((a->order == b->order - 1), "Unmatched tensors!");
CheckNTErrors((a->unitSize == b->unitSize), "Unmatched tensors!"); CheckNTErrors((a->unitSize == b->unitSize), "Unmatched tensors!");
/* make tensor connections */
XLink::MakeLink(a, NULL, b, MATH_UNSQUEEZE);
XLink::AddParamToHeadInt(b, dim);
XLink::AddParamToHeadInt(b, dSize);
int dimRDI = b->order - dim - 1; int dimRDI = b->order - dim - 1;
for (int i = 0; i < b->order; i++) { for (int i = 0; i < b->order; i++) {
if (i < dimRDI) { if (i < dimRDI) {
......
...@@ -19,8 +19,8 @@ ...@@ -19,8 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XDevice.h" #include "../../XDevice.h"
#include "../XTensor.h" #include "../../XTensor.h"
#include "Unsqueeze.h" #include "Unsqueeze.h"
#include "Unsqueeze.cuh" #include "Unsqueeze.cuh"
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __UNSQUEEZE_H__ #ifndef __UNSQUEEZE_H__
#define __UNSQUEEZE_H__ #define __UNSQUEEZE_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -19,8 +19,9 @@ ...@@ -19,8 +19,9 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XTensor.h" #include "../../XTensor.h"
#include "../XUtility.h" #include "../../XUtility.h"
#include "../../XName.h"
#include "Sort.h" #include "Sort.h"
#include "Sort.cuh" #include "Sort.cuh"
...@@ -38,8 +39,11 @@ void Sort(XTensor * a, XTensor * index, int dim) ...@@ -38,8 +39,11 @@ void Sort(XTensor * a, XTensor * index, int dim)
CheckNTErrors((a->order == index->order), "Unmatched input tensors!"); CheckNTErrors((a->order == index->order), "Unmatched input tensors!");
CheckNTErrors((index->dataType == X_INT), "Wrong data type!"); CheckNTErrors((index->dataType == X_INT), "Wrong data type!");
int dimRDI = a->order - dim - 1; /* make tensor connections */
XLink::MakeLink(a, NULL, index, MATH_SORT);
XLink::AddParamToHeadInt(index, dim);
int dimRDI = a->order - dim - 1;
/* make the index tensor */ /* make the index tensor */
index->SetAscendingOrder(dim); index->SetAscendingOrder(dim);
......
...@@ -19,9 +19,9 @@ ...@@ -19,9 +19,9 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XDevice.h" #include "../../XDevice.h"
#include "../XUtility.h" #include "../../XUtility.h"
#include "../XTensor.h" #include "../../XTensor.h"
#include "Sort.h" #include "Sort.h"
#include "Sort.cuh" #include "Sort.cuh"
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __SORT_H__ #ifndef __SORT_H__
#define __SORT_H__ #define __SORT_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -19,7 +19,8 @@ ...@@ -19,7 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XTensor.h" #include "../../XTensor.h"
#include "../../XName.h"
#include "TopK.h" #include "TopK.h"
#include "TopK.cuh" #include "TopK.cuh"
...@@ -40,6 +41,11 @@ void TopK(XTensor * a, XTensor * b, XTensor * index, int dim, int k) ...@@ -40,6 +41,11 @@ void TopK(XTensor * a, XTensor * b, XTensor * index, int dim, int k)
CheckNTErrors((index == NULL || a->order == index->order), "Unmatched input tensors!"); CheckNTErrors((index == NULL || a->order == index->order), "Unmatched input tensors!");
CheckNTErrors((index->dataType == X_INT), "Wrong data type!"); CheckNTErrors((index->dataType == X_INT), "Wrong data type!");
/* make tensor connections */
XLink::MakeLink(a, b, index, MATH_TOPK);
XLink::AddParamToHeadInt(index, dim);
XLink::AddParamToHeadInt(index, k);
int dimRDI = a->order - dim - 1; int dimRDI = a->order - dim - 1;
for (int i = 0; i < a->order; i++) { for (int i = 0; i < a->order; i++) {
if (i == dimRDI) { if (i == dimRDI) {
......
...@@ -19,9 +19,9 @@ ...@@ -19,9 +19,9 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XDevice.h" #include "../../XDevice.h"
#include "../XUtility.h" #include "../../XUtility.h"
#include "../XTensor.h" #include "../../XTensor.h"
#include "TopK.h" #include "TopK.h"
#include "TopK.cuh" #include "TopK.cuh"
#include "Sort.cuh" #include "Sort.cuh"
...@@ -95,11 +95,6 @@ public: ...@@ -95,11 +95,6 @@ public:
/* swap */ /* swap */
__device__ void Swap(int i, int j) __device__ void Swap(int i, int j)
{ {
/*
CudaHeapNode<T> tmp = items[i];
items[i] = items[j];
items[j] = tmp;
*/
int tmpIndex = items[i].index; int tmpIndex = items[i].index;
T tmpValue = items[i].value; T tmpValue = items[i].value;
items[i] = items[j]; items[i] = items[j];
...@@ -433,7 +428,6 @@ void CudaTopK(XTensor * a, XTensor * b, XTensor * index, int dim, int k) ...@@ -433,7 +428,6 @@ void CudaTopK(XTensor * a, XTensor * b, XTensor * index, int dim, int k)
} }
} }
/* we resort to sorting if the data cannot fit inside the shared memory */ /* we resort to sorting if the data cannot fit inside the shared memory */
else { else {
int dimSize[MAX_TENSOR_DIM_NUM]; int dimSize[MAX_TENSOR_DIM_NUM];
...@@ -449,8 +443,6 @@ void CudaTopK(XTensor * a, XTensor * b, XTensor * index, int dim, int k) ...@@ -449,8 +443,6 @@ void CudaTopK(XTensor * a, XTensor * b, XTensor * index, int dim, int k)
if (a->mem != NULL) if (a->mem != NULL)
a->mem->ReleaseBuf(a->devID, a->unitNum * sizeof(int)); a->mem->ReleaseBuf(a->devID, a->unitNum * sizeof(int));
else
XMemFree(a->devID, indexA->data);
delete indexA; delete indexA;
} }
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __TOPK_H__ #ifndef __TOPK_H__
#define __TOPK_H__ #define __TOPK_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-06-22 * $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-06-22
*/ */
#include "../XUtility.h" #include "../../XUtility.h"
#include "FlushToMem.h" #include "FlushToMem.h"
#include "FlushToMem.cuh" #include "FlushToMem.cuh"
......
...@@ -20,7 +20,7 @@ ...@@ -20,7 +20,7 @@
*/ */
#include "FlushToMem.cuh" #include "FlushToMem.cuh"
#include "../XUtility.h" #include "../../XUtility.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __FLUSHTOMEM_CUH__ #ifndef __FLUSHTOMEM_CUH__
#define __FLUSHTOMEM_CUH__ #define __FLUSHTOMEM_CUH__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __FLUSHTOMEM_H__ #ifndef __FLUSHTOMEM_H__
#define __FLUSHTOMEM_H__ #define __FLUSHTOMEM_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -20,7 +20,7 @@ ...@@ -20,7 +20,7 @@
*/ */
#include "SetAscendingOrder.cuh" #include "SetAscendingOrder.cuh"
#include "../XDevice.h" #include "../../XDevice.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __SETASCENDINGORDER_CUH__ #ifndef __SETASCENDINGORDER_CUH__
#define __SETASCENDINGORDER_CUH__ #define __SETASCENDINGORDER_CUH__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __XMATRIXSEGMENT_H__ #ifndef __XMATRIXSEGMENT_H__
#define __XMATRIXSEGMENT_H__ #define __XMATRIXSEGMENT_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -21,7 +21,7 @@ ...@@ -21,7 +21,7 @@
#include "Identity.h" #include "Identity.h"
#include "../XUtility.h" #include "../XUtility.h"
#include "../core/CopyValues.h" #include "../core/movement/CopyValues.h"
namespace nts{ // namespace nts(NiuTrans.Tensor) namespace nts{ // namespace nts(NiuTrans.Tensor)
......
...@@ -23,9 +23,9 @@ ...@@ -23,9 +23,9 @@
#include "../XUtility.h" #include "../XUtility.h"
#include "LogSoftmax.h" #include "LogSoftmax.h"
#include "LogSoftmax.cuh" #include "LogSoftmax.cuh"
#include "../core/ReduceSum.h" #include "../core/reduce/ReduceSum.h"
#include "../core/ReduceMax.h" #include "../core/reduce/ReduceMax.h"
#include "../core/CopyValues.h" #include "../core/movement/CopyValues.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -22,8 +22,8 @@ ...@@ -22,8 +22,8 @@
#include "LogSoftmax.h" #include "LogSoftmax.h"
#include "LogSoftmax.cuh" #include "LogSoftmax.cuh"
#include "Loss.cuh" #include "Loss.cuh"
#include "../core/ReduceSum.cuh" #include "../core/reduce/ReduceSum.cuh"
#include "../core/ReduceMax.cuh" #include "../core/reduce/ReduceMax.cuh"
#include "../XDevice.h" #include "../XDevice.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -23,8 +23,8 @@ ...@@ -23,8 +23,8 @@
#include "Softmax.h" #include "Softmax.h"
#include "Softmax.cuh" #include "Softmax.cuh"
#include "../XUtility.h" #include "../XUtility.h"
#include "../core/ReduceSum.h" #include "../core/reduce/ReduceSum.h"
#include "../core/ReduceMax.h" #include "../core/reduce/ReduceMax.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -22,10 +22,10 @@ ...@@ -22,10 +22,10 @@
#include "Softmax.h" #include "Softmax.h"
#include "Softmax.cuh" #include "Softmax.cuh"
#include "Loss.cuh" #include "Loss.cuh"
#include "../core/ReduceSum.h" #include "../core/reduce/ReduceSum.h"
#include "../core/MultiplyElementWise.h" #include "../core/arithmetic/Multiply.h"
#include "../core/Unsqueeze.h" #include "../core/shape/Unsqueeze.h"
#include "../core/Sum.h" #include "../core/arithmetic/Sum.h"
#include "../XDevice.h" #include "../XDevice.h"
#include "../XUtility.h" #include "../XUtility.h"
...@@ -288,7 +288,7 @@ void CudaSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x, ...@@ -288,7 +288,7 @@ void CudaSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
beta->data = mem->AllocBuf(mem->devID, beta->unitNum * beta->unitSize); beta->data = mem->AllocBuf(mem->devID, beta->unitNum * beta->unitSize);
/* \beta = \sum_i (dE/dy_i * y_i) */ /* \beta = \sum_i (dE/dy_i * y_i) */
MultiplyElementWise(dedy, y, ytmp, 0); Multiply(dedy, y, ytmp, 0);
ReduceSum(ytmp, beta, leadDim); ReduceSum(ytmp, beta, leadDim);
/* ytmp = dE/dy_j - \beta */ /* ytmp = dE/dy_j - \beta */
...@@ -296,7 +296,7 @@ void CudaSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x, ...@@ -296,7 +296,7 @@ void CudaSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
Sum(dedy, ytmp, ytmp, -1.0F); Sum(dedy, ytmp, ytmp, -1.0F);
/* dE/ds_j = y_j * ytmp = y_j * (dE/dy_j - \beta) */ /* dE/ds_j = y_j * ytmp = y_j * (dE/dy_j - \beta) */
MultiplyElementWise(y, ytmp, dedx, 0); Multiply(y, ytmp, dedx, 0);
mem->ReleaseBuf(mem->devID, y->unitNum * y->unitSize); mem->ReleaseBuf(mem->devID, y->unitNum * y->unitSize);
mem->ReleaseBuf(mem->devID, beta->unitNum * beta->unitSize); mem->ReleaseBuf(mem->devID, beta->unitNum * beta->unitSize);
......
...@@ -485,7 +485,7 @@ float GetProb(XTensor &output, XTensor &gold, XTensor * wordProbs) ...@@ -485,7 +485,7 @@ float GetProb(XTensor &output, XTensor &gold, XTensor * wordProbs)
InitTensor(&probs, &output); InitTensor(&probs, &output);
/* probs[i,j] = output[i,j] * gold[i,j] */ /* probs[i,j] = output[i,j] * gold[i,j] */
MultiplyElementWise(&output, &gold, &probs, 0); Multiply(&output, &gold, &probs, 0);
/* probability of each word */ /* probability of each word */
XTensor wprobs; XTensor wprobs;
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __TEST_CONCATENATE_H__ #ifndef __TEST_CONCATENATE_H__
#define __TEST_CONCATENATE_H__ #define __TEST_CONCATENATE_H__
#include "../core/Concatenate.h" #include "../core/shape/Concatenate.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __TEST_CONCATENATESOLELY_H__ #ifndef __TEST_CONCATENATESOLELY_H__
#define __TEST_CONCATENATESOLELY_H__ #define __TEST_CONCATENATESOLELY_H__
#include "../core/ConcatenateSolely.h" #include "../core/shape/ConcatenateSolely.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __TEST_COPYINDEXED_H__ #ifndef __TEST_COPYINDEXED_H__
#define __TEST_COPYINDEXED_H__ #define __TEST_COPYINDEXED_H__
#include "../core/CopyIndexed.h" #include "../core/movement/CopyIndexed.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __TEST_COPYVALUES_H__ #ifndef __TEST_COPYVALUES_H__
#define __TEST_COPYVALUES_H__ #define __TEST_COPYVALUES_H__
#include "../core/CopyValues.h" #include "../core/movement/CopyValues.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
* $Created by: LI Yinqiao (email: li.yin.qiao.2012@hotmail.com) 2018-04-30 * $Created by: LI Yinqiao (email: li.yin.qiao.2012@hotmail.com) 2018-04-30
*/ */
#include "../core/ScaleAndShift.h" #include "../core/math/ScaleAndShift.h"
#include "../function/Loss.h" #include "../function/Loss.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __TEST_MATRIXMULBATCHEDCPU_H__ #ifndef __TEST_MATRIXMULBATCHEDCPU_H__
#define __TEST_MATRIXMULBATCHEDCPU_H__ #define __TEST_MATRIXMULBATCHEDCPU_H__
#include "../core/MatrixMULBatchedCPU.h" #include "../core/arithmetic/MatrixMULBatchedCPU.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __TEST_MATRIXMUL_H__ #ifndef __TEST_MATRIXMUL_H__
#define __TEST_MATRIXMUL_H__ #define __TEST_MATRIXMUL_H__
#include "../core/MatrixMul.h" #include "../core/arithmetic/MatrixMul.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __TEST_MATRIXMUL2D_H__ #ifndef __TEST_MATRIXMUL2D_H__
#define __TEST_MATRIXMUL2D_H__ #define __TEST_MATRIXMUL2D_H__
#include "../core/MatrixMul2D.h" #include "../core/arithmetic/MatrixMul2D.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __TEST_MATRIXMUL2DPARALLEL_H__ #ifndef __TEST_MATRIXMUL2DPARALLEL_H__
#define __TEST_MATRIXMUL2DPARALLEL_H__ #define __TEST_MATRIXMUL2DPARALLEL_H__
#include "../core/MatrixMul2DParallel.h" #include "../core/arithmetic/MatrixMul2DParallel.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __TEST_MATRIXMULBATCHED_H__ #ifndef __TEST_MATRIXMULBATCHED_H__
#define __TEST_MATRIXMULBATCHED_H__ #define __TEST_MATRIXMULBATCHED_H__
#include "../core/MatrixMulBatched.h" #include "../core/arithmetic/MatrixMulBatched.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __TEST_MERGE_H__ #ifndef __TEST_MERGE_H__
#define __TEST_MERGE_H__ #define __TEST_MERGE_H__
#include "../core/Merge.h" #include "../core/shape/Merge.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
* $Created by: Lin Ye (email: linye2015@outlook.com) 2018-06-15 * $Created by: Lin Ye (email: linye2015@outlook.com) 2018-06-15
*/ */
#include "TMultiplyElementWise.h" #include "TMultiply.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
...@@ -28,7 +28,7 @@ case 1: element-wise product of two tensors ...@@ -28,7 +28,7 @@ case 1: element-wise product of two tensors
c(i) = a(i)*b(i) + \alpha * c(i) c(i) = a(i)*b(i) + \alpha * c(i)
In this case, (2, 1) (2, 1) -> (2, 1), leadingDim=0, alpha=0. In this case, (2, 1) (2, 1) -> (2, 1), leadingDim=0, alpha=0.
*/ */
bool TestMultiplyElementWise1() bool TestMultiply1()
{ {
/* a source tensor of size (2, 1) */ /* a source tensor of size (2, 1) */
int sOrder1 = 2; int sOrder1 = 2;
...@@ -81,7 +81,7 @@ bool TestMultiplyElementWise1() ...@@ -81,7 +81,7 @@ bool TestMultiplyElementWise1()
t->SetZeroAll(); t->SetZeroAll();
/* call MultiplyElementWise function */ /* call MultiplyElementWise function */
MultiplyElementWise(s1, s2, t, 0); Multiply(s1, s2, t, 0);
/* check results */ /* check results */
cpuTest = t->CheckData(answer, tUnitNum); cpuTest = t->CheckData(answer, tUnitNum);
...@@ -101,7 +101,7 @@ bool TestMultiplyElementWise1() ...@@ -101,7 +101,7 @@ bool TestMultiplyElementWise1()
tGPU->SetZeroAll(); tGPU->SetZeroAll();
/* call MultiplyElementWise function */ /* call MultiplyElementWise function */
MultiplyElementWise(sGPU1, sGPU2, tGPU, 0); Multiply(sGPU1, sGPU2, tGPU, 0);
/* check results */ /* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum); gpuTest = tGPU->CheckData(answer, tUnitNum);
...@@ -136,7 +136,7 @@ case 2: element-wise product of two tensors ...@@ -136,7 +136,7 @@ case 2: element-wise product of two tensors
c(i) = a(i)*b(i) + \alpha * c(i) c(i) = a(i)*b(i) + \alpha * c(i)
In this case, (2, 2) (2, 2) -> (2, 2), leadingDim=0, alpha=0. In this case, (2, 2) (2, 2) -> (2, 2), leadingDim=0, alpha=0.
*/ */
bool TestMultiplyElementWise2() bool TestMultiply2()
{ {
/* a source tensor of size (2, 2) */ /* a source tensor of size (2, 2) */
int sOrder1 = 2; int sOrder1 = 2;
...@@ -189,7 +189,7 @@ bool TestMultiplyElementWise2() ...@@ -189,7 +189,7 @@ bool TestMultiplyElementWise2()
t->SetZeroAll(); t->SetZeroAll();
/* call MultiplyElementWise function */ /* call MultiplyElementWise function */
MultiplyElementWise(s1, s2, t, 0); Multiply(s1, s2, t, 0);
/* check results */ /* check results */
cpuTest = t->CheckData(answer, tUnitNum); cpuTest = t->CheckData(answer, tUnitNum);
...@@ -209,7 +209,7 @@ bool TestMultiplyElementWise2() ...@@ -209,7 +209,7 @@ bool TestMultiplyElementWise2()
tGPU->SetZeroAll(); tGPU->SetZeroAll();
/* call MultiplyElementWise function */ /* call MultiplyElementWise function */
MultiplyElementWise(sGPU1, sGPU2, tGPU, 0); Multiply(sGPU1, sGPU2, tGPU, 0);
/* check results */ /* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum); gpuTest = tGPU->CheckData(answer, tUnitNum);
...@@ -243,7 +243,7 @@ bool TestMultiplyElementWise2() ...@@ -243,7 +243,7 @@ bool TestMultiplyElementWise2()
case 3: element-wise product of two tensors, c(i) = a(i)*b(i) + \alpha * c(i) case 3: element-wise product of two tensors, c(i) = a(i)*b(i) + \alpha * c(i)
In this case, (2, 2) (2, 2) -> (2, 2), leadingDim=1, alpha=0. In this case, (2, 2) (2, 2) -> (2, 2), leadingDim=1, alpha=0.
*/ */
bool TestMultiplyElementWise3() bool TestMultiply3()
{ {
/* a source tensor of size (2, 2) */ /* a source tensor of size (2, 2) */
int sOrder1 = 2; int sOrder1 = 2;
...@@ -296,7 +296,7 @@ bool TestMultiplyElementWise3() ...@@ -296,7 +296,7 @@ bool TestMultiplyElementWise3()
t->SetZeroAll(); t->SetZeroAll();
/* call MultiplyElementWise function */ /* call MultiplyElementWise function */
MultiplyElementWise(s1, s2, t, 1); Multiply(s1, s2, t, 1);
/* check results */ /* check results */
cpuTest = t->CheckData(answer, tUnitNum); cpuTest = t->CheckData(answer, tUnitNum);
...@@ -316,7 +316,7 @@ bool TestMultiplyElementWise3() ...@@ -316,7 +316,7 @@ bool TestMultiplyElementWise3()
tGPU->SetZeroAll(); tGPU->SetZeroAll();
/* call MultiplyElementWise function */ /* call MultiplyElementWise function */
MultiplyElementWise(sGPU1, sGPU2, tGPU, 1); Multiply(sGPU1, sGPU2, tGPU, 1);
/* check results */ /* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum); gpuTest = tGPU->CheckData(answer, tUnitNum);
...@@ -352,13 +352,13 @@ TODO!! ...@@ -352,13 +352,13 @@ TODO!!
*/ */
/* test for MultiplyElementWise Function */ /* test for MultiplyElementWise Function */
bool TestMultiplyElementWise() bool TestMultiply()
{ {
XPRINT(0, stdout, "[TEST MULTIPLYELEMENTWISE] element-wise product of two tensors \n"); XPRINT(0, stdout, "[TEST MULTIPLYELEMENTWISE] element-wise product of two tensors \n");
bool returnFlag = true, caseFlag = true; bool returnFlag = true, caseFlag = true;
/* case 1 test */ /* case 1 test */
caseFlag = TestMultiplyElementWise1(); caseFlag = TestMultiply1();
if (!caseFlag) { if (!caseFlag) {
returnFlag = false; returnFlag = false;
...@@ -368,7 +368,7 @@ bool TestMultiplyElementWise() ...@@ -368,7 +368,7 @@ bool TestMultiplyElementWise()
XPRINT(0, stdout, ">> case 1 passed!\n"); XPRINT(0, stdout, ">> case 1 passed!\n");
/* case 2 test */ /* case 2 test */
caseFlag = TestMultiplyElementWise2(); caseFlag = TestMultiply2();
if (!caseFlag) { if (!caseFlag) {
returnFlag = false; returnFlag = false;
...@@ -378,7 +378,7 @@ bool TestMultiplyElementWise() ...@@ -378,7 +378,7 @@ bool TestMultiplyElementWise()
XPRINT(0, stdout, ">> case 2 passed!\n"); XPRINT(0, stdout, ">> case 2 passed!\n");
/* case 3 test */ /* case 3 test */
caseFlag = TestMultiplyElementWise3(); caseFlag = TestMultiply3();
if (!caseFlag) { if (!caseFlag) {
returnFlag = false; returnFlag = false;
......
...@@ -22,13 +22,13 @@ ...@@ -22,13 +22,13 @@
#ifndef __TEST_MULTIPLYELEMENTWISE_H__ #ifndef __TEST_MULTIPLYELEMENTWISE_H__
#define __TEST_MULTIPLYELEMENTWISE_H__ #define __TEST_MULTIPLYELEMENTWISE_H__
#include "../core/MultiplyElementWise.h" #include "../core/arithmetic/Multiply.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* test for MultiplyElementWise Function */ /* test for MultiplyElementWise Function */
extern "C" extern "C"
bool TestMultiplyElementWise(); bool TestMultiply();
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
#endif // __TEST_MULTIPLYELEMENTWISE_H__ #endif // __TEST_MULTIPLYELEMENTWISE_H__
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __TEST_NEGATE_H__ #ifndef __TEST_NEGATE_H__
#define __TEST_NEGATE_H__ #define __TEST_NEGATE_H__
#include "../core/Negate.h" #include "../core/arithmetic/Negate.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __TEST_NORMALIZE_H__ #ifndef __TEST_NORMALIZE_H__
#define __TEST_NORMALIZE_H__ #define __TEST_NORMALIZE_H__
#include "../core/Normalize.h" #include "../core/math/Normalize.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __TEST_POWER_H__ #ifndef __TEST_POWER_H__
#define __TEST_POWER_H__ #define __TEST_POWER_H__
#include "../core/Power.h" #include "../core/math/Power.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __TEST_REDUCEMAX_H__ #ifndef __TEST_REDUCEMAX_H__
#define __TEST_REDUCEMAX_H__ #define __TEST_REDUCEMAX_H__
#include "../core/ReduceMax.h" #include "../core/reduce/ReduceMax.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __TEST_REDUCEMEAN_H__ #ifndef __TEST_REDUCEMEAN_H__
#define __TEST_REDUCEMEAN_H__ #define __TEST_REDUCEMEAN_H__
#include "../core/ReduceMean.h" #include "../core/reduce/ReduceMean.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __TEST_REDUCESUM_H__ #ifndef __TEST_REDUCESUM_H__
#define __TEST_REDUCESUM_H__ #define __TEST_REDUCESUM_H__
#include "../core/ReduceSum.h" #include "../core/reduce/ReduceSum.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __TEST_REDUCESUMSQUARED_H__ #ifndef __TEST_REDUCESUMSQUARED_H__
#define __TEST_REDUCESUMSQUARED_H__ #define __TEST_REDUCESUMSQUARED_H__
#include "../core/ReduceSumSquared.h" #include "../core/reduce/ReduceSumSquared.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __TEST_REDUCEVARIANCE_H__ #ifndef __TEST_REDUCEVARIANCE_H__
#define __TEST_REDUCEVARIANCE_H__ #define __TEST_REDUCEVARIANCE_H__
#include "../core/ReduceVariance.h" #include "../core/reduce/ReduceVariance.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __TEST_SCALEANDSHIFT_H__ #ifndef __TEST_SCALEANDSHIFT_H__
#define __TEST_SCALEANDSHIFT_H__ #define __TEST_SCALEANDSHIFT_H__
#include "../core/ScaleAndShift.h" #include "../core/math/ScaleAndShift.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -20,7 +20,6 @@ ...@@ -20,7 +20,6 @@
*/ */
#include "TSelect.h" #include "TSelect.h"
#include "../xc/Mycode.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
...@@ -74,7 +73,7 @@ bool TestSelect1() ...@@ -74,7 +73,7 @@ bool TestSelect1()
t->SetZeroAll(); t->SetZeroAll();
/* call SelectRange function */ /* call SelectRange function */
SelectRange(s, 2, 1, 3, t); SelectRange(s, t, 2, 1, 3);
/* check results */ /* check results */
cpuTest = t->CheckData(answer, tUnitNum); cpuTest = t->CheckData(answer, tUnitNum);
...@@ -92,7 +91,7 @@ bool TestSelect1() ...@@ -92,7 +91,7 @@ bool TestSelect1()
tGPU->SetZeroAll(); tGPU->SetZeroAll();
/* call Select function */ /* call Select function */
SelectRange(sGPU, 2, 1, 3, tGPU); SelectRange(sGPU, tGPU, 2, 1, 3);
/* check results */ /* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum); gpuTest = tGPU->CheckData(answer, tUnitNum);
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __TEST_SELECT_H__ #ifndef __TEST_SELECT_H__
#define __TEST_SELECT_H__ #define __TEST_SELECT_H__
#include "../core/Select.h" #include "../core/getandset/Select.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __TEST_SORT_H__ #ifndef __TEST_SORT_H__
#define __TEST_SORT_H__ #define __TEST_SORT_H__
#include "../core/Sort.h" #include "../core/sort/Sort.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __TEST_SPLIT_H__ #ifndef __TEST_SPLIT_H__
#define __TEST_SPLIT_H__ #define __TEST_SPLIT_H__
#include "../core/Split.h" #include "../core/shape/Split.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __TEST_SUM_H__ #ifndef __TEST_SUM_H__
#define __TEST_SUM_H__ #define __TEST_SUM_H__
#include "../core/Sum.h" #include "../core/arithmetic/Sum.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __TEST_SUMBYCOLUMNTV_H__ #ifndef __TEST_SUMBYCOLUMNTV_H__
#define __TEST_SUMBYCOLUMNTV_H__ #define __TEST_SUMBYCOLUMNTV_H__
#include "../core/SumByColumnTV.h" #include "../core/arithmetic/SumByColumnTV.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __TEST_SUMBYCOLUMNVT_H__ #ifndef __TEST_SUMBYCOLUMNVT_H__
#define __TEST_SUMBYCOLUMNVT_H__ #define __TEST_SUMBYCOLUMNVT_H__
#include "../core/SumByColumnVT.h" #include "../core/arithmetic/SumByColumnVT.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __TEST_TOPK_H__ #ifndef __TEST_TOPK_H__
#define __TEST_TOPK_H__ #define __TEST_TOPK_H__
#include "../core/TopK.h" #include "../core/sort/TopK.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __TEST_UNSQUEEZE_H__ #ifndef __TEST_UNSQUEEZE_H__
#define __TEST_UNSQUEEZE_H__ #define __TEST_UNSQUEEZE_H__
#include "../core/Unsqueeze.h" #include "../core/shape/Unsqueeze.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -39,7 +39,7 @@ bool Test() ...@@ -39,7 +39,7 @@ bool Test()
//wrong = !TestMatrixMulBatched() || wrong; //wrong = !TestMatrixMulBatched() || wrong;
wrong = !TestMatrixMulBatchedCPU() || wrong; wrong = !TestMatrixMulBatchedCPU() || wrong;
wrong = !TestMerge() || wrong; wrong = !TestMerge() || wrong;
wrong = !TestMultiplyElementWise() || wrong; wrong = !TestMultiply() || wrong;
wrong = !TestNegate() || wrong; wrong = !TestNegate() || wrong;
wrong = !TestNormalize() || wrong; wrong = !TestNormalize() || wrong;
wrong = !TestPower() || wrong; wrong = !TestPower() || wrong;
......
...@@ -32,7 +32,7 @@ ...@@ -32,7 +32,7 @@
#include "TMatrixMulBatched.h" #include "TMatrixMulBatched.h"
#include "TMatrixMULBatchedCPU.h" #include "TMatrixMULBatchedCPU.h"
#include "TMerge.h" #include "TMerge.h"
#include "TMultiplyElementWise.h" #include "TMultiply.h"
#include "TNegate.h" #include "TNegate.h"
#include "TNormalize.h" #include "TNormalize.h"
#include "TPower.h" #include "TPower.h"
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论