Commit 414ff54f by liyinqiao

1. update with master; 2. class core src

parent 0887fae1
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-04
*/
#include <stdio.h>
#include "XLink.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
int XLink::paramSize = 64;
/* constructor: build an empty hyperedge with no head, no tails and no parameters */
XLink::XLink()
{
    tailNum  = 0;
    paramNum = 0;
    head   = NULL;
    tails  = NULL;
    params = NULL;
    type[0] = '\0';
}
/* destructor: release the owned tail array and the raw parameter buffer
   (params was allocated as char[], so it is freed as char[]) */
XLink::~XLink()
{
    delete[] (char*)params;
    delete[] tails;
}
/* reset the hyperedge to its just-constructed (empty) state */
void XLink::Reset()
{
    /* release the owned buffers first */
    delete[] (char*)params;
    delete[] tails;

    /* then clear every field */
    params = NULL;
    tails = NULL;
    head = NULL;
    paramNum = 0;
    tailNum = 0;
    type[0] = '\0';
}
/*
set edge type name
>> typeName - type name in string
*/
void XLink::SetType(const char * typeName)
{
type[0] = 0;
if(typeName == NULL)
return;
strcpy(type, typeName);
}
/*
set head
>> h - pointer to the head (output) tensor of the hyperedge.
       Only the pointer is kept; no ownership is taken.
*/
void XLink::SetHead(XTensor * h)
{
head = h;
}
/*
append one tensor to the tail (input) list
>> t - pointer to the tail tensor to append

The tail array grows by one slot: a new array is allocated, the old
pointers are carried over, and the old array is released.
*/
void XLink::AddTail(XTensor * t)
{
    XTensor ** oldTails = tails;
    tails = new XTensor*[tailNum + 1];
    for(int i = 0; i < tailNum; i++)
        tails[i] = oldTails[i];
    tails[tailNum] = t;
    tailNum++;
    delete[] oldTails;
}
/*
append two tensors to the tail list with a single reallocation
>> t1 - pointer to the first tail tensor
>> t2 - pointer to the second tail tensor
*/
void XLink::AddTwoTails(XTensor * t1, XTensor * t2)
{
    XTensor ** oldTails = tails;
    tails = new XTensor*[tailNum + 2];
    for(int i = 0; i < tailNum; i++)
        tails[i] = oldTails[i];
    tails[tailNum] = t1;
    tails[tailNum + 1] = t2;
    tailNum += 2;
    delete[] oldTails;
}
/*
add a parameter in the default data type
>> param - parameter value to append

Each parameter occupies one fixed-size slot of paramSize bytes.
*/
void XLink::AddParam(DTYPE param)
{
    void * ps = params;
    /* BUG FIX: the buffer must hold (paramNum + 1) slots of paramSize
       bytes each. "new char[paramNum + 1]" allocated a single byte per
       parameter, so the memcpy and the write below overflowed the heap. */
    params = new char[(paramNum + 1) * paramSize];
    memcpy(params, ps, paramNum * paramSize);
    DTYPE * p = (DTYPE*)((char*)params + paramNum * paramSize);
    *p = param;
    paramNum++;
    delete[] (char*)ps;
}
/*
add a parameter
>> param - pointer to the parameter
>> size - size of the parameter
*/
void XLink::AddParam(void * param, int size)
{
void * ps = params;
params = new char[paramNum + 1];
memcpy(params, ps, paramNum * paramSize);
char * p = (char*)params + paramNum * paramSize;
memcpy(p, param, size);
paramNum++;
delete[] (char*)ps;
}
/*
create a hyperedge with (up to) two input tensors and an output tensor
>> t1 - a tail tensor (may be NULL only if t2 is also NULL-handled below)
>> t2 - another tail tensor (may be NULL)
>> h - head tensor; nothing is done if it is NULL
>> typeName - name of the edge type (e.g., MATH_MATRIXMUL)
*/
void XLink::MakeLink(XTensor * t1, XTensor * t2, XTensor * h, const char * typeName)
{
    /* BUG FIX: the guard was inverted ("if(h != NULL) return;"), which
       skipped every valid call and dereferenced a NULL head otherwise. */
    if(h == NULL)
        return;

    /* forward: record which tensors the head is computed from */
    XLink &income = h->income;
    income.Reset();
    income.SetHead(h);
    if(t1 != NULL && t2 != NULL)
        income.AddTwoTails(t1, t2);
    else if(t1 != NULL)
        income.AddTail(t1);
    else{
        ShowNTErrors("TODO!");
    }
    income.SetType(typeName);

    /* backward for t1 */
    if(t1 != NULL){
        XLink &outgo = t1->outgo;
        /* BUG FIX: the check was "outgo.head != t1", rejecting the correct
           head. The outgoing link of t1 must be headed by t1 itself, or
           not set yet. */
        CheckNTErrors(outgo.head == NULL || outgo.head == t1, "Wrong head of the hyperedge!");
        outgo.SetHead(t1);
        outgo.AddTail(h);
    }

    /* backward for t2 */
    if(t2 != NULL){
        XLink &outgo = t2->outgo;
        CheckNTErrors(outgo.head == NULL || outgo.head == t2, "Wrong head of the hyperedge!");
        outgo.SetHead(t2);
        outgo.AddTail(h);
    }
}
/*
create a hyperedge with a list of input tensors and an output tensor
>> list - a list of input tensors; nothing is done if it is NULL
>> h - head tensor; nothing is done if it is NULL
>> typeName - name of the edge type
*/
void XLink::MakeLink(XList * list, XTensor * h, const char * typeName)
{
    /* guard against a missing head or input list (consistent with the
       two-tail MakeLink above) */
    if(h == NULL || list == NULL)
        return;

    /* forward: the head keeps all inputs as its tails */
    XLink &income = h->income;
    income.Reset();
    income.SetHead(h);
    income.SetType(typeName);
    for(int i = 0; i < list->count; i++){
        XTensor * t = (XTensor*)list->GetItem(i);
        income.AddTail(t);
    }

    /* backward: each input records the head it flows into */
    for(int i = 0; i < list->count; i++){
        XTensor * t = (XTensor*)list->GetItem(i);
        XLink &outgo = t->outgo;
        /* BUG FIX: was "outgo.head != t", which rejects the correct head;
           the outgoing link of t must be headed by t itself or unset. */
        CheckNTErrors(outgo.head == NULL || outgo.head == t, "Wrong head of the hyperedge!");
        outgo.SetHead(t);
        outgo.AddTail(h);
    }
}
/*
add a parameter (in the default data type) to the incoming edge of a head tensor
>> h - head tensor; nothing is done if it is NULL
>> param - the parameter we want to introduce
*/
void XLink::AddParamToHead(XTensor * h, DTYPE param)
{
    /* BUG FIX: the guard was inverted ("if(h != NULL) return;"), which
       returned for every valid head and dereferenced NULL otherwise. */
    if(h == NULL)
        return;
    h->income.AddParam(param);
}
/*
add an integer parameter to the incoming edge of a head tensor
>> h - head tensor; nothing is done if it is NULL
>> param - the integer parameter we want to introduce
*/
void XLink::AddParamToHeadInt(XTensor * h, int param)
{
    /* BUG FIX: the guard was inverted ("if(h != NULL) return;"), which
       returned for every valid head and dereferenced NULL otherwise. */
    if(h == NULL)
        return;
    h->income.AddParam(&param, sizeof(int));
}
} // namespace nts(NiuTrans.Tensor)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-04
*/
#include <stdio.h>
#include "XGlobal.h"
#include "XTensor.h"
#ifndef __XLINK_H__
#define __XLINK_H__
#include "XGlobal.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
/* cross reference */
struct XTensor;
#define MAX_OP_NAME_LENGTH 16
/*
This defines the link among tensors in networks. XLink can be
cast as a hyperedge in a graph. when we compute on tensors, we actually create a
network where nodes are tensors and edges the connections among them. Each connection is
a hyperedge whose head is the output tensor and tails are input tensors. E.g,
c = a + b
represents a network with three nodes (a, b and c) and a hyperedge that links a and b (tails) to c (head).
+ (=c)
/ \
a b
for c, we have an incoming edge (a, b) -> c
for a, we also have an edge c -> a in the reverse order (in a view of acyclic directed graphs)
*/
struct XLink
{
/* head of the hyperedge, i.e., the output tensor of the operation */
XTensor * head;
/* tails of the hyperedge, i.e., the input tensors of the operation */
XTensor ** tails;
/* number of tails */
int tailNum;
/* parameters used. e.g., c = a * b * \alpha
scalar \alpha is the parameter. The buffer is a flat array of
fixed-size slots (one per parameter). */
void * params;
/* number of parameters */
int paramNum;
/* size in bytes of each parameter slot (shared by all XLink instances) */
static int paramSize;
/* name of the hyperedge type. e.g., sum, mul ... (see XName.h) */
char type[MAX_OP_NAME_LENGTH];
/* constructor */
XLink();
/* destructor */
~XLink();
/* reset it to the empty state */
void Reset();
/* set edge type name */
void SetType(const char * typeName);
/* set head */
void SetHead(XTensor * h);
/* add a tail */
void AddTail(XTensor * t);
/* add two tails in one time */
void AddTwoTails(XTensor * t1, XTensor * t2);
/* add a parameter in the default data type */
void AddParam(DTYPE param);
/* add a parameter of a given size (in bytes) */
void AddParam(void * param, int size);
/* create a hyper edge with two input tensors and a output tensor */
static
void MakeLink(XTensor * t1, XTensor * t2, XTensor * h, const char * typeName);
/* create a hyper edge with a list of tensors and a output tensor */
static
void MakeLink(XList * list, XTensor * h, const char * typeName);
/* add a parameter (default data type) to the head's incoming edge */
static
void AddParamToHead(XTensor * h, DTYPE param);
/* add an integer parameter to the head's incoming edge */
static
void AddParamToHeadInt(XTensor * h, int param);
};
} // namespace nts(NiuTrans.Tensor)
#endif // __XLINK_H__
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
*
* We define various names here
*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-05
* It was really HOT these days. I can't imagine what a hot day here in Shenyang!
*/
#ifndef __XNAME_H__
#define __XNAME_H__
namespace nts { // namespace nts(NiuTrans.Tensor)
/* names of tensor operations. They are used to tag hyperedges (see
   XLink::SetType) so that the computation network records which
   operation produced each tensor. */
#define MATH_MATMUL "M_MATMUL"
#define MATH_CONCATENATESOLY "M_CONCATENATESOLY"
#define MATH_COPYVALUES "M_COPYVALUES"
#define MATH_MATRIXMUL "M_MATRIXMUL"
#define MATH_MATRIXMUL2D "M_MATRIXMUL2D"
#define MATH_MATRIXMULBATCHED "M_MATRIXMULBATCHED"
#define MATH_MERGE "M_MERGE"
#define MATH_MULTIPLY "M_MULTIPLY"
#define MATH_REDUCEMAX "M_REDUCEMAX"
#define MATH_REDUCESUM "M_REDUCESUM"
#define MATH_SELECTRANGE "M_SELECTRANGE"
#define MATH_SORT "M_SORT"
#define MATH_SUM "M_SUM"
#define MATH_TOPK "M_TOPK"
#define MATH_UNSQUEEZE "M_UNSQUEEZE"
} // namespace nts(NiuTrans.Tensor)
#endif // __XNAME_H__
\ No newline at end of file
......@@ -38,7 +38,7 @@
#include "XMem.h"
#include "XHeap.h"
#include "XBLAS.h"
#include "core/MergeBlockLists.h"
#include "core/shape/MergeBlockLists.h"
#ifdef USE_CUDA
......@@ -47,8 +47,8 @@
#include <cublas_v2.h>
#include <cuda.h>
#include <curand.h>
#include "core/FlushToMem.cuh"
#include "core/SetAscendingOrder.cuh"
#include "core/utilities/FlushToMem.cuh"
#include "core/utilities/SetAscendingOrder.cuh"
#endif
......
......@@ -21,7 +21,7 @@
*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2017-07-31
* I'm working while most of the students are enjoying their holidays :(
* $Update by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2017-11-18 bug fixes
* $Updated by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2017-11-18 bug fixes
*
*/
......@@ -36,10 +36,14 @@
#include "XList.h"
#include "XDataType.h"
#include "XMem.h"
#include "XLink.h"
/* the nts (NiuTrans.Tensor) namespace */
namespace nts{
/* cross reference */
struct XLink;
/* define the maximum number of dimensions in a tensor */
#define MAX_TENSOR_DIM_NUM 6
#define USE_BATCHED_STRIDED_MAT_MUL
......@@ -47,9 +51,7 @@ namespace nts{
#define MIN_TENSOR_SPLIT_LIST_NUM 1024
#define MIN_TENSOR_CAT_NUM 8
/*
computation flags
*/
/* computation flags */
#define UNSAFE_BUT_FAST_MEM
#define FAST_MATRIX
......@@ -59,7 +61,6 @@ is the parent class of XMatrix.
*/
struct XTensor
{
public:
/* memory pool */
XMem * mem;
......@@ -129,11 +130,24 @@ public:
/* indicates whether the tensor is initialized or not */
bool isInit;
/*
the link used to form networks. Note that when we compute on tensors, we actually create a
network where nodes are tensors and edges the connections among them. Each connection is
a hyperedge whose head is the output tensor and tails are input tensors. E.g,
c = a + b
represents a network with three nodes (a, b and c) and a hyperedge that links a and b (tails) to c (head).
Here "income" keeps which nodes (tensors) are used to form the current node (tensor).
*/
XLink income;
/* It keeps which nodes (tensors) we go to from the current node (tensor). */
XLink outgo;
/*******************************************************************
XTensor untilities
*/
public:
/********************
XTensor untilities
********************/
/* constructor */
XTensor();
......
......@@ -403,7 +403,7 @@ int ToCPUInt(int devID, void * value)
}
}
/* set the value that is kept on a device */
/* assign a number to a variable that is kept on a specified device */
bool SetToDevice(int devID, void * p, DTYPE value)
{
if(p == NULL)
......@@ -412,7 +412,7 @@ bool SetToDevice(int devID, void * p, DTYPE value)
if(devID < 0)
*(DTYPE*)p = value;
else{
XMemCopy(p, devID, &value, -1, sizeof(DTYPE*));
XMemCopy(p, devID, &value, -1, sizeof(DTYPE));
}
return true;
......
......@@ -26,47 +26,49 @@
#include "../XTensor.h"
#include "Concatenate.h"
#include "ConcatenateSolely.h"
#include "CopyBlocks.h"
#include "CopyBlocksInGrid.h"
#include "CopyBlocksOnSite.h"
#include "CopyData2D.h"
#include "CopyIndexed.h"
#include "CopyInGrid.h"
#include "CopyValues.h"
#include "FlushToMem.h"
#include "MakeMergeBlockIndex.h"
#include "MakeSplitBlockIndex.h"
#include "MatrixMul.h"
#include "MatrixMul2D.h"
#include "MatrixMul2DMultiTheading.h"
#include "MatrixMul2DParallel.h"
#include "MatrixMulBatched.h"
#include "MatrixMULBatchedCPU.h"
#include "Merge.h"
#include "MergeBlockLists.h"
#include "MultiplyElementWise.h"
#include "Negate.h"
#include "Normalize.h"
#include "Power.h"
#include "ReduceMax.h"
#include "ReduceMean.h"
#include "ReduceStandardVariance.h"
#include "ReduceSum.h"
#include "ReduceSumSquared.h"
#include "ReduceVariance.h"
#include "ScaleAndShift.h"
#include "Select.h"
#include "SetData.h"
#include "Sort.h"
#include "Split.h"
#include "Sum.h"
#include "SumByColumnTV.h"
#include "SumByColumnVT.h"
#include "TopK.h"
#include "Unsqueeze.h"
#include "XMatrixSegment.h"
#include "XTensorBLAS.h"
#include "shape/Concatenate.h"
#include "shape/ConcatenateSolely.h"
#include "movement/CopyBlocks.h"
#include "movement/CopyBlocksInGrid.h"
#include "movement/CopyBlocksOnSite.h"
#include "movement/CopyData2D.h"
#include "movement/CopyIndexed.h"
#include "movement/CopyInGrid.h"
#include "movement/CopyValues.h"
#include "utilities/FlushToMem.h"
#include "shape/MakeMergeBlockIndex.h"
#include "shape/MakeSplitBlockIndex.h"
#include "arithmetic/MatrixMul.h"
#include "arithmetic/MatrixMul2D.h"
#include "arithmetic/MatrixMul2DMultiTheading.h"
#include "arithmetic/MatrixMul2DParallel.h"
#include "arithmetic/MatrixMulBatched.h"
#include "arithmetic/MatrixMULBatchedCPU.h"
#include "shape/Merge.h"
#include "shape/MergeBlockLists.h"
#include "arithmetic/Multiply.h"
#include "arithmetic/Negate.h"
#include "math/Normalize.h"
#include "shape/Permute.h"
#include "math/Power.h"
#include "reduce/ReduceMax.h"
#include "reduce/ReduceMean.h"
#include "reduce/ReduceStandardVariance.h"
#include "reduce/ReduceSum.h"
#include "reduce/ReduceSumSquared.h"
#include "reduce/ReduceVariance.h"
#include "math/ScaleAndShift.h"
#include "getandset/Select.h"
#include "getandset/SetData.h"
#include "sort/Sort.h"
#include "shape/Split.h"
#include "arithmetic/Sum.h"
#include "arithmetic/SumByColumnTV.h"
#include "arithmetic/SumByColumnVT.h"
#include "sort/TopK.h"
#include "shape/Transpose.h"
#include "shape/Unsqueeze.h"
#include "utilities/XMatrixSegment.h"
#include "arithmetic/XTensorBLAS.h"
#endif // __CHEADER_H__
\ No newline at end of file
......@@ -37,41 +37,33 @@ public:
concatenate a list of tensors along a given dimension
Note that this is actually a wrapper that selects "ConcatenateSolely"
or "Merge" by means of the tensor shapes */
extern "C"
void Concatenate(XList * smalls, XTensor * big, int dim);
/* concatenate two tensors along a given dimension */
extern "C"
void Concatenate(XTensor * smallA, XTensor * smallB, XTensor * big, int dim);
/* concatenate a list of tensors along a given dimension */
extern "C"
static
void ConcatenateSolely(XList * smalls, XTensor * big, int dim);
/* copy selected sub-tensors */
extern "C"
static
bool CopyIndexed(XTensor * s, XTensor * t, int dim, int * srcIndex, int indexSize, int * tgtIndex, int copyNum);
/* copy a number of blocks in grid */
extern "C"
static
void CopyInGrid(XTensor * s, XTensor * t, int * index, int blockDim, int blockNumInGrid, bool isIndexOnDev = false);
/* copy s to t */
extern "C"
static
bool CopyValues(XTensor * s, XTensor * t, XStream * stream = NULL);
/* set target data block index for the data movement in merge */
extern "C"
static
void MakeMergeBlockIndex(int * blockIndex, int blockNum, int blockNumInMerge,
int splitSizeInGrid, int gridSize, int gridNum, XMem * mem);
/* set target data block index for the data movement in split */
extern "C"
static
void MakeSplitBlockIndex(int * blockIndex, int splitNum, int blockSplitSize, int blockNum, XMem * mem);
......@@ -86,7 +78,6 @@ public:
tensor of the result C. C should be a tensor of z * x * n * m. Obviously C = A * B performs
normal matrix multiplication if A = y * z and B = x * y.
*/
extern "C"
static
void MatrixMul(XTensor * a, MATRIX_TRANS_TYPE transposedA, XTensor * b, MATRIX_TRANS_TYPE transposedB, XTensor * c,
DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0, XPRunner * parallelRunner = NULL);
......@@ -96,7 +87,6 @@ public:
c = trans(a) * trans(b) * alpha + c * beta
where trans() return the transposed matrix if the flag is fired
*/
extern "C"
static
void MatrixMul2D(XTensor * a, MATRIX_TRANS_TYPE transposedA, XTensor * b, MATRIX_TRANS_TYPE transposedB, XTensor * c,
DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0, XPRunner * parallelRunner = NULL, XStream * stream = NULL);
......@@ -105,7 +95,6 @@ public:
matrix multiplication for a block (x1,y1) - (x2,y2)
where (x1,y1) is the upper-left corner and (x2,y2) is the bottom-right corner
*/
extern "C"
static
void MatrixMul2DMultiTheading(XList * args);
......@@ -114,7 +103,6 @@ public:
c = trans(a) * trans(b) * alpha + c * beta
where trans() return the transposed matrix if the flag is fired
*/
extern "C"
static
void MatrixMul2DParallel(XTensor * a, MATRIX_TRANS_TYPE transposedA, XTensor * b, MATRIX_TRANS_TYPE transposedB, XTensor * c,
DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0, XPRunner * parallelRunner = NULL);
......@@ -126,36 +114,29 @@ public:
ci = trans(ai) * trans(bi) * alpha + cm * beta
where trans() returns the transposed matrix if the flag is fired
*/
extern "C"
static
void MatrixMulBatched(XTensor * a, MATRIX_TRANS_TYPE transposedA, XTensor * b, MATRIX_TRANS_TYPE transposedB, XTensor * c,
DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0, XPRunner * parallelRunner = NULL);
/* matrix multiplication in batch mode (CPU code) */
extern "C"
static
void MatrixMULBatchedCPU(XList * a, MATRIX_TRANS_TYPE transposedA, XList * b, MATRIX_TRANS_TYPE transposedB, XList * c,
DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0);
/* transform a tensor by merging it alone with a dimension, e.g., (M, N/3, 3) -> (M, N) */
extern "C"
void Merge(XTensor * s, XTensor * t, int whereToMerge, int leadingDim = -1);
void Merge(XTensor * s, XTensor * t, int whereToMerge, int leadingDim = -1);
/* merge small tensors into a big tensor */
extern "C"
void Merge(XList * smalls, XTensor * big, int whereToMerge);
/* merge data by blocks */
extern "C"
void MergeBlockLists(XList * sourceList, int * blockSizes, int blockNum, void * target, XMem * myMem);
/* element-wise product of two tensors */
extern "C"
static
void MultiplyElementWise(XTensor * a, XTensor * b, XTensor * c, int leadingDim, DTYPE alpha = 0);
/* set every entry to its minus value */
extern "C"
void Negate(XTensor * a);
/*
......@@ -163,16 +144,13 @@ public:
y = a * (x-mean)/sqrt(variance+\epsilon) + b
where a and b are the scalar and bias respectively, and \epsilon is the adjustment parameter.
*/
extern "C"
static
void Normalize(XTensor * input, XTensor * output, int dim, XTensor * mean, XTensor * var, XTensor * a, XTensor * b, DTYPE epsilon);
/* get the power(x, y) */
extern "C"
void Power(XTensor * a, DTYPE p);
/* get the max value of the items along a dimension of the tensor. */
extern "C"
static
void ReduceMax(XTensor * input, XTensor * output, int dim);
......@@ -180,7 +158,6 @@ public:
get the mean value along a dimension of the tensor. For a 1-dimensional data array a,
mean = (1/n) * sum_i input_i
*/
extern "C"
static
void ReduceMean(XTensor * input, XTensor * output, int dim);
......@@ -188,7 +165,6 @@ public:
standard variance of the items along a dimension of the tensor. For a 1-dimensional data array a,
variance = (1/n * \sum_i (a_i - mean)^2)^0.5
*/
extern "C"
static
void ReduceStandardVariance(XTensor * input, XTensor * output, int dim, XTensor * mean);
......@@ -197,7 +173,6 @@ public:
sum = \sum_i (a_i - shift) if isExp == false
sum = \sum_i exp(a_i - shift) if isExp == true
*/
extern "C"
static
void ReduceSum(XTensor * input, XTensor * output, int dim, XTensor * shift = NULL, DTYPE power = (DTYPE)1.0F, bool isExp = false);
......@@ -205,7 +180,6 @@ public:
squared sum of the items along a dimension of the tensor. For a 1-dimensional data array a,
sum = \sum_i (a_i - shift)^2
*/
extern "C"
static
void ReduceSumSquared(XTensor * input, XTensor * output, int dim, XTensor * shift);
......@@ -213,73 +187,59 @@ public:
variance of the items along a dimension of the tensor. For a 1-dimensional data array a,
variance = 1/n * \sum_i (a_i - mean)^2
*/
extern "C"
static
void ReduceVariance(XTensor * input, XTensor * output, int dim, XTensor * mean);
/* scale and shift all tensor entires */
extern "C"
static
void ScaleAndShift(XTensor * a, DTYPE scale, DTYPE shift);
/* transform a tensor by splitting it, e.g., (M, N) -> (M, N/3, 3) */
extern "C"
void Split(XTensor * s, XTensor * t, int whereToSplit, int splitNum);
/* split a big tensor into small tensors */
extern "C"
void Split(XTensor * big, XList * smalls, int whereToSplit, int splitNum);
/* tensor summation c = a + b * \beta */
extern "C"
static
void Sum(XTensor * a, XTensor * b, XTensor * c = NULL, DTYPE beta = (DTYPE)1.0);
/* sum of a tensor and a (column) vector */
extern "C"
static
void SumByColumnTV(XTensor * a, XTensor * b, XTensor * c = NULL, DTYPE beta = (DTYPE)1.0);
/* sum of a (column) vector and a tensor */
extern "C"
static
void SumByColumnVT(XTensor * a, XTensor * b, XTensor * c = NULL, DTYPE beta = (DTYPE)1.0);
/* get the top-k items along a given dimension */
extern "C"
static
void TopK(XTensor * a, XTensor * b, XTensor * index, int dim, int k);
/* insert a dimension by copying the blocks for x times (where x is the size of the inerted dimension) */
extern "C"
void Unsqueeze(XTensor * a, XTensor * b, int dim, int dSize);
void Unsqueeze(XTensor * a, XTensor * b, int dim, int dSize);
/* segmentation and parallel processing for 2d tensors (i.e., matrices) */
/* segment a 2d tensor (i.e., matrix) into blocks and run jobs in parallel */
extern "C"
static
static
void RunParallel2D(XPRunner * parallelRunner, void * job, int opNum, int rowNum, int colNum, int argNum, ...);
/* segment a block into sub-blocks */
extern "C"
static
int SegmentTensor2D(int rowNum, int colNum, int blockNum, int * blockIndex);
/* segment a block into sub-blocks */
extern "C"
static
int SegmentTensor2DInRows(int rowNum, int colNum, int blockNum, int * blockIndex);
/* matrix multiplication (BLAS) */
extern "C"
static
void MatrixMULCPU(XTensor * a, MATRIX_TRANS_TYPE transposedA, XTensor * b, MATRIX_TRANS_TYPE transposedB, XTensor * c, DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0);
#ifdef USE_CUDA
/* matrix multiplication via cuda version BLAS */
extern "C"
static
void CudaBLASMatrixMUL(cublasHandle_t * handle,
void * a, MATRIX_TRANS_TYPE transposedA, TENSOR_DATA_TYPE dataTypeA,
......@@ -288,7 +248,6 @@ public:
int na, int ma, int nb, int mb, int nc, int mc, DTYPE alpha = (DTYPE)1.0, DTYPE beta = 1.0);
/* matrix multiplication in batch mode via cuda version BLAS */
extern "C"
static
void CudaBLASMatrixMULBatched(cublasHandle_t * handle,
const void ** a, MATRIX_TRANS_TYPE transposedA, TENSOR_DATA_TYPE dataTypeA,
......@@ -297,7 +256,6 @@ public:
int count, int na, int ma, int nb, int mb, int nc, int mc, DTYPE alpha = (DTYPE)1.0, DTYPE beta = 1.0);
/* matrix multiplication in batch and strided mode via cuda version BLAS */
extern "C"
static
void CudaBLASMatrixMULBatchedStrided(cublasHandle_t * handle,
const void * a, MATRIX_TRANS_TYPE transposedA, TENSOR_DATA_TYPE dataTypeA, long long int strideA,
......@@ -306,7 +264,6 @@ public:
int count, int na, int ma, int nb, int mb, int nc, int mc, DTYPE alpha = (DTYPE)1.0, DTYPE beta = 1.0);
/* matrix multiplication in batch mode via cuda version BLAS */
extern "C"
static
void CudaBLASMatrixMULList(cublasHandle_t * handle, XList * a, MATRIX_TRANS_TYPE transposedA, XList * b, MATRIX_TRANS_TYPE transposedB, XList * c,
int count, DTYPE alpha = (DTYPE)1.0, DTYPE beta = 1.0);
......
......@@ -19,7 +19,7 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../../XTensor.h"
#include "MatrixMULBatchedCPU.h"
#include "MatrixMul2D.h"
#include "XTensorBLAS.h"
......@@ -38,8 +38,8 @@ c_i = trans(a_i) * trans(b_i) * \alpha + c_i * \beta for each i in [0,count-1]
>> beta - scalar
*/
void MatrixMULBatchedCPU(XList * a, MATRIX_TRANS_TYPE transposedA,
XList * b, MATRIX_TRANS_TYPE transposedB,
XList * c, DTYPE alpha, DTYPE beta)
XList * b, MATRIX_TRANS_TYPE transposedB,
XList * c, DTYPE alpha, DTYPE beta)
{
CheckNTErrors((a && b && c), "Empty input lists!");
CheckNTErrors((a->count == b->count && a->count == c->count), "Input lists must be of the same size!");
......
......@@ -22,7 +22,7 @@
#ifndef __MATRIXMULBATCHEDCPU_H__
#define __MATRIXMULBATCHEDCPU_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,8 +19,9 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../XDevice.h"
#include "../../XTensor.h"
#include "../../XDevice.h"
#include "../../XName.h"
#include "MatrixMul.h"
#include "MatrixMul2D.h"
#include "MatrixMULBatchedCPU.h"
......@@ -54,10 +55,16 @@ void MatrixMul(XTensor * a, MATRIX_TRANS_TYPE transposedA,
{
CheckNTErrors((a && b && c), "Empty input tensors!");
CheckNTErrors((a->dataType == b->dataType && a->dataType == c->dataType),
"Input tensors should have the same data type!");
"Input tensors should have the same data type!");
CheckNTErrors((a->order >= 2 && b->order >= 2 && c->order >= 2),
"Input tensors must have a order > 2!");
"Input tensors must have a order > 2!");
/* make tensor connections */
XLink::MakeLink(a, b, c, MATH_MATRIXMUL);
XLink::AddParamToHeadInt(c, transposedA);
XLink::AddParamToHeadInt(c, transposedB);
XLink::AddParamToHead(c, alpha);
XLink::AddParamToHead(c, beta);
int an = transposedA == X_TRANS ? a->dimSizeRDI[0] : a->dimSizeRDI[1];
int am = transposedA == X_TRANS ? a->dimSizeRDI[1] : a->dimSizeRDI[0];
int bn = transposedB == X_TRANS ? b->dimSizeRDI[0] : b->dimSizeRDI[1];
......
......@@ -22,7 +22,7 @@
#ifndef __MATRIXMUL_H__
#define __MATRIXMUL_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,7 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../../XTensor.h"
#include "../../XName.h"
#include "MatrixMul2D.h"
#include "MatrixMul2D.cuh"
#include "MatrixMul2DParallel.h"
......@@ -51,6 +52,13 @@ void MatrixMul2D(XTensor * a, MATRIX_TRANS_TYPE transposedA,
CheckNTErrors((a->order == 2 && b->order == 2 && c->order == 2),
"Input tensors must have a order = 2!");
/* make tensor connections */
XLink::MakeLink(a, b, c, MATH_MATRIXMUL2D);
XLink::AddParamToHeadInt(c, transposedA);
XLink::AddParamToHeadInt(c, transposedB);
XLink::AddParamToHead(c, alpha);
XLink::AddParamToHead(c, beta);
int an = a->dimSize[0], am = a->dimSize[1];
int bn = b->dimSize[0], bm = b->dimSize[1];
int cn = c->dimSize[0], cm = c->dimSize[1];
......
......@@ -19,8 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XDevice.h"
#include "../XTensor.h"
#include "../../XDevice.h"
#include "../../XTensor.h"
#include "MatrixMul2D.h"
#include "MatrixMul2D.cuh"
#include "XTensorBLAS.h"
......
......@@ -22,7 +22,7 @@
#ifndef __MATRIXMUL2D_H__
#define __MATRIXMUL2D_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,7 +19,7 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../../XTensor.h"
#include "MatrixMul2DMultiTheading.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __MATRIXMUL2DMULTITHEADING_H__
#define __MATRIXMUL2DMULTITHEADING_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,10 +19,10 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../../XTensor.h"
#include "MatrixMul2DParallel.h"
#include "MatrixMul2DMultiTheading.h"
#include "XMatrixSegment.h"
#include "../utilities/XMatrixSegment.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __MATRIXMUL2DPARALLEL_H__
#define __MATRIXMUL2DPARALLEL_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,8 +19,9 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../XDevice.h"
#include "../../XTensor.h"
#include "../../XDevice.h"
#include "../../XName.h"
#include "MatrixMulBatched.h"
#include "MatrixMULBatchedCPU.h"
#include "XTensorBLAS.h"
......@@ -43,16 +44,22 @@ where trans() returns the transposed matrix if the flag is fired
>> parallelRunner - parallel processing module
*/
void MatrixMulBatched(XTensor * a, MATRIX_TRANS_TYPE transposedA,
XTensor * b, MATRIX_TRANS_TYPE transposedB,
XTensor * c, DTYPE alpha, DTYPE beta,
XPRunner * parallelRunner)
XTensor * b, MATRIX_TRANS_TYPE transposedB,
XTensor * c, DTYPE alpha, DTYPE beta,
XPRunner * parallelRunner)
{
CheckNTErrors((a && b && c), "Empty input tensors!");
CheckNTErrors((a->dataType == b->dataType && a->dataType == c->dataType),
"Input tensors should have the same data type!");
"Input tensors should have the same data type!");
CheckNTErrors((a->order >= 2 && b->order >= 2 && c->order >= 2),
"Input tensors must have a order > 2!");
"Input tensors must have a order > 2!");
/* make tensor connections */
XLink::MakeLink(a, b, c, MATH_MATRIXMULBATCHED);
XLink::AddParamToHeadInt(c, transposedA);
XLink::AddParamToHeadInt(c, transposedB);
XLink::AddParamToHead(c, alpha);
XLink::AddParamToHead(c, beta);
int an = transposedA == X_TRANS ? a->dimSizeRDI[0] : a->dimSizeRDI[1];
int am = transposedA == X_TRANS ? a->dimSizeRDI[1] : a->dimSizeRDI[0];
int bn = transposedB == X_TRANS ? b->dimSizeRDI[0] : b->dimSizeRDI[1];
......
......@@ -22,7 +22,7 @@
#ifndef __MATRIXMULBATCHED_H__
#define __MATRIXMULBATCHED_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,9 +19,10 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "MultiplyElementWise.h"
#include "MultiplyElementWise.cuh"
#include "../../XTensor.h"
#include "../../XName.h"
#include "Multiply.h"
#include "Multiply.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor)
......@@ -35,16 +36,21 @@ where i is the index of the item
>> alpha - the coefficient
>>
*/
void MultiplyElementWise(XTensor * a, XTensor * b, XTensor * c, int leadingDim, DTYPE alpha)
void Multiply(XTensor * a, XTensor * b, XTensor * c, int leadingDim, DTYPE alpha)
{
int leadingDimRDI = a->order - leadingDim - 1;
CheckNTErrors((a->unitNum <= c->unitNum && b->unitNum <= c->unitNum),
"Unmatched tensors in multiplication!");
CheckNTErrors((a->order == b->order && a->order == c->order), "Unmatched tensors!");
/* make tensor connections */
XLink::MakeLink(a, b, c, MATH_MULTIPLY);
XLink::AddParamToHeadInt(c, leadingDim);
XLink::AddParamToHead(c, alpha);
#ifdef USE_CUDA
if (a->devID >= 0 || b->devID >= 0 || c->devID >= 0) {
CudaMultiplyElementWise(a, b, c, leadingDim, alpha);
CudaMultiply(a, b, c, leadingDim, alpha);
return;
}
#endif
......
......@@ -19,10 +19,10 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XDevice.h"
#include "../XTensor.h"
#include "MultiplyElementWise.h"
#include "MultiplyElementWise.cuh"
#include "../../XDevice.h"
#include "../../XTensor.h"
#include "Multiply.h"
#include "Multiply.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor)
......@@ -121,7 +121,7 @@ where i is the item index
>> alpha - the coefficient
*/
extern "C"
void CudaMultiplyElementWise(XTensor * a, XTensor * b, XTensor * c, int leadingDim, DTYPE alpha)
void CudaMultiply(XTensor * a, XTensor * b, XTensor * c, int leadingDim, DTYPE alpha)
{
int leadingDimRDI = a->order - leadingDim - 1;
CheckNTErrors((a->unitNum <= c->unitNum && b->unitNum <= c->unitNum),
......
......@@ -19,10 +19,10 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#ifndef __MULTIPLYELEMENTWISE_CUH__
#define __MULTIPLYELEMENTWISE_CUH__
#ifndef __MULTIPLY_CUH__
#define __MULTIPLY_CUH__
#include "MultiplyElementWise.h"
#include "Multiply.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......@@ -42,11 +42,11 @@ void KernelMulElementWiseTensorDynamic(DTYPE * a, DTYPE * b, DTYPE * c, DTYPE al
/* element-wise product of two tensors */
extern "C"
void CudaMultiplyElementWise(XTensor * a, XTensor * b, XTensor * c, int leadingDim, DTYPE alpha);
void CudaMultiply(XTensor * a, XTensor * b, XTensor * c, int leadingDim = 0, DTYPE alpha = 0);
#endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor)
#endif // __MULTIPLYELEMENTWISE_CUH__
#endif // __MULTIPLY_CUH__
......@@ -19,17 +19,17 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#ifndef __MULTIPLYELEMENTWISE_H__
#define __MULTIPLYELEMENTWISE_H__
#ifndef __MULTIPLY_H__
#define __MULTIPLY_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* element-wise product of two tensors */
extern "C"
void MultiplyElementWise(XTensor * a, XTensor * b, XTensor * c, int leadingDim, DTYPE alpha = 0);
void Multiply(XTensor * a, XTensor * b, XTensor * c, int leadingDim = 0, DTYPE alpha = 0);
} // namespace nts(NiuTrans.Tensor)
#endif // __MULTIPLYELEMENTWISE_H__
\ No newline at end of file
#endif // __MULTIPLY_H__
\ No newline at end of file
......@@ -19,7 +19,7 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../../XTensor.h"
#include "Negate.h"
#include "Negate.cuh"
......
......@@ -19,8 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XDevice.h"
#include "../XTensor.h"
#include "../../XDevice.h"
#include "../../XTensor.h"
#include "Negate.h"
#include "Negate.cuh"
......
......@@ -22,7 +22,7 @@
#ifndef __NEGATE_H__
#define __NEGATE_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,7 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../../XTensor.h"
#include "../../XName.h"
#include "Sum.h"
#include "Sum.cuh"
......@@ -37,14 +38,15 @@ void Sum(XTensor * a, XTensor * b, XTensor * c, DTYPE beta)
if (c == NULL)
c = a;
CheckNTErrors((a && b && c),
"Empty tensors in addition!");
CheckNTErrors(a && b && c, "Empty tensors in addition!");
CheckNTErrors(a->unitNum == b->unitNum && a->unitNum == c->unitNum,
"Unmatched tensors in addition!");
CheckNTErrors(a->dataType == b->dataType && a->dataType == c->dataType,
"Unmatched tensors in addition!");
CheckNTErrors((a->unitNum == b->unitNum && a->unitNum == c->unitNum),
"Unmatched tensors in addition!");
CheckNTErrors((a->dataType == b->dataType && a->dataType == c->dataType),
"Unmatched tensors in addition!");
/* make tensor connections */
XLink::MakeLink(a, b, c, MATH_SUM);
XLink::AddParamToHead(c, beta);
if (a->devID >= 0 || b->devID >= 0 || c->devID >= 0) {
......
......@@ -19,7 +19,7 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XDevice.h"
#include "../../XDevice.h"
#include "Sum.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __SUM_H__
#define __SUM_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,7 +19,7 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../../XTensor.h"
#include "SumByColumnTV.h"
#include "SumByColumnTV.cuh"
......
......@@ -19,8 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XDevice.h"
#include "../XTensor.h"
#include "../../XDevice.h"
#include "../../XTensor.h"
#include "SumByColumnTV.h"
#include "SumByColumnTV.cuh"
......
......@@ -22,7 +22,7 @@
#ifndef __REDUCEMAX_CUH__
#define __REDUCEMAX_CUH__
#include "ReduceMax.h"
#include "../reduce/ReduceMax.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __SUMBYCOLUMNTV_H__
#define __SUMBYCOLUMNTV_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,7 +19,7 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../../XTensor.h"
#include "SumByColumnVT.h"
#include "SumByColumnVT.cuh"
......
......@@ -19,8 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XDevice.h"
#include "../XTensor.h"
#include "../../XDevice.h"
#include "../../XTensor.h"
#include "SumByColumnVT.h"
#include "SumByColumnVT.cuh"
......
......@@ -22,7 +22,7 @@
#ifndef __SUMBYCOLUMNVT_H__
#define __SUMBYCOLUMNVT_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -20,8 +20,8 @@
*/
#include "XTensorBLAS.h"
#include "../XTensor.h"
#include "../XBLAS.h"
#include "../../XTensor.h"
#include "../../XBLAS.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,16 +19,18 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XUtility.h"
#include "../XDevice.h"
#include "../XTensor.h"
#include "../../XUtility.h"
#include "../../XDevice.h"
#include "../../XTensor.h"
#include "XTensorBLAS.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/* matrix multiplication via cuda version BLAS */
/*
matrix multiplication via cuda version BLAS
*/
void CudaBLASMatrixMUL(cublasHandle_t * handle,
void * a, MATRIX_TRANS_TYPE transposedA, TENSOR_DATA_TYPE dataTypeA,
void * b, MATRIX_TRANS_TYPE transposedB, TENSOR_DATA_TYPE dataTypeB,
......@@ -83,7 +85,9 @@ void CudaBLASMatrixMUL(cublasHandle_t * handle,
}
}
/* matrix multiplication via cuda version BLAS */
/*
matrix multiplication via cuda version BLAS
*/
void CudaBLASMatrixMULBatched(cublasHandle_t * handle,
const void ** a, MATRIX_TRANS_TYPE transposedA, TENSOR_DATA_TYPE dataTypeA,
const void ** b, MATRIX_TRANS_TYPE transposedB, TENSOR_DATA_TYPE dataTypeB,
......@@ -139,6 +143,7 @@ void CudaBLASMatrixMULBatched(cublasHandle_t * handle,
}
/* matrix multiplication in batch and strided mode via cuda version BLAS */
extern "C"
void CudaBLASMatrixMULBatchedStrided(cublasHandle_t * handle,
const void * a, MATRIX_TRANS_TYPE transposedA, TENSOR_DATA_TYPE dataTypeA, long long int strideA,
const void * b, MATRIX_TRANS_TYPE transposedB, TENSOR_DATA_TYPE dataTypeB, long long int strideB,
......@@ -193,7 +198,9 @@ void CudaBLASMatrixMULBatchedStrided(cublasHandle_t * handle,
}
}
/* matrix multiplication via cuda version BLAS */
/*
matrix multiplication via cuda version BLAS
*/
void CudaBLASMatrixMULList(cublasHandle_t * handle,
XList * a, MATRIX_TRANS_TYPE transposedA,
XList * b, MATRIX_TRANS_TYPE transposedB,
......
......@@ -22,7 +22,7 @@
#ifndef __XTENSORBLAS_H__
#define __XTENSORBLAS_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,8 +19,8 @@
* $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-06-14
*/
#include "../XTensor.h"
#include "../XDevice.h"
#include "../../XTensor.h"
#include "../../XDevice.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,8 +19,9 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-07-04
*/
#include "../../XUtility.h"
#include "../../XName.h"
#include "Select.h"
#include "../XUtility.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
......@@ -28,13 +29,13 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
generate a tensor with seleccted data in range[low,high] along the given dimension
c = select(a)
>> a - input tensor
>> c - result tensor
>> dim - the dimension along with which we do the job
>> low - lower bound
>> high - higher bound.
Note that range [1,3] means that we select 1 and 2.
>> c - result tensor
*/
void SelectRange(XTensor * a, int dim, int low, int high, XTensor * c)
void SelectRange(XTensor * a, XTensor * c, int dim, int low, int high)
{
CheckNTErrors(a != NULL && c != NULL, "empty tensors!");
CheckNTErrors(a->order == c->order, "The input and output tensors must in the same order!");
......@@ -54,8 +55,14 @@ void SelectRange(XTensor * a, int dim, int low, int high, XTensor * c)
}
}
int dimRDI = a->order - dim - 1;
/* make tensor connections */
XLink::MakeLink(a, NULL, c, MATH_SELECTRANGE);
XLink::AddParamToHeadInt(c, dim);
XLink::AddParamToHeadInt(c, low);
XLink::AddParamToHeadInt(c, high);
int stride = 1;
int dimRDI = a->order - dim - 1;
for(int i = 0; i < dimRDI; i++)
stride *= a->dimSizeRDI[i];
......
......@@ -22,18 +22,18 @@
#ifndef __SELECT_H__
#define __SELECT_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
/* generate a tensor with seleccted data c = select(a) */
extern "C"
void Select(XTensor * a, XTensor * indexCPU, XTensor * c);
void Select(XTensor * a, XTensor * c, XTensor * indexCPU);
/* generate a tensor with seleccted data in range[low,high] along the given dimension
c = select(a) */
extern "C"
void SelectRange(XTensor * a, int dim, int low, int high, XTensor * c);
void SelectRange(XTensor * a, XTensor * c, int dim, int low, int high);
} // namespace nts(NiuTrans.Tensor)
......
......@@ -21,7 +21,7 @@
*/
#include "SetData.h"
#include "CopyValues.h"
#include "../movement/CopyValues.h"
#if !defined( WIN32 ) && !defined( _WIN32 )
#include "sys/time.h"
......
......@@ -23,7 +23,7 @@
#ifndef __SETDATA_H__
#define __SETDATA_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -20,7 +20,7 @@
*/
#include <math.h>
#include "../XTensor.h"
#include "../../XTensor.h"
#include "Normalize.h"
#include "Normalize.cuh"
......
......@@ -19,8 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XDevice.h"
#include "../XTensor.h"
#include "../../XDevice.h"
#include "../../XTensor.h"
#include "Normalize.h"
#include "Normalize.cuh"
......
......@@ -22,7 +22,7 @@
#ifndef __NORMALIZE_H__
#define __NORMALIZE_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -20,7 +20,7 @@
*/
#include <math.h>
#include "../XTensor.h"
#include "../../XTensor.h"
#include "Power.h"
#include "Power.cuh"
......
......@@ -19,8 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XDevice.h"
#include "../XTensor.h"
#include "../../XDevice.h"
#include "../../XTensor.h"
#include "Power.h"
#include "Power.cuh"
......
......@@ -22,7 +22,7 @@
#ifndef __POWER_H__
#define __POWER_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -21,7 +21,7 @@
#include "ScaleAndShift.h"
#include "ScaleAndShift.cuh"
#include "../XDevice.h"
#include "../../XDevice.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __SCALEANDSHIFT_CUH__
#define __SCALEANDSHIFT_CUH__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __SCALEANDSHIFT_H__
#define __SCALEANDSHIFT_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
......
......@@ -19,8 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../XUtility.h"
#include "../../XTensor.h"
#include "../../XUtility.h"
#include "CopyBlocks.h"
#include "CopyBlocksOnSite.h"
#include "CopyBlocksSelected.cuh"
......
......@@ -22,7 +22,7 @@
#ifndef __COPYBLOCKS_H__
#define __COPYBLOCKS_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,9 +19,9 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../../XTensor.h"
#include "CopyBlocksInGrid.h"
#include "../XUtility.h"
#include "../../XUtility.h"
#include "CopyBlocksInGrid.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor)
......@@ -39,7 +39,7 @@ Note that a grid may have a number of blocks
>> isIndexOnDev - indicates whether the index is on the device already
*/
void CopyBlocksInGrid(void * source, int blockSize, int blockNum, int gridNum, void * target,
int * index, int unitSize, bool isIndexOnDev, XMem * myMem)
int * index, int unitSize, bool isIndexOnDev, XMem * myMem)
{
CheckNTErrors((unitSize == sizeof(int)), "TODO!");
......
......@@ -21,7 +21,7 @@
#include "CopyBlocksInGrid.h"
#include "CopyBlocksInGrid.cuh"
#include "../XDevice.h"
#include "../../XDevice.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __COPYBLOCKSINGRID_CUH__
#define __COPYBLOCKSINGRID_CUH__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __COPYBLOCKSINGRID_H__
#define __COPYBLOCKSINGRID_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,8 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../XUtility.h"
#include "../../XTensor.h"
#include "../../XUtility.h"
#include "CopyBlocksOnSite.h"
#include "CopyBlocksOnSite.cuh"
......
......@@ -21,7 +21,7 @@
#include "CopyBlocksOnSite.h"
#include "CopyBlocksOnSite.cuh"
#include "../XDevice.h"
#include "../../XDevice.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __COPYBLOCKS_CUH__
#define __COPYBLOCKS_CUH__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __COPYBLOCKSONSITE_H__
#define __COPYBLOCKSONSITE_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -21,8 +21,8 @@
#include "CopyBlocks.h"
#include "CopyBlocksSelected.cuh"
#include "../XUtility.h"
#include "../XDevice.h"
#include "../../XUtility.h"
#include "../../XDevice.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __COPYBLOCKSSELECTED_CUH__
#define __COPYBLOCKSSELECTED_CUH__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,9 +19,9 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../../XTensor.h"
#include "CopyData2D.h"
#include "../XUtility.h"
#include "../../XUtility.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __COPYDATA2D_H__
#define __COPYDATA2D_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,7 +19,7 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../../XTensor.h"
#include "CopyInGrid.h"
#include "CopyBlocksInGrid.h"
......
......@@ -22,7 +22,7 @@
#ifndef __COPYINGRID_H__
#define __COPYINGRID_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __COPYINDEXED_H__
#define __COPYINDEXED_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,6 +19,7 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../../XName.h"
#include "CopyValues.h"
#include "CopyValues.cuh"
......@@ -41,6 +42,9 @@ bool CopyValues(XTensor * s, XTensor * t, XStream * stream)
CheckNTErrors((t->data != NULL), "Cannot copy to an empty data array!");
CheckNTErrors((s->unitNum == t->unitNum), "Unmatched data item number!");
/* make tensor connections */
XLink::MakeLink(s, NULL, t, MATH_COPYVALUES);
if ((s->dataType == X_FLOAT16 && t->dataType == X_FLOAT) ||
(s->dataType == X_FLOAT && t->dataType == X_FLOAT16)) {
CheckNTErrors(((s->devID < 0 && t->devID < 0) || s->devID == t->devID),
......
......@@ -21,8 +21,8 @@
#include "CopyValues.h"
#include "CopyValues.cuh"
#include "../XUtility.h"
#include "../XDevice.h"
#include "../../XUtility.h"
#include "../../XDevice.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __COPYVALUES_CUH__
#define __COPYVALUES_CUH__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __COPYVALUES_H__
#define __COPYVALUES_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,7 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../../XTensor.h"
#include "../../XName.h"
#include "ReduceMax.h"
#include "ReduceMax.cuh"
......@@ -34,7 +35,7 @@ get the max value of the items along a dimension of the tensor.
void ReduceMax(XTensor * input, XTensor * output, int dim)
{
CheckNTErrors((input->devID == output->devID || (input->devID < 0 && output->devID < 0)),
"This code must be run on the same device!");
"This code must be run on the same device!");
CheckNTErrors((input && output), "Empty input or output tensors!");
CheckNTErrors((input->order == output->order + 1), "Incorrect tensor sizes!");
CheckNTErrors((input->order > dim && dim >=0), "Illegal dimension to reduce!");
......@@ -44,14 +45,18 @@ void ReduceMax(XTensor * input, XTensor * output, int dim)
for(int i = 0; i < input->order; i++){
if(i < dimRDI){
CheckNTErrors((input->dimSizeRDI[i] == output->dimSizeRDI[i]),
"Unmatched tensors!");
"Unmatched tensors!");
}
else if(i > dimRDI){
CheckNTErrors((input->dimSizeRDI[i] == output->dimSizeRDI[i - 1]),
"Unmatched tensors!");
"Unmatched tensors!");
}
}
/* make tensor connections */
XLink::MakeLink(input, NULL, output, MATH_REDUCEMAX);
XLink::AddParamToHeadInt(output, dim);
if(input->devID >= 0){
#ifdef USE_CUDA
CudaReduceMax(input, output, dim);
......
......@@ -19,9 +19,9 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XDevice.h"
#include "../XTensor.h"
#include "../XUtility.h"
#include "../../XDevice.h"
#include "../../XTensor.h"
#include "../../XUtility.h"
#include "ReduceMax.h"
#include "ReduceMax.cuh"
......
......@@ -22,7 +22,7 @@
#ifndef __REDUCEMAX_H__
#define __REDUCEMAX_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
......
......@@ -19,7 +19,7 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "ScaleAndShift.h"
#include "../math/ScaleAndShift.h"
#include "ReduceSum.h"
#include "ReduceMean.h"
......
......@@ -22,7 +22,7 @@
#ifndef __REDUCEMEAN_H__
#define __REDUCEMEAN_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __REDUCESTANDARDVARIANCE_H__
#define __REDUCESTANDARDVARIANCE_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,6 +22,7 @@
#include <math.h>
#include "ReduceSum.h"
#include "ReduceSum.cuh"
#include "../../XName.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
......@@ -58,6 +59,12 @@ void ReduceSum(XTensor * input, XTensor * output, int dim, XTensor * shift, DTYP
}
}
/* make tensor connections */
XLink::MakeLink(input, shift, output, MATH_REDUCESUM);
XLink::AddParamToHeadInt(output, dim);
XLink::AddParamToHead(output, power);
XLink::AddParamToHeadInt(output, isExp);
if(input->devID >= 0){
#ifdef USE_CUDA
CudaReduceSum(input, output, dim, shift, power, isExp);
......
......@@ -19,8 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XDevice.h"
#include "../XUtility.h"
#include "../../XDevice.h"
#include "../../XUtility.h"
#include "ReduceSum.cuh"
namespace nts{ // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __REDUCESUM_H__
#define __REDUCESUM_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __REDUCESUMSQUARED_H__
#define __REDUCESUMSQUARED_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
......
......@@ -19,7 +19,7 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "ScaleAndShift.h"
#include "../math/ScaleAndShift.h"
#include "ReduceSum.h"
#include "ReduceVariance.h"
......
......@@ -22,7 +22,7 @@
#ifndef __REDUCEVARIANCE_H__
#define __REDUCEVARIANCE_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
......
......@@ -19,7 +19,7 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../../XTensor.h"
#include "Concatenate.h"
#include "Merge.h"
#include "ConcatenateSolely.h"
......
......@@ -22,7 +22,7 @@
#ifndef __CONCATENATE_H__
#define __CONCATENATE_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,8 +19,9 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../XUtility.h"
#include "../../XTensor.h"
#include "../../XUtility.h"
#include "../../XName.h"
#include "ConcatenateSolely.h"
#include "MergeBlockLists.h"
......@@ -36,6 +37,10 @@ void ConcatenateSolely(XList * smalls, XTensor * big, int dim)
{
CheckNTErrors((big->order > dim && dim >= 0), "Illegal dimension to concatenate!");
/* make tensor connections */
XLink::MakeLink(smalls, big, MATH_CONCATENATESOLY);
XLink::AddParamToHeadInt(big, dim);
int catDimSize = 0;
int dimRDI = big->order - dim - 1;
......
......@@ -22,7 +22,7 @@
#ifndef __CONCATENATESOLELY_H__
#define __CONCATENATESOLELY_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,7 +19,7 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../../XTensor.h"
#include "MakeMergeBlockIndex.h"
#include "MakeMergeBlockIndex.cuh"
......@@ -36,7 +36,7 @@ set target data block index for the data movement in merge
>> mem - the memory pool
*/
void MakeMergeBlockIndex(int * blockIndex, int blockNum, int blockNumInMerge,
int splitSizeInGrid, int gridSize, int gridNum, XMem * mem)
int splitSizeInGrid, int gridSize, int gridNum, XMem * mem)
{
if (mem != NULL && mem->devID >= 0) {
#ifdef USE_CUDA
......
......@@ -19,8 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XDevice.h"
#include "../XTensor.h"
#include "../../XDevice.h"
#include "../../XTensor.h"
#include "MakeMergeBlockIndex.h"
#include "MakeMergeBlockIndex.cuh"
......
......@@ -22,7 +22,7 @@
#ifndef __CUDAMAKEMERGEBLOCKINDEX_CUH__
#define __CUDAMAKEMERGEBLOCKINDEX_CUH__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __MAKEMERGEBLOCKINDEX_H__
#define __MAKEMERGEBLOCKINDEX_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,7 +19,7 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../../XTensor.h"
#include "MakeSplitBlockIndex.h"
#include "MakeSplitBlockIndex.cuh"
......
......@@ -19,8 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XDevice.h"
#include "../XTensor.h"
#include "../../XDevice.h"
#include "../../XTensor.h"
#include "MakeSplitBlockIndex.h"
#include "MakeSplitBlockIndex.cuh"
......
......@@ -22,7 +22,7 @@
#ifndef __MAKESPLITBLOCKINDEX_H__
#define __MAKESPLITBLOCKINDEX_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,11 +19,12 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../XUtility.h"
#include "../../XTensor.h"
#include "../../XUtility.h"
#include "../../XName.h"
#include "Merge.h"
#include "MakeMergeBlockIndex.h"
#include "CopyBlocksOnSite.h"
#include "../movement/CopyBlocksOnSite.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......@@ -62,6 +63,11 @@ void Merge(XTensor * s, XTensor * t, int whereToMerge, int leadingDim)
}
}
/* make tensor connections */
XLink::MakeLink(s, NULL, t, MATH_MERGE);
XLink::AddParamToHeadInt(t, whereToMerge);
XLink::AddParamToHeadInt(t, leadingDim);
int blockSize = 1;
int blockNum = 1;
int gridSize = 1;
......
......@@ -22,7 +22,7 @@
#ifndef __MERGE_H__
#define __MERGE_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,8 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../XUtility.h"
#include "../../XTensor.h"
#include "../../XUtility.h"
#include "MergeBlockLists.h"
#include "MergeBlockLists.cuh"
......
......@@ -19,9 +19,9 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XDevice.h"
#include "../XUtility.h"
#include "../XTensor.h"
#include "../../XDevice.h"
#include "../../XUtility.h"
#include "../../XTensor.h"
#include "MergeBlockLists.h"
#include "MergeBlockLists.cuh"
......
......@@ -22,7 +22,7 @@
#ifndef __MERGEBLOCKLISTS_H__
#define __MERGEBLOCKLISTS_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-07-05
*/
#ifndef __PERMUTE_H__
#define __PERMUTE_H__
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
#define permute _Permute_
/* generate the tensor with permuted dimensions: b = permuted(a) */
extern "C"
void Permute(XTensor * a, XTensor * b, int * dimPermute);
/* permute the tensor dimensions on site: a = permuted(a) */
extern "C"
void Permute_(XTensor * a, int * dimPermute);
/* make a tensor with permuted dimensions: b = permuted(a) and return its pointer */
extern "C"
XTensor * _Permute(XTensor *a, int * dimPermute);
/* make a tensor with permuted dimensions: b = permuted(a) and return its body */
extern "C"
XTensor& _Permute_(XTensor &a, int * dimPermute);
} // namespace nts(NiuTrans.Tensor)
#endif // __PERMUTE_H__
......@@ -19,11 +19,11 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../XUtility.h"
#include "../../XTensor.h"
#include "../../XUtility.h"
#include "Split.h"
#include "MakeSplitBlockIndex.h"
#include "CopyBlocksOnSite.h"
#include "../movement/CopyBlocksOnSite.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __SPLIT_H__
#define __SPLIT_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-07-05
* It will rain tomorrow - end of the hot days :)
*/
#ifndef __TRANSPOSE_H__
#define __TRANSPOSE_H__
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
#define transpose _Transpose_
/* generate a transposed 1D/2D tensor: b = transposed(a) */
void Transpose(XTensor * a, XTensor * b);
/* transpose a 1D/2D tensor on site: a = transposed(a) */
void Transpose_(XTensor * a);
/* make a transposed 1D/2D tensor: b = transposed(a) and return its pointer */
XTensor * _Transpose(XTensor * a);
/* make a transposed 1D/2D tensor: b = transposed(a) and return its body */
XTensor & _Transpose_(XTensor & a);
} // namespace nts(NiuTrans.Tensor)
#endif // __TRANSPOSE_H__
......@@ -19,7 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../../XTensor.h"
#include "../../XName.h"
#include "Unsqueeze.h"
#include "MergeBlockLists.h"
#include "Unsqueeze.cuh"
......@@ -39,6 +40,11 @@ void Unsqueeze(XTensor * a, XTensor * b, int dim, int dSize)
CheckNTErrors((a->order == b->order - 1), "Unmatched tensors!");
CheckNTErrors((a->unitSize == b->unitSize), "Unmatched tensors!");
/* make tensor connections */
XLink::MakeLink(a, NULL, b, MATH_UNSQUEEZE);
XLink::AddParamToHeadInt(b, dim);
XLink::AddParamToHeadInt(b, dSize);
int dimRDI = b->order - dim - 1;
for (int i = 0; i < b->order; i++) {
if (i < dimRDI) {
......
......@@ -19,8 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XDevice.h"
#include "../XTensor.h"
#include "../../XDevice.h"
#include "../../XTensor.h"
#include "Unsqueeze.h"
#include "Unsqueeze.cuh"
......
......@@ -22,7 +22,7 @@
#ifndef __UNSQUEEZE_H__
#define __UNSQUEEZE_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,8 +19,9 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../XUtility.h"
#include "../../XTensor.h"
#include "../../XUtility.h"
#include "../../XName.h"
#include "Sort.h"
#include "Sort.cuh"
......@@ -38,8 +39,11 @@ void Sort(XTensor * a, XTensor * index, int dim)
CheckNTErrors((a->order == index->order), "Unmatched input tensors!");
CheckNTErrors((index->dataType == X_INT), "Wrong data type!");
int dimRDI = a->order - dim - 1;
/* make tensor connections */
XLink::MakeLink(a, NULL, index, MATH_SORT);
XLink::AddParamToHeadInt(index, dim);
int dimRDI = a->order - dim - 1;
/* make the index tensor */
index->SetAscendingOrder(dim);
......
......@@ -19,9 +19,9 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XDevice.h"
#include "../XUtility.h"
#include "../XTensor.h"
#include "../../XDevice.h"
#include "../../XUtility.h"
#include "../../XTensor.h"
#include "Sort.h"
#include "Sort.cuh"
......
......@@ -22,7 +22,7 @@
#ifndef __SORT_H__
#define __SORT_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,7 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../../XTensor.h"
#include "../../XName.h"
#include "TopK.h"
#include "TopK.cuh"
......@@ -40,6 +41,11 @@ void TopK(XTensor * a, XTensor * b, XTensor * index, int dim, int k)
CheckNTErrors((index == NULL || a->order == index->order), "Unmatched input tensors!");
CheckNTErrors((index->dataType == X_INT), "Wrong data type!");
/* make tensor connections */
XLink::MakeLink(a, b, index, MATH_TOPK);
XLink::AddParamToHeadInt(index, dim);
XLink::AddParamToHeadInt(index, k);
int dimRDI = a->order - dim - 1;
for (int i = 0; i < a->order; i++) {
if (i == dimRDI) {
......
......@@ -19,9 +19,9 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XDevice.h"
#include "../XUtility.h"
#include "../XTensor.h"
#include "../../XDevice.h"
#include "../../XUtility.h"
#include "../../XTensor.h"
#include "TopK.h"
#include "TopK.cuh"
#include "Sort.cuh"
......@@ -95,11 +95,6 @@ public:
/* swap */
__device__ void Swap(int i, int j)
{
/*
CudaHeapNode<T> tmp = items[i];
items[i] = items[j];
items[j] = tmp;
*/
int tmpIndex = items[i].index;
T tmpValue = items[i].value;
items[i] = items[j];
......@@ -242,9 +237,9 @@ void KernelTopK(T * input, int stride, int strideNum, int blockNum, int k, T min
CudaXHeap<MIN_HEAP, T> heapFinal(k, k, heapData + k * threadIdx.y * blockDim.x);
/*
merge the result over the workers.
merge the result over the workers.
This can be improved by parallel merging
*/
*/
if (blockDim.x > 1) {
for (int p = 1; p < blockDim.x && p < strideNum; p++) {
CudaHeapNode<T> * hd = heapData + k * (threadIdx.y * blockDim.x + p);
......@@ -433,7 +428,6 @@ void CudaTopK(XTensor * a, XTensor * b, XTensor * index, int dim, int k)
}
}
/* we resort to sorting if the data cannot fit inside the shared memory */
else {
int dimSize[MAX_TENSOR_DIM_NUM];
......@@ -449,8 +443,6 @@ void CudaTopK(XTensor * a, XTensor * b, XTensor * index, int dim, int k)
if (a->mem != NULL)
a->mem->ReleaseBuf(a->devID, a->unitNum * sizeof(int));
else
XMemFree(a->devID, indexA->data);
delete indexA;
}
......
......@@ -22,7 +22,7 @@
#ifndef __TOPK_H__
#define __TOPK_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,7 +19,7 @@
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-06-22
*/
#include "../XUtility.h"
#include "../../XUtility.h"
#include "FlushToMem.h"
#include "FlushToMem.cuh"
......
......@@ -20,7 +20,7 @@
*/
#include "FlushToMem.cuh"
#include "../XUtility.h"
#include "../../XUtility.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __FLUSHTOMEM_CUH__
#define __FLUSHTOMEM_CUH__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __FLUSHTOMEM_H__
#define __FLUSHTOMEM_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -20,7 +20,7 @@
*/
#include "SetAscendingOrder.cuh"
#include "../XDevice.h"
#include "../../XDevice.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __SETASCENDINGORDER_CUH__
#define __SETASCENDINGORDER_CUH__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __XMATRIXSEGMENT_H__
#define __XMATRIXSEGMENT_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -21,7 +21,7 @@
#include "Identity.h"
#include "../XUtility.h"
#include "../core/CopyValues.h"
#include "../core/movement/CopyValues.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
......
......@@ -23,9 +23,9 @@
#include "../XUtility.h"
#include "LogSoftmax.h"
#include "LogSoftmax.cuh"
#include "../core/ReduceSum.h"
#include "../core/ReduceMax.h"
#include "../core/CopyValues.h"
#include "../core/reduce/ReduceSum.h"
#include "../core/reduce/ReduceMax.h"
#include "../core/movement/CopyValues.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,8 +22,8 @@
#include "LogSoftmax.h"
#include "LogSoftmax.cuh"
#include "Loss.cuh"
#include "../core/ReduceSum.cuh"
#include "../core/ReduceMax.cuh"
#include "../core/reduce/ReduceSum.cuh"
#include "../core/reduce/ReduceMax.cuh"
#include "../XDevice.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -23,8 +23,8 @@
#include "Softmax.h"
#include "Softmax.cuh"
#include "../XUtility.h"
#include "../core/ReduceSum.h"
#include "../core/ReduceMax.h"
#include "../core/reduce/ReduceSum.h"
#include "../core/reduce/ReduceMax.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,10 +22,10 @@
#include "Softmax.h"
#include "Softmax.cuh"
#include "Loss.cuh"
#include "../core/ReduceSum.h"
#include "../core/MultiplyElementWise.h"
#include "../core/Unsqueeze.h"
#include "../core/Sum.h"
#include "../core/reduce/ReduceSum.h"
#include "../core/arithmetic/Multiply.h"
#include "../core/shape/Unsqueeze.h"
#include "../core/arithmetic/Sum.h"
#include "../XDevice.h"
#include "../XUtility.h"
......@@ -288,7 +288,7 @@ void CudaSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
beta->data = mem->AllocBuf(mem->devID, beta->unitNum * beta->unitSize);
/* \beta = \sum_i (dE/dy_i * y_i) */
MultiplyElementWise(dedy, y, ytmp, 0);
Multiply(dedy, y, ytmp, 0);
ReduceSum(ytmp, beta, leadDim);
/* ytmp = dE/dy_j - \beta */
......@@ -296,7 +296,7 @@ void CudaSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
Sum(dedy, ytmp, ytmp, -1.0F);
/* dE/ds_j = y_j * ytmp = y_j * (dE/dy_j - \beta) */
MultiplyElementWise(y, ytmp, dedx, 0);
Multiply(y, ytmp, dedx, 0);
mem->ReleaseBuf(mem->devID, y->unitNum * y->unitSize);
mem->ReleaseBuf(mem->devID, beta->unitNum * beta->unitSize);
......
......@@ -485,7 +485,7 @@ float GetProb(XTensor &output, XTensor &gold, XTensor * wordProbs)
InitTensor(&probs, &output);
/* probs[i,j] = output[i,j] * gold[i,j] */
MultiplyElementWise(&output, &gold, &probs, 0);
Multiply(&output, &gold, &probs, 0);
/* probability of each word */
XTensor wprobs;
......
......@@ -22,7 +22,7 @@
#ifndef __TEST_CONCATENATE_H__
#define __TEST_CONCATENATE_H__
#include "../core/Concatenate.h"
#include "../core/shape/Concatenate.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __TEST_CONCATENATESOLELY_H__
#define __TEST_CONCATENATESOLELY_H__
#include "../core/ConcatenateSolely.h"
#include "../core/shape/ConcatenateSolely.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __TEST_COPYINDEXED_H__
#define __TEST_COPYINDEXED_H__
#include "../core/CopyIndexed.h"
#include "../core/movement/CopyIndexed.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __TEST_COPYVALUES_H__
#define __TEST_COPYVALUES_H__
#include "../core/CopyValues.h"
#include "../core/movement/CopyValues.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,7 +19,7 @@
* $Created by: LI Yinqiao (email: li.yin.qiao.2012@hotmail.com) 2018-04-30
*/
#include "../core/ScaleAndShift.h"
#include "../core/math/ScaleAndShift.h"
#include "../function/Loss.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __TEST_MATRIXMULBATCHEDCPU_H__
#define __TEST_MATRIXMULBATCHEDCPU_H__
#include "../core/MatrixMULBatchedCPU.h"
#include "../core/arithmetic/MatrixMULBatchedCPU.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __TEST_MATRIXMUL_H__
#define __TEST_MATRIXMUL_H__
#include "../core/MatrixMul.h"
#include "../core/arithmetic/MatrixMul.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __TEST_MATRIXMUL2D_H__
#define __TEST_MATRIXMUL2D_H__
#include "../core/MatrixMul2D.h"
#include "../core/arithmetic/MatrixMul2D.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __TEST_MATRIXMUL2DPARALLEL_H__
#define __TEST_MATRIXMUL2DPARALLEL_H__
#include "../core/MatrixMul2DParallel.h"
#include "../core/arithmetic/MatrixMul2DParallel.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __TEST_MATRIXMULBATCHED_H__
#define __TEST_MATRIXMULBATCHED_H__
#include "../core/MatrixMulBatched.h"
#include "../core/arithmetic/MatrixMulBatched.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __TEST_MERGE_H__
#define __TEST_MERGE_H__
#include "../core/Merge.h"
#include "../core/shape/Merge.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,7 +19,7 @@
* $Created by: Lin Ye (email: linye2015@outlook.com) 2018-06-15
*/
#include "TMultiplyElementWise.h"
#include "TMultiply.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......@@ -28,7 +28,7 @@ case 1: element-wise product of two tensors
c(i) = a(i)*b(i) + \alpha * c(i)
In this case, (2, 1) (2, 1) -> (2, 1), leadingDim=0, alpha=0.
*/
bool TestMultiplyElementWise1()
bool TestMultiply1()
{
/* a source tensor of size (2, 1) */
int sOrder1 = 2;
......@@ -81,7 +81,7 @@ bool TestMultiplyElementWise1()
t->SetZeroAll();
/* call MultiplyElementWise function */
MultiplyElementWise(s1, s2, t, 0);
Multiply(s1, s2, t, 0);
/* check results */
cpuTest = t->CheckData(answer, tUnitNum);
......@@ -101,7 +101,7 @@ bool TestMultiplyElementWise1()
tGPU->SetZeroAll();
/* call MultiplyElementWise function */
MultiplyElementWise(sGPU1, sGPU2, tGPU, 0);
Multiply(sGPU1, sGPU2, tGPU, 0);
/* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum);
......@@ -136,7 +136,7 @@ case 2: element-wise product of two tensors
c(i) = a(i)*b(i) + \alpha * c(i)
In this case, (2, 2) (2, 2) -> (2, 2), leadingDim=0, alpha=0.
*/
bool TestMultiplyElementWise2()
bool TestMultiply2()
{
/* a source tensor of size (2, 2) */
int sOrder1 = 2;
......@@ -189,7 +189,7 @@ bool TestMultiplyElementWise2()
t->SetZeroAll();
/* call MultiplyElementWise function */
MultiplyElementWise(s1, s2, t, 0);
Multiply(s1, s2, t, 0);
/* check results */
cpuTest = t->CheckData(answer, tUnitNum);
......@@ -209,7 +209,7 @@ bool TestMultiplyElementWise2()
tGPU->SetZeroAll();
/* call MultiplyElementWise function */
MultiplyElementWise(sGPU1, sGPU2, tGPU, 0);
Multiply(sGPU1, sGPU2, tGPU, 0);
/* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum);
......@@ -243,7 +243,7 @@ bool TestMultiplyElementWise2()
case 3: element-wise product of two tensors, c(i) = a(i)*b(i) + \alpha * c(i)
In this case, (2, 2) (2, 2) -> (2, 2), leadingDim=1, alpha=0.
*/
bool TestMultiplyElementWise3()
bool TestMultiply3()
{
/* a source tensor of size (2, 2) */
int sOrder1 = 2;
......@@ -296,7 +296,7 @@ bool TestMultiplyElementWise3()
t->SetZeroAll();
/* call MultiplyElementWise function */
MultiplyElementWise(s1, s2, t, 1);
Multiply(s1, s2, t, 1);
/* check results */
cpuTest = t->CheckData(answer, tUnitNum);
......@@ -316,7 +316,7 @@ bool TestMultiplyElementWise3()
tGPU->SetZeroAll();
/* call MultiplyElementWise function */
MultiplyElementWise(sGPU1, sGPU2, tGPU, 1);
Multiply(sGPU1, sGPU2, tGPU, 1);
/* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum);
......@@ -352,13 +352,13 @@ TODO!!
*/
/* test for MultiplyElementWise Function */
bool TestMultiplyElementWise()
bool TestMultiply()
{
XPRINT(0, stdout, "[TEST MULTIPLYELEMENTWISE] element-wise product of two tensors \n");
bool returnFlag = true, caseFlag = true;
/* case 1 test */
caseFlag = TestMultiplyElementWise1();
caseFlag = TestMultiply1();
if (!caseFlag) {
returnFlag = false;
......@@ -368,7 +368,7 @@ bool TestMultiplyElementWise()
XPRINT(0, stdout, ">> case 1 passed!\n");
/* case 2 test */
caseFlag = TestMultiplyElementWise2();
caseFlag = TestMultiply2();
if (!caseFlag) {
returnFlag = false;
......@@ -378,7 +378,7 @@ bool TestMultiplyElementWise()
XPRINT(0, stdout, ">> case 2 passed!\n");
/* case 3 test */
caseFlag = TestMultiplyElementWise3();
caseFlag = TestMultiply3();
if (!caseFlag) {
returnFlag = false;
......
......@@ -22,13 +22,13 @@
#ifndef __TEST_MULTIPLYELEMENTWISE_H__
#define __TEST_MULTIPLYELEMENTWISE_H__
#include "../core/MultiplyElementWise.h"
#include "../core/arithmetic/Multiply.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* test for MultiplyElementWise Function */
extern "C"
bool TestMultiplyElementWise();
bool TestMultiply();
} // namespace nts(NiuTrans.Tensor)
#endif // __TEST_MULTIPLYELEMENTWISE_H__
......@@ -22,7 +22,7 @@
#ifndef __TEST_NEGATE_H__
#define __TEST_NEGATE_H__
#include "../core/Negate.h"
#include "../core/arithmetic/Negate.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __TEST_NORMALIZE_H__
#define __TEST_NORMALIZE_H__
#include "../core/Normalize.h"
#include "../core/math/Normalize.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __TEST_POWER_H__
#define __TEST_POWER_H__
#include "../core/Power.h"
#include "../core/math/Power.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __TEST_REDUCEMAX_H__
#define __TEST_REDUCEMAX_H__
#include "../core/ReduceMax.h"
#include "../core/reduce/ReduceMax.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __TEST_REDUCEMEAN_H__
#define __TEST_REDUCEMEAN_H__
#include "../core/ReduceMean.h"
#include "../core/reduce/ReduceMean.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __TEST_REDUCESUM_H__
#define __TEST_REDUCESUM_H__
#include "../core/ReduceSum.h"
#include "../core/reduce/ReduceSum.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __TEST_REDUCESUMSQUARED_H__
#define __TEST_REDUCESUMSQUARED_H__
#include "../core/ReduceSumSquared.h"
#include "../core/reduce/ReduceSumSquared.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __TEST_REDUCEVARIANCE_H__
#define __TEST_REDUCEVARIANCE_H__
#include "../core/ReduceVariance.h"
#include "../core/reduce/ReduceVariance.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __TEST_SCALEANDSHIFT_H__
#define __TEST_SCALEANDSHIFT_H__
#include "../core/ScaleAndShift.h"
#include "../core/math/ScaleAndShift.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -20,7 +20,6 @@
*/
#include "TSelect.h"
#include "../xc/Mycode.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......@@ -74,7 +73,7 @@ bool TestSelect1()
t->SetZeroAll();
/* call SelectRange function */
SelectRange(s, 2, 1, 3, t);
SelectRange(s, t, 2, 1, 3);
/* check results */
cpuTest = t->CheckData(answer, tUnitNum);
......@@ -92,7 +91,7 @@ bool TestSelect1()
tGPU->SetZeroAll();
/* call Select function */
SelectRange(sGPU, 2, 1, 3, tGPU);
SelectRange(sGPU, tGPU, 2, 1, 3);
/* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum);
......
......@@ -22,7 +22,7 @@
#ifndef __TEST_SELECT_H__
#define __TEST_SELECT_H__
#include "../core/Select.h"
#include "../core/getandset/Select.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __TEST_SORT_H__
#define __TEST_SORT_H__
#include "../core/Sort.h"
#include "../core/sort/Sort.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __TEST_SPLIT_H__
#define __TEST_SPLIT_H__
#include "../core/Split.h"
#include "../core/shape/Split.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __TEST_SUM_H__
#define __TEST_SUM_H__
#include "../core/Sum.h"
#include "../core/arithmetic/Sum.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __TEST_SUMBYCOLUMNTV_H__
#define __TEST_SUMBYCOLUMNTV_H__
#include "../core/SumByColumnTV.h"
#include "../core/arithmetic/SumByColumnTV.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __TEST_SUMBYCOLUMNVT_H__
#define __TEST_SUMBYCOLUMNVT_H__
#include "../core/SumByColumnVT.h"
#include "../core/arithmetic/SumByColumnVT.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __TEST_TOPK_H__
#define __TEST_TOPK_H__
#include "../core/TopK.h"
#include "../core/sort/TopK.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __TEST_UNSQUEEZE_H__
#define __TEST_UNSQUEEZE_H__
#include "../core/Unsqueeze.h"
#include "../core/shape/Unsqueeze.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -39,7 +39,7 @@ bool Test()
//wrong = !TestMatrixMulBatched() || wrong;
wrong = !TestMatrixMulBatchedCPU() || wrong;
wrong = !TestMerge() || wrong;
wrong = !TestMultiplyElementWise() || wrong;
wrong = !TestMultiply() || wrong;
wrong = !TestNegate() || wrong;
wrong = !TestNormalize() || wrong;
wrong = !TestPower() || wrong;
......
......@@ -32,7 +32,7 @@
#include "TMatrixMulBatched.h"
#include "TMatrixMULBatchedCPU.h"
#include "TMerge.h"
#include "TMultiplyElementWise.h"
#include "TMultiply.h"
#include "TNegate.h"
#include "TNormalize.h"
#include "TPower.h"
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论