Commit 414ff54f by liyinqiao

1. merge updates from the master branch; 2. reorganize the core tensor-operation sources into sub-directories (arithmetic, shape, movement, reduce, getandset, sort, math, utilities)

parent 0887fae1
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-04
*/
#include <stdio.h>
#include <string.h>
#include "XLink.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
/* size (in bytes) of one parameter slot in the "params" buffer; shared by all links */
int XLink::paramSize = 64;
/* constructor: build an empty hyperedge with no head, tails or parameters */
XLink::XLink() :
    head(NULL),
    tails(NULL),
    params(NULL),
    tailNum(0),
    paramNum(0)
{
    type[0] = 0;
}
/* destructor: release the parameter buffer and the tail array */
XLink::~XLink()
{
    delete[] (char*)params;
    delete[] tails;
}
/* reset the hyperedge to its initial (empty) state, releasing owned memory */
void XLink::Reset()
{
    delete[] tails;
    tails = NULL;
    delete[] (char*)params;
    params = NULL;
    head = NULL;
    type[0] = 0;
    tailNum = 0;
    paramNum = 0;
}
/*
set edge type name
>> typeName - type name in string
*/
void XLink::SetType(const char * typeName)
{
type[0] = 0;
if(typeName == NULL)
return;
strcpy(type, typeName);
}
/*
set the head (i.e., the output tensor) of the hyperedge
>> h - pointer to the head tensor
*/
void XLink::SetHead(XTensor * h)
{
head = h;
}
/*
add a tail (input tensor) to the hyperedge; the tail array is
reallocated to hold one more entry
>> t - pointer to the tail tensor
*/
void XLink::AddTail(XTensor * t)
{
    XTensor ** old = tails;
    tails = new XTensor*[tailNum + 1];
    /* guard the copy: memcpy from a NULL source is undefined behavior
       even when the copied size is 0 (first call, tailNum == 0) */
    if(old != NULL)
        memcpy(tails, old, sizeof(XTensor*) * tailNum);
    tails[tailNum++] = t;
    delete[] old;
}
/*
add two tails (input tensors) in one time; the tail array is
reallocated to hold two more entries
>> t1 - pointer to the tail tensor
>> t2 - pointer to another tail tensor
*/
void XLink::AddTwoTails(XTensor * t1, XTensor * t2)
{
    XTensor ** old = tails;
    tails = new XTensor*[tailNum + 2];
    /* guard the copy: memcpy from a NULL source is undefined behavior
       even when the copied size is 0 (first call, tailNum == 0) */
    if(old != NULL)
        memcpy(tails, old, sizeof(XTensor*) * tailNum);
    tails[tailNum++] = t1;
    tails[tailNum++] = t2;
    delete[] old;
}
/*
add a parameter in the default data type (DTYPE); each parameter
occupies one paramSize-byte slot in the "params" buffer
>> param - parameter value
*/
void XLink::AddParam(DTYPE param)
{
    void * old = params;
    /* allocate (paramNum + 1) slots of paramSize bytes each.
       (the original allocated only paramNum + 1 BYTES, so both the
       memcpy below and the slot write overflowed the buffer) */
    params = new char[(paramNum + 1) * paramSize];
    if(old != NULL)
        memcpy(params, old, paramNum * paramSize);
    DTYPE * p = (DTYPE*)((char*)params + paramNum * paramSize);
    *p = param;
    paramNum++;
    delete[] (char*)old;
}
/*
add a parameter
>> param - pointer to the parameter
>> size - size of the parameter
*/
void XLink::AddParam(void * param, int size)
{
void * ps = params;
params = new char[paramNum + 1];
memcpy(params, ps, paramNum * paramSize);
char * p = (char*)params + paramNum * paramSize;
memcpy(p, param, size);
paramNum++;
delete[] (char*)ps;
}
/*
create a hyperedge with two input tensors and an output tensor
>> t1 - a tail tensor
>> t2 - another tail tensor (may be NULL for unary operations)
>> h - head tensor
>> typeName - name of edge type
*/
void XLink::MakeLink(XTensor * t1, XTensor * t2, XTensor * h, const char * typeName)
{
    /* nothing to do without an output tensor.
       (the original test was inverted: it returned when h was valid
       and dereferenced h when it was NULL) */
    if(h == NULL)
        return;
    /* forward */
    XLink &income = h->income;
    income.Reset();
    income.SetHead(h);
    if(t1 != NULL && t2 != NULL)
        income.AddTwoTails(t1, t2);
    else if(t1 != NULL)
        income.AddTail(t1);
    else{
        ShowNTErrors("TODO!");
    }
    income.SetType(typeName);
    /* backward for t1 */
    if(t1 != NULL){
        XLink &outgo = t1->outgo;
        /* the head of a tensor's outgoing link is the tensor itself;
           the original check rejected exactly that valid case */
        CheckNTErrors(outgo.head == NULL || outgo.head == t1,
                      "Wrong head of the hyperedge!");
        outgo.SetHead(t1);
        outgo.AddTail(h);
    }
    /* backward for t2 */
    if(t2 != NULL){
        XLink &outgo = t2->outgo;
        CheckNTErrors(outgo.head == NULL || outgo.head == t2,
                      "Wrong head of the hyperedge!");
        outgo.SetHead(t2);
        outgo.AddTail(h);
    }
}
/*
create a hyperedge with a list of input tensors and an output tensor
>> list - a list of input tensors
>> h - head tensor
>> typeName - name of edge type
*/
void XLink::MakeLink(XList * list, XTensor * h, const char * typeName)
{
    /* nothing to do without an output tensor or an input list */
    if(h == NULL || list == NULL)
        return;
    /* forward */
    XLink &income = h->income;
    income.Reset();
    income.SetHead(h);
    income.SetType(typeName);
    for(int i = 0; i < list->count; i++){
        XTensor * t = (XTensor*)list->GetItem(i);
        income.AddTail(t);
    }
    /* backward */
    for(int i = 0; i < list->count; i++){
        XTensor * t = (XTensor*)list->GetItem(i);
        XLink &outgo = t->outgo;
        /* the head of a tensor's outgoing link is the tensor itself;
           the original check rejected exactly that valid case */
        CheckNTErrors(outgo.head == NULL || outgo.head == t,
                      "Wrong head of the hyperedge!");
        outgo.SetHead(t);
        outgo.AddTail(h);
    }
}
/*
add a parameter (DTYPE) to the income link of the head tensor
>> h - head tensor
>> param - the parameter we want to introduce
*/
void XLink::AddParamToHead(XTensor * h, DTYPE param)
{
    /* the original test was inverted: it returned when h was valid
       and dereferenced h when it was NULL */
    if(h == NULL)
        return;
    h->income.AddParam(param);
}
/*
add an integer parameter to the income link of the head tensor
>> h - head tensor
>> param - the parameter we want to introduce
*/
void XLink::AddParamToHeadInt(XTensor * h, int param)
{
    /* the original test was inverted: it returned when h was valid
       and dereferenced h when it was NULL */
    if(h == NULL)
        return;
    h->income.AddParam(&param, sizeof(int));
}
} // namespace nts(NiuTrans.Tensor)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-04
*/
#include <stdio.h>
#include "XGlobal.h"
#include "XTensor.h"
#ifndef __XLINK_H__
#define __XLINK_H__
#include "XGlobal.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
/* cross reference */
struct XTensor;
#define MAX_OP_NAME_LENGTH 16
/*
This defines the link among tensors in networks. XLink can be
cast as a hyperedge in a graph. when we compute on tensors, we actually create a
network where nodes are tensors and edges the connections among them. Each connection is
a hyperedge whose head is the output tensor and tails are input tensors. E.g,
c = a + b
represents a network with three nodes (a, b and c) and a hyperedge that links a and b (tails) to c (head).
+ (=c)
/ \
a b
for c, we have an incoming edge (a, b) -> c
for a, we also have an edge c -> a in the reverse order (in a view of acyclic directed graphs)
*/
struct XLink
{
/* head of the hyperedge (i.e., the output tensor of the operation) */
XTensor * head;
/* tails of the hyperedge (i.e., the input tensors of the operation) */
XTensor ** tails;
/* number of tails */
int tailNum;
/* parameters used. e.g., c = a * b * \alpha
scalar \alpha is the parameter; stored as a flat byte buffer of
paramSize-byte slots */
void * params;
/* number of parameters */
int paramNum;
/* size (in bytes) of each parameter slot */
static int paramSize;
/* name of the hyperedge type. e.g., sum, mul ... */
char type[MAX_OP_NAME_LENGTH];
/* constructor */
XLink();
/* destructor */
~XLink();
/* reset it */
void Reset();
/* set edge type name */
void SetType(const char * typeName);
/* set head */
void SetHead(XTensor * h);
/* add a tail */
void AddTail(XTensor * t);
/* add two tails in one time */
void AddTwoTails(XTensor * t1, XTensor * t2);
/* add a parameter in the default data type (DTYPE) */
void AddParam(DTYPE param);
/* add a parameter of a given size (raw bytes) */
void AddParam(void * param, int size);
/* create a hyper edge with two input tensors and an output tensor */
static
void MakeLink(XTensor * t1, XTensor * t2, XTensor * h, const char * typeName);
/* create a hyper edge with a list of tensors and an output tensor */
static
void MakeLink(XList * list, XTensor * h, const char * typeName);
/* add a parameter (DTYPE) to the income link of the head tensor */
static
void AddParamToHead(XTensor * h, DTYPE param);
/* add an integer parameter to the income link of the head tensor */
static
void AddParamToHeadInt(XTensor * h, int param);
};
} // namespace nts(NiuTrans.Tensor)
#endif // __XLINK_H__
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
*
* We define various names here
*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-05
* It was really HOT these days. I can't imagine what a hot day here in Shenyang!
*/
#ifndef __XNAME_H__
#define __XNAME_H__
namespace nts { // namespace nts(NiuTrans.Tensor)
/* string identifiers of tensor operations; these are the edge type
   names stored in XLink::type when a hyperedge is created */
#define MATH_MATMUL "M_MATMUL"
/* NOTE(review): "CONCATENATESOLY" looks like a typo for
   "CONCATENATESOLELY" — the value is a runtime identifier, so it is
   left unchanged here; confirm against all users before renaming */
#define MATH_CONCATENATESOLY "M_CONCATENATESOLY"
#define MATH_COPYVALUES "M_COPYVALUES"
#define MATH_MATRIXMUL "M_MATRIXMUL"
#define MATH_MATRIXMUL2D "M_MATRIXMUL2D"
#define MATH_MATRIXMULBATCHED "M_MATRIXMULBATCHED"
#define MATH_MERGE "M_MERGE"
#define MATH_MULTIPLY "M_MULTIPLY"
#define MATH_REDUCEMAX "M_REDUCEMAX"
#define MATH_REDUCESUM "M_REDUCESUM"
#define MATH_SELECTRANGE "M_SELECTRANGE"
#define MATH_SORT "M_SORT"
#define MATH_SUM "M_SUM"
#define MATH_TOPK "M_TOPK"
#define MATH_UNSQUEEZE "M_UNSQUEEZE"
} // namespace nts(NiuTrans.Tensor)
#endif // __XNAME_H__
\ No newline at end of file
......@@ -38,7 +38,7 @@
#include "XMem.h"
#include "XHeap.h"
#include "XBLAS.h"
#include "core/MergeBlockLists.h"
#include "core/shape/MergeBlockLists.h"
#ifdef USE_CUDA
......@@ -47,8 +47,8 @@
#include <cublas_v2.h>
#include <cuda.h>
#include <curand.h>
#include "core/FlushToMem.cuh"
#include "core/SetAscendingOrder.cuh"
#include "core/utilities/FlushToMem.cuh"
#include "core/utilities/SetAscendingOrder.cuh"
#endif
......
......@@ -21,7 +21,7 @@
*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2017-07-31
* I'm working while most of the students are enjoying their holidays :(
* $Update by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2017-11-18 bug fixes
* $Updated by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2017-11-18 bug fixes
*
*/
......@@ -36,10 +36,14 @@
#include "XList.h"
#include "XDataType.h"
#include "XMem.h"
#include "XLink.h"
/* the nts (NiuTrans.Tensor) namespace */
namespace nts{
/* cross reference */
struct XLink;
/* define the maximum number of dimensions in a tensor */
#define MAX_TENSOR_DIM_NUM 6
#define USE_BATCHED_STRIDED_MAT_MUL
......@@ -47,9 +51,7 @@ namespace nts{
#define MIN_TENSOR_SPLIT_LIST_NUM 1024
#define MIN_TENSOR_CAT_NUM 8
/*
computation flags
*/
/* computation flags */
#define UNSAFE_BUT_FAST_MEM
#define FAST_MATRIX
......@@ -59,7 +61,6 @@ is the parent class of XMatrix.
*/
struct XTensor
{
public:
/* memory pool */
XMem * mem;
......@@ -129,11 +130,24 @@ public:
/* indicates whether the tensor is initialized or not */
bool isInit;
/*
the link used to form networks. Note that when we compute on tensors, we actually create a
network where nodes are tensors and edges the connections among them. Each connection is
a hyperedge whose head is the output tensor and tails are input tensors. E.g,
c = a + b
represents a network with three nodes (a, b and c) and a hyperedge that links a and b (tails) to c (head).
Here "income" keeps which nodes (tensors) are used to form the current node (tensor).
*/
XLink income;
/* It keeps which nodes (tensors) we go to from the current node (tensor). */
XLink outgo;
/*******************************************************************
XTensor untilities
*/
public:
/********************
XTensor untilities
********************/
/* constructor */
XTensor();
......
......@@ -403,7 +403,7 @@ int ToCPUInt(int devID, void * value)
}
}
/* set the value that is kept on a device */
/* assign a number to a variable that is kept on a specified device */
bool SetToDevice(int devID, void * p, DTYPE value)
{
if(p == NULL)
......@@ -412,7 +412,7 @@ bool SetToDevice(int devID, void * p, DTYPE value)
if(devID < 0)
*(DTYPE*)p = value;
else{
XMemCopy(p, devID, &value, -1, sizeof(DTYPE*));
XMemCopy(p, devID, &value, -1, sizeof(DTYPE));
}
return true;
......
......@@ -26,47 +26,49 @@
#include "../XTensor.h"
#include "Concatenate.h"
#include "ConcatenateSolely.h"
#include "CopyBlocks.h"
#include "CopyBlocksInGrid.h"
#include "CopyBlocksOnSite.h"
#include "CopyData2D.h"
#include "CopyIndexed.h"
#include "CopyInGrid.h"
#include "CopyValues.h"
#include "FlushToMem.h"
#include "MakeMergeBlockIndex.h"
#include "MakeSplitBlockIndex.h"
#include "MatrixMul.h"
#include "MatrixMul2D.h"
#include "MatrixMul2DMultiTheading.h"
#include "MatrixMul2DParallel.h"
#include "MatrixMulBatched.h"
#include "MatrixMULBatchedCPU.h"
#include "Merge.h"
#include "MergeBlockLists.h"
#include "MultiplyElementWise.h"
#include "Negate.h"
#include "Normalize.h"
#include "Power.h"
#include "ReduceMax.h"
#include "ReduceMean.h"
#include "ReduceStandardVariance.h"
#include "ReduceSum.h"
#include "ReduceSumSquared.h"
#include "ReduceVariance.h"
#include "ScaleAndShift.h"
#include "Select.h"
#include "SetData.h"
#include "Sort.h"
#include "Split.h"
#include "Sum.h"
#include "SumByColumnTV.h"
#include "SumByColumnVT.h"
#include "TopK.h"
#include "Unsqueeze.h"
#include "XMatrixSegment.h"
#include "XTensorBLAS.h"
#include "shape/Concatenate.h"
#include "shape/ConcatenateSolely.h"
#include "movement/CopyBlocks.h"
#include "movement/CopyBlocksInGrid.h"
#include "movement/CopyBlocksOnSite.h"
#include "movement/CopyData2D.h"
#include "movement/CopyIndexed.h"
#include "movement/CopyInGrid.h"
#include "movement/CopyValues.h"
#include "utilities/FlushToMem.h"
#include "shape/MakeMergeBlockIndex.h"
#include "shape/MakeSplitBlockIndex.h"
#include "arithmetic/MatrixMul.h"
#include "arithmetic/MatrixMul2D.h"
#include "arithmetic/MatrixMul2DMultiTheading.h"
#include "arithmetic/MatrixMul2DParallel.h"
#include "arithmetic/MatrixMulBatched.h"
#include "arithmetic/MatrixMULBatchedCPU.h"
#include "shape/Merge.h"
#include "shape/MergeBlockLists.h"
#include "arithmetic/Multiply.h"
#include "arithmetic/Negate.h"
#include "math/Normalize.h"
#include "shape/Permute.h"
#include "math/Power.h"
#include "reduce/ReduceMax.h"
#include "reduce/ReduceMean.h"
#include "reduce/ReduceStandardVariance.h"
#include "reduce/ReduceSum.h"
#include "reduce/ReduceSumSquared.h"
#include "reduce/ReduceVariance.h"
#include "math/ScaleAndShift.h"
#include "getandset/Select.h"
#include "getandset/SetData.h"
#include "sort/Sort.h"
#include "shape/Split.h"
#include "arithmetic/Sum.h"
#include "arithmetic/SumByColumnTV.h"
#include "arithmetic/SumByColumnVT.h"
#include "sort/TopK.h"
#include "shape/Transpose.h"
#include "shape/Unsqueeze.h"
#include "utilities/XMatrixSegment.h"
#include "arithmetic/XTensorBLAS.h"
#endif // __CHEADER_H__
\ No newline at end of file
......@@ -19,7 +19,7 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../../XTensor.h"
#include "MatrixMULBatchedCPU.h"
#include "MatrixMul2D.h"
#include "XTensorBLAS.h"
......@@ -38,8 +38,8 @@ c_i = trans(a_i) * trans(b_i) * \alpha + c_i * \beta for each i in [0,count-1]
>> beta - scalar
*/
void MatrixMULBatchedCPU(XList * a, MATRIX_TRANS_TYPE transposedA,
XList * b, MATRIX_TRANS_TYPE transposedB,
XList * c, DTYPE alpha, DTYPE beta)
XList * b, MATRIX_TRANS_TYPE transposedB,
XList * c, DTYPE alpha, DTYPE beta)
{
CheckNTErrors((a && b && c), "Empty input lists!");
CheckNTErrors((a->count == b->count && a->count == c->count), "Input lists must be of the same size!");
......
......@@ -22,7 +22,7 @@
#ifndef __MATRIXMULBATCHEDCPU_H__
#define __MATRIXMULBATCHEDCPU_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,8 +19,9 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../XDevice.h"
#include "../../XTensor.h"
#include "../../XDevice.h"
#include "../../XName.h"
#include "MatrixMul.h"
#include "MatrixMul2D.h"
#include "MatrixMULBatchedCPU.h"
......@@ -54,10 +55,16 @@ void MatrixMul(XTensor * a, MATRIX_TRANS_TYPE transposedA,
{
CheckNTErrors((a && b && c), "Empty input tensors!");
CheckNTErrors((a->dataType == b->dataType && a->dataType == c->dataType),
"Input tensors should have the same data type!");
"Input tensors should have the same data type!");
CheckNTErrors((a->order >= 2 && b->order >= 2 && c->order >= 2),
"Input tensors must have a order > 2!");
"Input tensors must have a order > 2!");
/* make tensor connections */
XLink::MakeLink(a, b, c, MATH_MATRIXMUL);
XLink::AddParamToHeadInt(c, transposedA);
XLink::AddParamToHeadInt(c, transposedB);
XLink::AddParamToHead(c, alpha);
XLink::AddParamToHead(c, beta);
int an = transposedA == X_TRANS ? a->dimSizeRDI[0] : a->dimSizeRDI[1];
int am = transposedA == X_TRANS ? a->dimSizeRDI[1] : a->dimSizeRDI[0];
int bn = transposedB == X_TRANS ? b->dimSizeRDI[0] : b->dimSizeRDI[1];
......
......@@ -22,7 +22,7 @@
#ifndef __MATRIXMUL_H__
#define __MATRIXMUL_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,7 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../../XTensor.h"
#include "../../XName.h"
#include "MatrixMul2D.h"
#include "MatrixMul2D.cuh"
#include "MatrixMul2DParallel.h"
......@@ -51,6 +52,13 @@ void MatrixMul2D(XTensor * a, MATRIX_TRANS_TYPE transposedA,
CheckNTErrors((a->order == 2 && b->order == 2 && c->order == 2),
"Input tensors must have a order = 2!");
/* make tensor connections */
XLink::MakeLink(a, b, c, MATH_MATRIXMUL2D);
XLink::AddParamToHeadInt(c, transposedA);
XLink::AddParamToHeadInt(c, transposedB);
XLink::AddParamToHead(c, alpha);
XLink::AddParamToHead(c, beta);
int an = a->dimSize[0], am = a->dimSize[1];
int bn = b->dimSize[0], bm = b->dimSize[1];
int cn = c->dimSize[0], cm = c->dimSize[1];
......
......@@ -19,8 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XDevice.h"
#include "../XTensor.h"
#include "../../XDevice.h"
#include "../../XTensor.h"
#include "MatrixMul2D.h"
#include "MatrixMul2D.cuh"
#include "XTensorBLAS.h"
......
......@@ -22,7 +22,7 @@
#ifndef __MATRIXMUL2D_H__
#define __MATRIXMUL2D_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,7 +19,7 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../../XTensor.h"
#include "MatrixMul2DMultiTheading.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __MATRIXMUL2DMULTITHEADING_H__
#define __MATRIXMUL2DMULTITHEADING_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,10 +19,10 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../../XTensor.h"
#include "MatrixMul2DParallel.h"
#include "MatrixMul2DMultiTheading.h"
#include "XMatrixSegment.h"
#include "../utilities/XMatrixSegment.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __MATRIXMUL2DPARALLEL_H__
#define __MATRIXMUL2DPARALLEL_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,8 +19,9 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../XDevice.h"
#include "../../XTensor.h"
#include "../../XDevice.h"
#include "../../XName.h"
#include "MatrixMulBatched.h"
#include "MatrixMULBatchedCPU.h"
#include "XTensorBLAS.h"
......@@ -43,16 +44,22 @@ where trans() returns the transposed matrix if the flag is fired
>> parallelRunner - parallel processing module
*/
void MatrixMulBatched(XTensor * a, MATRIX_TRANS_TYPE transposedA,
XTensor * b, MATRIX_TRANS_TYPE transposedB,
XTensor * c, DTYPE alpha, DTYPE beta,
XPRunner * parallelRunner)
XTensor * b, MATRIX_TRANS_TYPE transposedB,
XTensor * c, DTYPE alpha, DTYPE beta,
XPRunner * parallelRunner)
{
CheckNTErrors((a && b && c), "Empty input tensors!");
CheckNTErrors((a->dataType == b->dataType && a->dataType == c->dataType),
"Input tensors should have the same data type!");
"Input tensors should have the same data type!");
CheckNTErrors((a->order >= 2 && b->order >= 2 && c->order >= 2),
"Input tensors must have a order > 2!");
"Input tensors must have a order > 2!");
/* make tensor connections */
XLink::MakeLink(a, b, c, MATH_MATRIXMULBATCHED);
XLink::AddParamToHeadInt(c, transposedA);
XLink::AddParamToHeadInt(c, transposedB);
XLink::AddParamToHead(c, alpha);
XLink::AddParamToHead(c, beta);
int an = transposedA == X_TRANS ? a->dimSizeRDI[0] : a->dimSizeRDI[1];
int am = transposedA == X_TRANS ? a->dimSizeRDI[1] : a->dimSizeRDI[0];
int bn = transposedB == X_TRANS ? b->dimSizeRDI[0] : b->dimSizeRDI[1];
......
......@@ -22,7 +22,7 @@
#ifndef __MATRIXMULBATCHED_H__
#define __MATRIXMULBATCHED_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,9 +19,10 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "MultiplyElementWise.h"
#include "MultiplyElementWise.cuh"
#include "../../XTensor.h"
#include "../../XName.h"
#include "Multiply.h"
#include "Multiply.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor)
......@@ -35,16 +36,21 @@ where i is the index of the item
>> alpha - the coefficient
>>
*/
void MultiplyElementWise(XTensor * a, XTensor * b, XTensor * c, int leadingDim, DTYPE alpha)
void Multiply(XTensor * a, XTensor * b, XTensor * c, int leadingDim, DTYPE alpha)
{
int leadingDimRDI = a->order - leadingDim - 1;
CheckNTErrors((a->unitNum <= c->unitNum && b->unitNum <= c->unitNum),
"Unmatched tensors in multiplication!");
CheckNTErrors((a->order == b->order && a->order == c->order), "Unmatched tensors!");
/* make tensor connections */
XLink::MakeLink(a, b, c, MATH_MULTIPLY);
XLink::AddParamToHeadInt(c, leadingDim);
XLink::AddParamToHead(c, alpha);
#ifdef USE_CUDA
if (a->devID >= 0 || b->devID >= 0 || c->devID >= 0) {
CudaMultiplyElementWise(a, b, c, leadingDim, alpha);
CudaMultiply(a, b, c, leadingDim, alpha);
return;
}
#endif
......
......@@ -19,10 +19,10 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XDevice.h"
#include "../XTensor.h"
#include "MultiplyElementWise.h"
#include "MultiplyElementWise.cuh"
#include "../../XDevice.h"
#include "../../XTensor.h"
#include "Multiply.h"
#include "Multiply.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor)
......@@ -121,7 +121,7 @@ where i is the item index
>> alpha - the coefficient
*/
extern "C"
void CudaMultiplyElementWise(XTensor * a, XTensor * b, XTensor * c, int leadingDim, DTYPE alpha)
void CudaMultiply(XTensor * a, XTensor * b, XTensor * c, int leadingDim, DTYPE alpha)
{
int leadingDimRDI = a->order - leadingDim - 1;
CheckNTErrors((a->unitNum <= c->unitNum && b->unitNum <= c->unitNum),
......
......@@ -19,10 +19,10 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#ifndef __MULTIPLYELEMENTWISE_CUH__
#define __MULTIPLYELEMENTWISE_CUH__
#ifndef __MULTIPLY_CUH__
#define __MULTIPLY_CUH__
#include "MultiplyElementWise.h"
#include "Multiply.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......@@ -42,11 +42,11 @@ void KernelMulElementWiseTensorDynamic(DTYPE * a, DTYPE * b, DTYPE * c, DTYPE al
/* element-wise product of two tensors */
extern "C"
void CudaMultiplyElementWise(XTensor * a, XTensor * b, XTensor * c, int leadingDim, DTYPE alpha);
void CudaMultiply(XTensor * a, XTensor * b, XTensor * c, int leadingDim = 0, DTYPE alpha = 0);
#endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor)
#endif // __MULTIPLYELEMENTWISE_CUH__
#endif // __MULTIPLY_CUH__
......@@ -19,17 +19,17 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#ifndef __MULTIPLYELEMENTWISE_H__
#define __MULTIPLYELEMENTWISE_H__
#ifndef __MULTIPLY_H__
#define __MULTIPLY_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* element-wise product of two tensors */
extern "C"
void MultiplyElementWise(XTensor * a, XTensor * b, XTensor * c, int leadingDim, DTYPE alpha = 0);
void Multiply(XTensor * a, XTensor * b, XTensor * c, int leadingDim = 0, DTYPE alpha = 0);
} // namespace nts(NiuTrans.Tensor)
#endif // __MULTIPLYELEMENTWISE_H__
\ No newline at end of file
#endif // __MULTIPLY_H__
\ No newline at end of file
......@@ -19,7 +19,7 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../../XTensor.h"
#include "Negate.h"
#include "Negate.cuh"
......
......@@ -19,8 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XDevice.h"
#include "../XTensor.h"
#include "../../XDevice.h"
#include "../../XTensor.h"
#include "Negate.h"
#include "Negate.cuh"
......
......@@ -22,7 +22,7 @@
#ifndef __NEGATE_H__
#define __NEGATE_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,7 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../../XTensor.h"
#include "../../XName.h"
#include "Sum.h"
#include "Sum.cuh"
......@@ -37,14 +38,15 @@ void Sum(XTensor * a, XTensor * b, XTensor * c, DTYPE beta)
if (c == NULL)
c = a;
CheckNTErrors((a && b && c),
"Empty tensors in addition!");
CheckNTErrors(a && b && c, "Empty tensors in addition!");
CheckNTErrors(a->unitNum == b->unitNum && a->unitNum == c->unitNum,
"Unmatched tensors in addition!");
CheckNTErrors(a->dataType == b->dataType && a->dataType == c->dataType,
"Unmatched tensors in addition!");
CheckNTErrors((a->unitNum == b->unitNum && a->unitNum == c->unitNum),
"Unmatched tensors in addition!");
CheckNTErrors((a->dataType == b->dataType && a->dataType == c->dataType),
"Unmatched tensors in addition!");
/* make tensor connections */
XLink::MakeLink(a, b, c, MATH_SUM);
XLink::AddParamToHead(c, beta);
if (a->devID >= 0 || b->devID >= 0 || c->devID >= 0) {
......
......@@ -19,7 +19,7 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XDevice.h"
#include "../../XDevice.h"
#include "Sum.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __SUM_H__
#define __SUM_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,7 +19,7 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../../XTensor.h"
#include "SumByColumnTV.h"
#include "SumByColumnTV.cuh"
......
......@@ -19,8 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XDevice.h"
#include "../XTensor.h"
#include "../../XDevice.h"
#include "../../XTensor.h"
#include "SumByColumnTV.h"
#include "SumByColumnTV.cuh"
......
......@@ -22,7 +22,7 @@
#ifndef __REDUCEMAX_CUH__
#define __REDUCEMAX_CUH__
#include "ReduceMax.h"
#include "../reduce/ReduceMax.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __SUMBYCOLUMNTV_H__
#define __SUMBYCOLUMNTV_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,7 +19,7 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../../XTensor.h"
#include "SumByColumnVT.h"
#include "SumByColumnVT.cuh"
......
......@@ -19,8 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XDevice.h"
#include "../XTensor.h"
#include "../../XDevice.h"
#include "../../XTensor.h"
#include "SumByColumnVT.h"
#include "SumByColumnVT.cuh"
......
......@@ -22,7 +22,7 @@
#ifndef __SUMBYCOLUMNVT_H__
#define __SUMBYCOLUMNVT_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -20,8 +20,8 @@
*/
#include "XTensorBLAS.h"
#include "../XTensor.h"
#include "../XBLAS.h"
#include "../../XTensor.h"
#include "../../XBLAS.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,16 +19,18 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XUtility.h"
#include "../XDevice.h"
#include "../XTensor.h"
#include "../../XUtility.h"
#include "../../XDevice.h"
#include "../../XTensor.h"
#include "XTensorBLAS.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/* matrix multiplication via cuda version BLAS */
/*
matrix multiplication via cuda version BLAS
*/
void CudaBLASMatrixMUL(cublasHandle_t * handle,
void * a, MATRIX_TRANS_TYPE transposedA, TENSOR_DATA_TYPE dataTypeA,
void * b, MATRIX_TRANS_TYPE transposedB, TENSOR_DATA_TYPE dataTypeB,
......@@ -83,7 +85,9 @@ void CudaBLASMatrixMUL(cublasHandle_t * handle,
}
}
/* matrix multiplication via cuda version BLAS */
/*
matrix multiplication via cuda version BLAS
*/
void CudaBLASMatrixMULBatched(cublasHandle_t * handle,
const void ** a, MATRIX_TRANS_TYPE transposedA, TENSOR_DATA_TYPE dataTypeA,
const void ** b, MATRIX_TRANS_TYPE transposedB, TENSOR_DATA_TYPE dataTypeB,
......@@ -139,6 +143,7 @@ void CudaBLASMatrixMULBatched(cublasHandle_t * handle,
}
/* matrix multiplication in batch and strided mode via cuda version BLAS */
extern "C"
void CudaBLASMatrixMULBatchedStrided(cublasHandle_t * handle,
const void * a, MATRIX_TRANS_TYPE transposedA, TENSOR_DATA_TYPE dataTypeA, long long int strideA,
const void * b, MATRIX_TRANS_TYPE transposedB, TENSOR_DATA_TYPE dataTypeB, long long int strideB,
......@@ -193,7 +198,9 @@ void CudaBLASMatrixMULBatchedStrided(cublasHandle_t * handle,
}
}
/* matrix multiplication via cuda version BLAS */
/*
matrix multiplication via cuda version BLAS
*/
void CudaBLASMatrixMULList(cublasHandle_t * handle,
XList * a, MATRIX_TRANS_TYPE transposedA,
XList * b, MATRIX_TRANS_TYPE transposedB,
......
......@@ -22,7 +22,7 @@
#ifndef __XTENSORBLAS_H__
#define __XTENSORBLAS_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,8 +19,8 @@
* $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-06-14
*/
#include "../XTensor.h"
#include "../XDevice.h"
#include "../../XTensor.h"
#include "../../XDevice.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,8 +19,9 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-07-04
*/
#include "../../XUtility.h"
#include "../../XName.h"
#include "Select.h"
#include "../XUtility.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
......@@ -28,13 +29,13 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
generate a tensor with seleccted data in range[low,high] along the given dimension
c = select(a)
>> a - input tensor
>> c - result tensor
>> dim - the dimension along with which we do the job
>> low - lower bound
>> high - higher bound.
Note that range [1,3] means that we select 1 and 2.
>> c - result tensor
*/
void SelectRange(XTensor * a, int dim, int low, int high, XTensor * c)
void SelectRange(XTensor * a, XTensor * c, int dim, int low, int high)
{
CheckNTErrors(a != NULL && c != NULL, "empty tensors!");
CheckNTErrors(a->order == c->order, "The input and output tensors must in the same order!");
......@@ -54,8 +55,14 @@ void SelectRange(XTensor * a, int dim, int low, int high, XTensor * c)
}
}
int dimRDI = a->order - dim - 1;
/* make tensor connections */
XLink::MakeLink(a, NULL, c, MATH_SELECTRANGE);
XLink::AddParamToHeadInt(c, dim);
XLink::AddParamToHeadInt(c, low);
XLink::AddParamToHeadInt(c, high);
int stride = 1;
int dimRDI = a->order - dim - 1;
for(int i = 0; i < dimRDI; i++)
stride *= a->dimSizeRDI[i];
......
......@@ -22,18 +22,18 @@
#ifndef __SELECT_H__
#define __SELECT_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
/* generate a tensor with seleccted data c = select(a) */
extern "C"
void Select(XTensor * a, XTensor * indexCPU, XTensor * c);
void Select(XTensor * a, XTensor * c, XTensor * indexCPU);
/* generate a tensor with seleccted data in range[low,high] along the given dimension
c = select(a) */
extern "C"
void SelectRange(XTensor * a, int dim, int low, int high, XTensor * c);
void SelectRange(XTensor * a, XTensor * c, int dim, int low, int high);
} // namespace nts(NiuTrans.Tensor)
......
......@@ -21,7 +21,7 @@
*/
#include "SetData.h"
#include "CopyValues.h"
#include "../movement/CopyValues.h"
#if !defined( WIN32 ) && !defined( _WIN32 )
#include "sys/time.h"
......
......@@ -23,7 +23,7 @@
#ifndef __SETDATA_H__
#define __SETDATA_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -20,7 +20,7 @@
*/
#include <math.h>
#include "../XTensor.h"
#include "../../XTensor.h"
#include "Normalize.h"
#include "Normalize.cuh"
......
......@@ -19,8 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XDevice.h"
#include "../XTensor.h"
#include "../../XDevice.h"
#include "../../XTensor.h"
#include "Normalize.h"
#include "Normalize.cuh"
......
......@@ -22,7 +22,7 @@
#ifndef __NORMALIZE_H__
#define __NORMALIZE_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -20,7 +20,7 @@
*/
#include <math.h>
#include "../XTensor.h"
#include "../../XTensor.h"
#include "Power.h"
#include "Power.cuh"
......
......@@ -19,8 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XDevice.h"
#include "../XTensor.h"
#include "../../XDevice.h"
#include "../../XTensor.h"
#include "Power.h"
#include "Power.cuh"
......
......@@ -22,7 +22,7 @@
#ifndef __POWER_H__
#define __POWER_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -21,7 +21,7 @@
#include "ScaleAndShift.h"
#include "ScaleAndShift.cuh"
#include "../XDevice.h"
#include "../../XDevice.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __SCALEANDSHIFT_CUH__
#define __SCALEANDSHIFT_CUH__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __SCALEANDSHIFT_H__
#define __SCALEANDSHIFT_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
......
......@@ -19,8 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../XUtility.h"
#include "../../XTensor.h"
#include "../../XUtility.h"
#include "CopyBlocks.h"
#include "CopyBlocksOnSite.h"
#include "CopyBlocksSelected.cuh"
......
......@@ -22,7 +22,7 @@
#ifndef __COPYBLOCKS_H__
#define __COPYBLOCKS_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,9 +19,9 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../../XTensor.h"
#include "CopyBlocksInGrid.h"
#include "../XUtility.h"
#include "../../XUtility.h"
#include "CopyBlocksInGrid.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor)
......@@ -39,7 +39,7 @@ Note that a grid may have a number of blocks
>> isIndexOnDev - indicates whether the index is on the device already
*/
void CopyBlocksInGrid(void * source, int blockSize, int blockNum, int gridNum, void * target,
int * index, int unitSize, bool isIndexOnDev, XMem * myMem)
int * index, int unitSize, bool isIndexOnDev, XMem * myMem)
{
CheckNTErrors((unitSize == sizeof(int)), "TODO!");
......
......@@ -21,7 +21,7 @@
#include "CopyBlocksInGrid.h"
#include "CopyBlocksInGrid.cuh"
#include "../XDevice.h"
#include "../../XDevice.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __COPYBLOCKSINGRID_CUH__
#define __COPYBLOCKSINGRID_CUH__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __COPYBLOCKSINGRID_H__
#define __COPYBLOCKSINGRID_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,8 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../XUtility.h"
#include "../../XTensor.h"
#include "../../XUtility.h"
#include "CopyBlocksOnSite.h"
#include "CopyBlocksOnSite.cuh"
......
......@@ -21,7 +21,7 @@
#include "CopyBlocksOnSite.h"
#include "CopyBlocksOnSite.cuh"
#include "../XDevice.h"
#include "../../XDevice.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __COPYBLOCKS_CUH__
#define __COPYBLOCKS_CUH__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __COPYBLOCKSONSITE_H__
#define __COPYBLOCKSONSITE_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -21,8 +21,8 @@
#include "CopyBlocks.h"
#include "CopyBlocksSelected.cuh"
#include "../XUtility.h"
#include "../XDevice.h"
#include "../../XUtility.h"
#include "../../XDevice.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __COPYBLOCKSSELECTED_CUH__
#define __COPYBLOCKSSELECTED_CUH__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,9 +19,9 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../../XTensor.h"
#include "CopyData2D.h"
#include "../XUtility.h"
#include "../../XUtility.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __COPYDATA2D_H__
#define __COPYDATA2D_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,7 +19,7 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../../XTensor.h"
#include "CopyInGrid.h"
#include "CopyBlocksInGrid.h"
......
......@@ -22,7 +22,7 @@
#ifndef __COPYINGRID_H__
#define __COPYINGRID_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __COPYINDEXED_H__
#define __COPYINDEXED_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,6 +19,7 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../../XName.h"
#include "CopyValues.h"
#include "CopyValues.cuh"
......@@ -41,6 +42,9 @@ bool CopyValues(XTensor * s, XTensor * t, XStream * stream)
CheckNTErrors((t->data != NULL), "Cannot copy to an empty data array!");
CheckNTErrors((s->unitNum == t->unitNum), "Unmatched data item number!");
/* make tensor connections */
XLink::MakeLink(s, NULL, t, MATH_COPYVALUES);
if ((s->dataType == X_FLOAT16 && t->dataType == X_FLOAT) ||
(s->dataType == X_FLOAT && t->dataType == X_FLOAT16)) {
CheckNTErrors(((s->devID < 0 && t->devID < 0) || s->devID == t->devID),
......
......@@ -21,8 +21,8 @@
#include "CopyValues.h"
#include "CopyValues.cuh"
#include "../XUtility.h"
#include "../XDevice.h"
#include "../../XUtility.h"
#include "../../XDevice.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __COPYVALUES_CUH__
#define __COPYVALUES_CUH__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __COPYVALUES_H__
#define __COPYVALUES_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,7 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../../XTensor.h"
#include "../../XName.h"
#include "ReduceMax.h"
#include "ReduceMax.cuh"
......@@ -34,7 +35,7 @@ get the max value of the items along a dimension of the tensor.
void ReduceMax(XTensor * input, XTensor * output, int dim)
{
CheckNTErrors((input->devID == output->devID || (input->devID < 0 && output->devID < 0)),
"This code must be run on the same device!");
"This code must be run on the same device!");
CheckNTErrors((input && output), "Empty input or output tensors!");
CheckNTErrors((input->order == output->order + 1), "Incorrect tensor sizes!");
CheckNTErrors((input->order > dim && dim >=0), "Illegal dimension to reduce!");
......@@ -44,14 +45,18 @@ void ReduceMax(XTensor * input, XTensor * output, int dim)
for(int i = 0; i < input->order; i++){
if(i < dimRDI){
CheckNTErrors((input->dimSizeRDI[i] == output->dimSizeRDI[i]),
"Unmatched tensors!");
"Unmatched tensors!");
}
else if(i > dimRDI){
CheckNTErrors((input->dimSizeRDI[i] == output->dimSizeRDI[i - 1]),
"Unmatched tensors!");
"Unmatched tensors!");
}
}
/* make tensor connections */
XLink::MakeLink(input, NULL, output, MATH_REDUCEMAX);
XLink::AddParamToHeadInt(output, dim);
if(input->devID >= 0){
#ifdef USE_CUDA
CudaReduceMax(input, output, dim);
......
......@@ -19,9 +19,9 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XDevice.h"
#include "../XTensor.h"
#include "../XUtility.h"
#include "../../XDevice.h"
#include "../../XTensor.h"
#include "../../XUtility.h"
#include "ReduceMax.h"
#include "ReduceMax.cuh"
......
......@@ -22,7 +22,7 @@
#ifndef __REDUCEMAX_H__
#define __REDUCEMAX_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
......
......@@ -19,7 +19,7 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "ScaleAndShift.h"
#include "../math/ScaleAndShift.h"
#include "ReduceSum.h"
#include "ReduceMean.h"
......
......@@ -22,7 +22,7 @@
#ifndef __REDUCEMEAN_H__
#define __REDUCEMEAN_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __REDUCESTANDARDVARIANCE_H__
#define __REDUCESTANDARDVARIANCE_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,6 +22,7 @@
#include <math.h>
#include "ReduceSum.h"
#include "ReduceSum.cuh"
#include "../../XName.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
......@@ -58,6 +59,12 @@ void ReduceSum(XTensor * input, XTensor * output, int dim, XTensor * shift, DTYP
}
}
/* make tensor connections */
XLink::MakeLink(input, shift, output, MATH_REDUCESUM);
XLink::AddParamToHeadInt(output, dim);
XLink::AddParamToHead(output, power);
XLink::AddParamToHeadInt(output, isExp);
if(input->devID >= 0){
#ifdef USE_CUDA
CudaReduceSum(input, output, dim, shift, power, isExp);
......
......@@ -19,8 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XDevice.h"
#include "../XUtility.h"
#include "../../XDevice.h"
#include "../../XUtility.h"
#include "ReduceSum.cuh"
namespace nts{ // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __REDUCESUM_H__
#define __REDUCESUM_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论