Commit 90c12836 by xiaotong

remove useless header files

parent f6fe574c
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
/* this is a class wrapper of tensor operations */
#ifndef __XTENSORCORE_H__
#define __XTENSORCORE_H__
#include "../XTensor.h"
#include "CHeader.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
/* tensor core operations */
class XTensorCore
{
public:
/*
concatenate a list of tensors along a given dimension
Note that this is actually a wrapper that selects "ConcatenateSolely"
or "Merge" according to the tensor shapes (see the sketch below) */
void Concatenate(XList * smalls, XTensor * big, int dim);
/* concatenate two tensors along a given dimension */
void Concatenate(XTensor * smallA, XTensor * smallB, XTensor * big, int dim);
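/*
A hedged sketch of the dispatch described above: when all the small tensors share
exactly the same shape, the concatenation can be done as a cheap Merge; otherwise
the general ConcatenateSolely path is taken. The shape-equality helper below is
illustrative, not this file's actual code.

    void Concatenate(XList * smalls, XTensor * big, int dim)
    {
        bool uniform = true;
        XTensor * first = (XTensor*)smalls->GetItem(0);
        for (int i = 1; i < smalls->count && uniform; i++) {
            XTensor * t = (XTensor*)smalls->GetItem(i);
            if (!SameShape(first, t))        // hypothetical shape-equality test
                uniform = false;
        }
        if (uniform)
            Merge(smalls, big, dim);          // identical shapes: reshape-style merge
        else
            ConcatenateSolely(smalls, big, dim);
    }
*/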
/* concatenate a list of tensors along a given dimension */
static
void ConcatenateSolely(XList * smalls, XTensor * big, int dim);
/* copy selected sub-tensors */
static
bool CopyIndexed(XTensor * s, XTensor * t, int dim, int * srcIndex, int indexSize, int * tgtIndex, int copyNum);
/* copy a number of blocks in grid */
static
void CopyInGrid(XTensor * s, XTensor * t, int * index, int blockDim, int blockNumInGrid, bool isIndexOnDev = false);
/* copy s to t */
static
bool CopyValues(XTensor * s, XTensor * t, XStream * stream = NULL);
/* set target data block index for the data movement in merge */
static
void MakeMergeBlockIndex(int * blockIndex, int blockNum, int blockNumInMerge,
int splitSizeInGrid, int gridSize, int gridNum, XMem * mem);
/* set target data block index for the data movement in split */
static
void MakeSplitBlockIndex(int * blockIndex, int splitNum, int blockSplitSize, int blockNum, XMem * mem);
/*
matrix multiplication. For the input tensors a and b, we perform matrix multiplication
on the first two dimensions. E.g., let A be a tensor of size x * y * m (i.e., m order-2
slices of size x * y) and B be a tensor of size y * z * n (n slices of size y * z).
For A * B, we go over each order-2 slice ai of A and each order-2 slice bj of B, like this
c_{i,j} = trans(ai) * trans(bj) * alpha + c_{i,j} * beta
where trans() returns the transposed matrix if the corresponding flag is fired,
and c_{i,j} is the (i,j) slice of the result C. C should then be a tensor of size
x * z * m * n. Obviously C = A * B performs normal matrix multiplication
if A is of size x * y and B is of size y * z.
*/
static
void MatrixMul(XTensor * a, MATRIX_TRANS_TYPE transposedA, XTensor * b, MATRIX_TRANS_TYPE transposedB, XTensor * c,
DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0, XPRunner * parallelRunner = NULL);
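/*
To make the shape bookkeeping above concrete, a hypothetical call (tensor
construction is omitted and not defined in this header; X_NOTRANS denotes the
no-transpose value of MATRIX_TRANS_TYPE):

    // A: 4 x 5 x 10 (ten 4x5 slices), B: 5 x 6 x 12 (twelve 5x6 slices).
    // Without transposition each pair (ai, bj) is a (4x5)*(5x6) product,
    // so C must be allocated as a 4 x 6 x 10 x 12 tensor before the call.
    XTensorCore::MatrixMul(&a, X_NOTRANS, &b, X_NOTRANS, &c);
*/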
/*
matrix multiplication (for 2d tensors)
c = trans(a) * trans(b) * alpha + c * beta
where trans() returns the transposed matrix if the flag is fired
*/
static
void MatrixMul2D(XTensor * a, MATRIX_TRANS_TYPE transposedA, XTensor * b, MATRIX_TRANS_TYPE transposedB, XTensor * c,
DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0, XPRunner * parallelRunner = NULL, XStream * stream = NULL);
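/*
A plain-C reference of what MatrixMul2D computes in the no-transpose case,
c = a * b * alpha + c * beta; this is a sketch of the semantics for checking
results by hand, not the library's implementation:

    // a: na x ma, b: ma x mb, c: na x mb; row-major dense arrays
    void MatrixMul2DRef(const float * a, const float * b, float * c,
                        int na, int ma, int mb, float alpha, float beta)
    {
        for (int i = 0; i < na; i++) {
            for (int j = 0; j < mb; j++) {
                float sum = 0;
                for (int k = 0; k < ma; k++)
                    sum += a[i * ma + k] * b[k * mb + j];
                c[i * mb + j] = sum * alpha + c[i * mb + j] * beta;
            }
        }
    }
*/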
/*
matrix multiplication for a block (x1,y1) - (x2,y2)
where (x1,y1) is the upper-left corner and (x2,y2) is the bottom-right corner
*/
static
void MatrixMul2DMultiTheading(XList * args);
/*
matrix multiplication (for 2d tensors) with multi-threading
c = trans(a) * trans(b) * alpha + c * beta
where trans() returns the transposed matrix if the flag is fired
*/
static
void MatrixMul2DParallel(XTensor * a, MATRIX_TRANS_TYPE transposedA, XTensor * b, MATRIX_TRANS_TYPE transposedB, XTensor * c,
DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0, XPRunner * parallelRunner = NULL);
/*
matrix multiplication of the two tensors
for each 2-dimensional data array in a (denoted as ai) and
each 2-dimensional data array in b (denoted as bi), we have
ci = trans(ai) * trans(bi) * alpha + ci * beta
where trans() returns the transposed matrix if the flag is fired
*/
static
void MatrixMulBatched(XTensor * a, MATRIX_TRANS_TYPE transposedA, XTensor * b, MATRIX_TRANS_TYPE transposedB, XTensor * c,
DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0, XPRunner * parallelRunner = NULL);
/* matrix multiplication in batch mode (CPU code) */
static
void MatrixMULBatchedCPU(XList * a, MATRIX_TRANS_TYPE transposedA, XList * b, MATRIX_TRANS_TYPE transposedB, XList * c,
DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0);
/* transform a tensor by merging it along a dimension, e.g., (M, N/3, 3) -> (M, N) */
void Merge(XTensor * s, XTensor * t, int whereToMerge, int leadingDim = -1);
/* merge small tensors into a big tensor */
void Merge(XList * smalls, XTensor * big, int whereToMerge);
/* merge data by blocks */
void MergeBlockLists(XList * sourceList, int * blockSizes, int blockNum, void * target, XMem * myMem);
/* element-wise product of two tensors */
static
void MultiplyElementWise(XTensor * a, XTensor * b, XTensor * c, int leadingDim, DTYPE alpha = 0);
/* set every entry to its negated value, i.e., a_i = -a_i */
void Negate(XTensor * a);
/*
normalize the data along a dimension (zero mean, unit variance). For an input x,
y = a * (x - mean) / sqrt(variance + \epsilon) + b
where a and b are the scale and bias respectively, and \epsilon is a small constant
added for numerical stability.
*/
static
void Normalize(XTensor * input, XTensor * output, int dim, XTensor * mean, XTensor * var, XTensor * a, XTensor * b, DTYPE epsilon);
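/*
A scalar reference of the normalization formula above (a semantics sketch only;
the tensor version applies mean/var/a/b along the chosen dimension):

    #include <cmath>
    void NormalizeRef(const float * x, float * y, int n,
                      float mean, float var, float a, float b, float epsilon)
    {
        for (int i = 0; i < n; i++)
            y[i] = a * (x[i] - mean) / std::sqrt(var + epsilon) + b;
    }
*/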
/* raise every entry to the power of p, i.e., a_i = pow(a_i, p) */
void Power(XTensor * a, DTYPE p);
/* get the max value of the items along a dimension of the tensor. */
static
void ReduceMax(XTensor * input, XTensor * output, int dim);
/*
get the mean value along a dimension of the tensor. For a 1-dimensional data array a,
mean = (1/n) * sum_i input_i
*/
static
void ReduceMean(XTensor * input, XTensor * output, int dim);
/*
standard variance (i.e., the standard deviation) of the items along a dimension of the
tensor. For a 1-dimensional data array a,
std = (1/n * \sum_i (a_i - mean)^2)^0.5
*/
static
void ReduceStandardVariance(XTensor * input, XTensor * output, int dim, XTensor * mean);
/*
sum the items along a dimension of the tensor. For a 1-dimensional data array a,
sum = \sum_i (a_i - shift)^power if isExp == false
sum = \sum_i exp((a_i - shift)^power) if isExp == true
*/
static
void ReduceSum(XTensor * input, XTensor * output, int dim, XTensor * shift = NULL, DTYPE power = (DTYPE)1.0F, bool isExp = false);
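/*
A scalar reference of the reduction above; the placement of "power" follows the
formula in the comment and is an assumption about this version of the code:

    #include <cmath>
    float ReduceSumRef(const float * a, int n, float shift, float power, bool isExp)
    {
        float sum = 0;
        for (int i = 0; i < n; i++) {
            float v = std::pow(a[i] - shift, power);
            sum += isExp ? std::exp(v) : v;
        }
        return sum;
    }

With shift set to max_i(a_i), power = 1 and isExp = true, this is the stable
building block of log-sum-exp, e.g., a softmax denominator.
*/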
/*
squared sum of the items along a dimension of the tensor. For a 1-dimensional data array a,
sum = \sum_i (a_i - shift)^2
*/
static
void ReduceSumSquared(XTensor * input, XTensor * output, int dim, XTensor * shift);
/*
variance of the items along a dimension of the tensor. For a 1-dimensional data array a,
variance = 1/n * \sum_i (a_i - mean)^2
*/
static
void ReduceVariance(XTensor * input, XTensor * output, int dim, XTensor * mean);
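/*
Scalar references for the mean/variance reductions above (semantics sketches only):

    float MeanRef(const float * a, int n)
    {
        float sum = 0;
        for (int i = 0; i < n; i++)
            sum += a[i];
        return sum / n;
    }

    float VarianceRef(const float * a, int n, float mean)
    {
        float sum = 0;
        for (int i = 0; i < n; i++)
            sum += (a[i] - mean) * (a[i] - mean);
        return sum / n;    // ReduceStandardVariance takes the square root of this
    }
*/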
/* scale and shift all tensor entries, i.e., a = a * scale + shift */
static
void ScaleAndShift(XTensor * a, DTYPE scale, DTYPE shift);
/* transform a tensor by splitting it, e.g., (M, N) -> (M, N/3, 3) */
void Split(XTensor * s, XTensor * t, int whereToSplit, int splitNum);
/* split a big tensor into small tensors */
void Split(XTensor * big, XList * smalls, int whereToSplit, int splitNum);
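/*
Split and Merge are inverse transforms. A hypothetical round trip, where "core" is
an XTensorCore instance (Split and Merge are non-static here); the dimension
arguments are illustrative, so consult the implementations for the exact index
conventions:

    // s: M x N. Splitting dimension 1 into 3 parts gives t: M x N/3 x 3;
    // merging t back along the split dimension recovers an M x N tensor.
    core.Split(&s, &t, 1, 3);
    core.Merge(&t, &s2, 1);
*/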
/* tensor summation c = a + b * \beta */
static
void Sum(XTensor * a, XTensor * b, XTensor * c = NULL, DTYPE beta = (DTYPE)1.0);
/* sum of a tensor and a (column) vector */
static
void SumByColumnTV(XTensor * a, XTensor * b, XTensor * c = NULL, DTYPE beta = (DTYPE)1.0);
/* sum of a (column) vector and a tensor */
static
void SumByColumnVT(XTensor * a, XTensor * b, XTensor * c = NULL, DTYPE beta = (DTYPE)1.0);
/* get the top-k items along a given dimension */
static
void TopK(XTensor * a, XTensor * b, XTensor * index, int dim, int k);
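/*
What TopK computes for a 1-dimensional array, sketched with the standard library;
the tensor version does this independently for every run along "dim" and also
writes the winning positions into "index":

    #include <algorithm>
    #include <numeric>
    #include <vector>
    void TopKRef(const float * a, int n, int k, float * val, int * idx)
    {
        std::vector<int> order(n);
        std::iota(order.begin(), order.end(), 0);
        std::partial_sort(order.begin(), order.begin() + k, order.end(),
                          [a](int x, int y) { return a[x] > a[y]; });
        for (int i = 0; i < k; i++) {
            idx[i] = order[i];        // position of the i-th largest item
            val[i] = a[order[i]];     // its value
        }
    }
*/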
/* insert a dimension by copying the blocks for x times (where x is the size of the inserted dimension) */
void Unsqueeze(XTensor * a, XTensor * b, int dim, int dSize);
/* segmentation and parallel processing for 2d tensors (i.e., matrices) */
/* segment a 2d tensor (i.e., matrix) into blocks and run jobs in parallel */
static
void RunParallel2D(XPRunner * parallelRunner, void * job, int opNum, int rowNum, int colNum, int argNum, ...);
/* segment a block into sub-blocks */
static
int SegmentTensor2D(int rowNum, int colNum, int blockNum, int * blockIndex);
/* segment a block into sub-blocks */
static
int SegmentTensor2DInRows(int rowNum, int colNum, int blockNum, int * blockIndex);
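/*
The segmentation idea, sketched for the row-wise case (the packing of blockIndex
here is an assumption; the actual functions may encode block coordinates
differently):

    // split rowNum rows into at most blockNum nearly equal ranges;
    // blockIndex receives (firstRow, lastRow) pairs and the return
    // value is the number of blocks actually produced
    int SegmentRowsRef(int rowNum, int blockNum, int * blockIndex)
    {
        int made = 0, row = 0;
        int base = rowNum / blockNum, rest = rowNum % blockNum;
        for (int b = 0; b < blockNum && row < rowNum; b++, made++) {
            int size = base + (b < rest ? 1 : 0);
            blockIndex[2 * made] = row;
            blockIndex[2 * made + 1] = row + size - 1;
            row += size;
        }
        return made;
    }
*/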
/* matrix multiplication (BLAS) */
static
void MatrixMULCPU(XTensor * a, MATRIX_TRANS_TYPE transposedA, XTensor * b, MATRIX_TRANS_TYPE transposedB, XTensor * c, DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0);
#ifdef USE_CUDA
/* matrix multiplication via cuda version BLAS */
static
void CudaBLASMatrixMUL(cublasHandle_t * handle,
void * a, MATRIX_TRANS_TYPE transposedA, TENSOR_DATA_TYPE dataTypeA,
void * b, MATRIX_TRANS_TYPE transposedB, TENSOR_DATA_TYPE dataTypeB,
void * c, TENSOR_DATA_TYPE dataTypeC,
int na, int ma, int nb, int mb, int nc, int mc, DTYPE alpha = (DTYPE)1.0, DTYPE beta = 1.0);
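/*
How such a wrapper typically reaches cuBLAS for float data; a hedged sketch of
the standard row-major trick, not necessarily this file's code. cuBLAS assumes
column-major storage, so a row-major product C = A * B is computed as
C^T = B^T * A^T by swapping the operand order. For the no-transpose case with
A: na x ma, B: nb x mb (ma == nb) and C: nc x mc (nc == na, mc == mb):

    cublasSgemm(*handle, CUBLAS_OP_N, CUBLAS_OP_N,
                mc, nc, ma,
                &alpha,
                (const float*)b, mb,
                (const float*)a, ma,
                &beta,
                (float*)c, mc);

A transposition flag would flip the corresponding CUBLAS_OP_N to CUBLAS_OP_T and
swap that operand's dimensions.
*/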
/* matrix multiplication in batch mode via cuda version BLAS */
static
void CudaBLASMatrixMULBatched(cublasHandle_t * handle,
const void ** a, MATRIX_TRANS_TYPE transposedA, TENSOR_DATA_TYPE dataTypeA,
const void ** b, MATRIX_TRANS_TYPE transposedB, TENSOR_DATA_TYPE dataTypeB,
void ** c, TENSOR_DATA_TYPE dataTypeC,
int count, int na, int ma, int nb, int mb, int nc, int mc, DTYPE alpha = (DTYPE)1.0, DTYPE beta = 1.0);
/* matrix multiplication in batch and strided mode via cuda version BLAS */
static
void CudaBLASMatrixMULBatchedStrided(cublasHandle_t * handle,
const void * a, MATRIX_TRANS_TYPE transposedA, TENSOR_DATA_TYPE dataTypeA, long long int strideA,
const void * b, MATRIX_TRANS_TYPE transposedB, TENSOR_DATA_TYPE dataTypeB, long long int strideB,
void * c, TENSOR_DATA_TYPE dataTypeC, long long int strideC,
int count, int na, int ma, int nb, int mb, int nc, int mc, DTYPE alpha = (DTYPE)1.0, DTYPE beta = 1.0);
/* matrix multiplication in batch mode via cuda version BLAS */
static
void CudaBLASMatrixMULList(cublasHandle_t * handle, XList * a, MATRIX_TRANS_TYPE transposedA, XList * b, MATRIX_TRANS_TYPE transposedB, XList * c,
int count, DTYPE alpha = (DTYPE)1.0, DTYPE beta = 1.0);
#endif
};
} // namespace nts(NiuTrans.Tensor)
#endif // __XTENSORCORE_H__
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
/* this is a class wrapper of tensor functions */
#ifndef __XTENSORFUNCTION_H__
#define __XTENSORFUNCTION_H__
#include "../XTensor.h"
#include "CHeader.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
/* tensor functions */
class XTensorFunction
{
public:
};
} // namespace nts(NiuTrans.Tensor)
#endif // __XTENSORFUNCTION_H__