Commit bc49d32a by xuchen

Merge with the liyinqiao branch and add the max/min functions

parent cadda317
...@@ -71,6 +71,9 @@ void BackwardTest() ...@@ -71,6 +71,9 @@ void BackwardTest()
XTensor a; XTensor a;
XTensor b; XTensor b;
XTensor c; XTensor c;
a.enableGrad = true;
b.enableGrad = false;
c.enableGrad = false;
XTensor mean; XTensor mean;
XTensor origin; XTensor origin;
InitTensor2D(&a, 2, 3); InitTensor2D(&a, 2, 3);
...@@ -88,14 +91,15 @@ void BackwardTest() ...@@ -88,14 +91,15 @@ void BackwardTest()
b.Set1D(2.0F, 0); b.Set1D(2.0F, 0);
b.Set1D(1.0F, 1); b.Set1D(1.0F, 1);
c = DivDim(a, b, 0); DivDim(a, b, c, 0);
c.Dump(stderr, "c:"); c.Dump(stderr, "c:");
auto loss = CrossEntropy(c, a);
//XLink::ShowNetwork(stderr, &c); //XLink::ShowNetwork(stderr, &c);
net.Backward(c); net.Backward(loss);
net.Dump(stderr); a.grad->Dump(stderr);
} }
......
...@@ -26,183 +26,9 @@ ...@@ -26,183 +26,9 @@
* *
*/ */
#ifdef WIN32
#include <wtypes.h>
#endif
#include <stdlib.h>
#include <stdio.h>
#include "XBLAS.h"
#include "XGlobal.h"
/* the nts (NiuTrans.Tensor) namespace */ /* the nts (NiuTrans.Tensor) namespace */
namespace nts{ namespace nts{
#ifdef WIN32
HINSTANCE hBLASDll;
#endif
/* single-precision floating matrix-matrix multiplication */
void (*XBLAS_SGEMM)(OPENBLAS_CONST enum CBLAS_ORDER, OPENBLAS_CONST enum CBLAS_TRANSPOSE, OPENBLAS_CONST enum CBLAS_TRANSPOSE,
OPENBLAS_CONST BLASINT, OPENBLAS_CONST BLASINT, OPENBLAS_CONST BLASINT, OPENBLAS_CONST float,
OPENBLAS_CONST float *, OPENBLAS_CONST BLASINT,
OPENBLAS_CONST float *, OPENBLAS_CONST BLASINT, OPENBLAS_CONST float,
float *, OPENBLAS_CONST BLASINT);
/* double-precision floating matrix-matrix multiplication */
void (*XBLAS_DGEMM)(OPENBLAS_CONST enum CBLAS_ORDER, OPENBLAS_CONST enum CBLAS_TRANSPOSE, OPENBLAS_CONST enum CBLAS_TRANSPOSE,
OPENBLAS_CONST BLASINT, OPENBLAS_CONST BLASINT, OPENBLAS_CONST BLASINT, OPENBLAS_CONST double,
OPENBLAS_CONST double *, OPENBLAS_CONST BLASINT,
OPENBLAS_CONST double *, OPENBLAS_CONST BLASINT, OPENBLAS_CONST double,
double *, OPENBLAS_CONST BLASINT);
/* single-precision floating vector-vector multiplication (rank-1) */
void (*XBLAS_SGER)(OPENBLAS_CONST enum CBLAS_ORDER, OPENBLAS_CONST BLASINT M, OPENBLAS_CONST BLASINT N, OPENBLAS_CONST float alpha,
OPENBLAS_CONST float *Y, OPENBLAS_CONST BLASINT, OPENBLAS_CONST float *, OPENBLAS_CONST BLASINT,
float *, OPENBLAS_CONST BLASINT);
/* double-precision floating vector-vector multiplication (rank-1) */
void (*XBLAS_DGER)(OPENBLAS_CONST enum CBLAS_ORDER, OPENBLAS_CONST BLASINT M, OPENBLAS_CONST BLASINT N, OPENBLAS_CONST double alpha,
OPENBLAS_CONST double *Y, OPENBLAS_CONST BLASINT, OPENBLAS_CONST double *, OPENBLAS_CONST BLASINT,
double *, OPENBLAS_CONST BLASINT);
/* set the number of threads */
void (*XBLAS_SET_THREAD_NUM)(int);
/* get the number of threads */
//int (*XBLAS_GET_THREAD_NUM)();
/* get the number of physical processors (cores).*/
int (*XBLAS_GET_CORE_NUM)();
/* get the CPU corename */
//char * (*XBLAS_GET_CORE_NAME)();
/* get the parallelization type used by OpenBLAS */
//int (*XBLAS_GET_PARALLEL_TYPE)(void);
#if defined(USE_BLAS)
/* 
load the BLAS dll (Windows) or bind the statically linked BLAS symbols
(other systems), then set the number of BLAS threads to 1
>> dllFileName - path of the BLAS dll; only used on Windows
*/
void LoadBLAS(const char * dllFileName)
{
#ifndef CUDA_BLAS
#ifdef _WIN32
#if defined(OPENBLAS)
    /* convert the file name to a wide string
       (non-ascii characters are not supported yet) */
    size_t fnLen = strlen(dllFileName);
    wchar_t * fn = new wchar_t[fnLen + 1];
    memset(fn, 0, sizeof(wchar_t) * (fnLen + 1));
    for(size_t i = 0; i < fnLen; i++)
        fn[i] = dllFileName[i];

    hBLASDll = LoadLibrary((LPCWSTR)fn);
    if(!hBLASDll){
        XPRINT1(0, stderr, "[LoadBLAS] Error! Cannot load dll %s!\n", dllFileName);
        exit(1);
    }

    /* matrix-matrix multiplication */
    (FARPROC&)XBLAS_SGEMM = GetProcAddress(hBLASDll, "cblas_sgemm");
    (FARPROC&)XBLAS_DGEMM = GetProcAddress(hBLASDll, "cblas_dgemm");

    /* vector-vector multiplication (rank-1) */
    (FARPROC&)XBLAS_SGER = GetProcAddress(hBLASDll, "cblas_sger");
    (FARPROC&)XBLAS_DGER = GetProcAddress(hBLASDll, "cblas_dger");

    /* multi-threading */
    (FARPROC&)XBLAS_SET_THREAD_NUM = GetProcAddress(hBLASDll, "openblas_set_num_threads");
    //(FARPROC&)XBLAS_SET_THREAD_NUM = GetProcAddress(hBLASDll, "goto_set_num_threads");
    //(FARPROC&)XBLAS_GET_THREAD_NUM = GetProcAddress(hBLASDll, "openblas_get_num_threads");
    (FARPROC&)XBLAS_GET_CORE_NUM = GetProcAddress(hBLASDll, "openblas_get_num_procs");
    //(FARPROC&)XBLAS_GET_CORE_NAME = GetProcAddress(hBLASDll, "openblas_get_corename");
    //(FARPROC&)XBLAS_GET_PARALLEL_TYPE = GetProcAddress(hBLASDll, "openblas_get_parallel");

    delete[] fn;
#endif // defined(OPENBLAS)
#if defined(MKL)
    /* convert the file name to a wide string
       (non-ascii characters are not supported yet) */
    size_t fnLen = strlen(dllFileName);
    wchar_t * fn = new wchar_t[fnLen + 1];
    memset(fn, 0, sizeof(wchar_t) * (fnLen + 1));
    for(size_t i = 0; i < fnLen; i++)
        fn[i] = dllFileName[i];

    hBLASDll = LoadLibrary((LPCWSTR)fn);
    if(!hBLASDll){
        XPRINT1(0, stderr, "[LoadBLAS] Error! Cannot load dll %s!\n", dllFileName);
        exit(1);
    }

    /* matrix-matrix multiplication */
    (FARPROC&)XBLAS_SGEMM = GetProcAddress(hBLASDll, "cblas_sgemm");
    (FARPROC&)XBLAS_DGEMM = GetProcAddress(hBLASDll, "cblas_dgemm");

    /* vector-vector multiplication (rank-1) */
    (FARPROC&)XBLAS_SGER = GetProcAddress(hBLASDll, "cblas_sger");
    (FARPROC&)XBLAS_DGER = GetProcAddress(hBLASDll, "cblas_dger");

    /* multi-threading */
    (FARPROC&)XBLAS_SET_THREAD_NUM = GetProcAddress(hBLASDll, "MKL_Set_Num_Threads");
    (FARPROC&)XBLAS_GET_CORE_NUM = GetProcAddress(hBLASDll, "MKL_Get_Max_Threads");

    /* fix: fn was leaked in this branch (delete[] only existed in the
       OPENBLAS branch above) */
    delete[] fn;
#endif // defined(MKL)
#else // _WIN32
    /* on non-Windows systems BLAS is linked directly, so just take addresses */
    XBLAS_SGEMM = &cblas_sgemm;
    XBLAS_DGEMM = &cblas_dgemm;
    XBLAS_SGER = &cblas_sger;
    XBLAS_DGER = &cblas_dger;
#if defined(OPENBLAS)
    XBLAS_SET_THREAD_NUM = &openblas_set_num_threads;
    XBLAS_GET_CORE_NUM = &openblas_get_num_procs;
#endif // defined(OPENBLAS)
#if defined(MKL)
    XBLAS_SET_THREAD_NUM = &mkl_set_num_threads;
    XBLAS_GET_CORE_NUM = &mkl_get_max_num_threads;
#endif // defined(MKL)
#endif // _WIN32
    /* run single-threaded by default */
    XBLAS_SET_THREAD_NUM(1);
#endif // ndef(CUDA_BLAS)
}
/* unload the BLAS dll loaded by LoadBLAS (Windows only; a no-op on other systems) */
void UnloadBLAS()
{
#ifdef _WIN32
/* NOTE(review): exits the whole process if the dll cannot be freed;
   assumes LoadBLAS succeeded earlier -- confirm this is never called twice */
if(!FreeLibrary(hBLASDll)){
XPRINT(0, stderr, "[UnloadBLAS] Error! Cannot free the BLAS dll!\n");
exit(1);
}
#else
/* nothing to do: BLAS is linked directly on non-Windows systems */
#endif
}
#else // undefined(USE_BLAS) || undefined(OPENBLAS)
/* stub used when no BLAS lib is compiled in: report the error and abort */
void LoadBLAS(const char * dllFileName)
{
XPRINT(0, stderr, "[LoadBLAS] Error! No Blas lib is available. Please use OPENBLAS or MKL!\n");
exit(1);
}
/* stub used when no BLAS lib is compiled in: report the error and abort */
void UnloadBLAS()
{
XPRINT(0, stderr, "[UnloadBLAS] Error! No Blas lib is available. Please use OPENBLAS or MKL!\n");
exit(1);
}
#endif // defined(USE_BLAS) && defined(OPENBLAS)
} /* end of the nts (NiuTrans.Tensor) namespace */ } /* end of the nts (NiuTrans.Tensor) namespace */
\ No newline at end of file
...@@ -34,7 +34,6 @@ namespace nts{ ...@@ -34,7 +34,6 @@ namespace nts{
/* some of the code below is from OpenBLAS (https://github.com/xianyi/OpenBLAS) */ /* some of the code below is from OpenBLAS (https://github.com/xianyi/OpenBLAS) */
//#define OPENBLAS
#define OPENBLAS_CONST const #define OPENBLAS_CONST const
typedef int BLASINT; typedef int BLASINT;
...@@ -46,7 +45,26 @@ typedef enum CBLAS_SIDE {CblasLeft=141, CblasRight=142} CBLAS_SIDE; ...@@ -46,7 +45,26 @@ typedef enum CBLAS_SIDE {CblasLeft=141, CblasRight=142} CBLAS_SIDE;
#if defined(USE_BLAS) #if defined(USE_BLAS)
#ifdef OPENBLAS
#define XBLAS_SGEMM cblas_sgemm
#define XBLAS_DGEMM cblas_dgemm
#define XBLAS_SGER cblas_sger
#define XBLAS_DGER cblas_dger
#define XBLAS_SAXPY cblas_saxpy
#define XBLAS_DAXPY cblas_daxpy
#define XBLAS_SET_THREAD_NUM openblas_set_num_threads
#define XBLAS_GET_CORE_NUM openblas_get_num_procs
#endif
#ifdef MKL
#define XBLAS_SGEMM cblas_sgemm
#define XBLAS_DGEMM cblas_dgemm
#define XBLAS_SGER cblas_sger
#define XBLAS_DGER cblas_dger
#define XBLAS_SAXPY cblas_saxpy
#define XBLAS_DAXPY cblas_daxpy
#define XBLAS_SET_THREAD_NUM MKL_Set_Num_Threads
#define XBLAS_GET_CORE_NUM MKL_Get_Max_Threads
#endif
/* /*
single/double-precision floating matrix-matrix multiplication (rank-3) single/double-precision floating matrix-matrix multiplication (rank-3)
- SGEMM (ORDER, TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC) - SGEMM (ORDER, TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
...@@ -62,14 +80,14 @@ where A, B and C are matrices, ...@@ -62,14 +80,14 @@ where A, B and C are matrices,
LDB(=N) specifies the size of the first dimension of B as declared in the calling (sub) program, LDB(=N) specifies the size of the first dimension of B as declared in the calling (sub) program,
and LDC(=N) specifies the size of the first dimension of C as declared in the calling (sub) program. and LDC(=N) specifies the size of the first dimension of C as declared in the calling (sub) program.
*/ */
extern "C" void (*XBLAS_SGEMM)(OPENBLAS_CONST enum CBLAS_ORDER, OPENBLAS_CONST enum CBLAS_TRANSPOSE, OPENBLAS_CONST enum CBLAS_TRANSPOSE, extern "C" void XBLAS_SGEMM(OPENBLAS_CONST enum CBLAS_ORDER, OPENBLAS_CONST enum CBLAS_TRANSPOSE, OPENBLAS_CONST enum CBLAS_TRANSPOSE,
OPENBLAS_CONST BLASINT, OPENBLAS_CONST BLASINT, OPENBLAS_CONST BLASINT, OPENBLAS_CONST float, OPENBLAS_CONST BLASINT, OPENBLAS_CONST BLASINT, OPENBLAS_CONST BLASINT, OPENBLAS_CONST float,
OPENBLAS_CONST float *, OPENBLAS_CONST BLASINT, OPENBLAS_CONST float *, OPENBLAS_CONST BLASINT,
OPENBLAS_CONST float *, OPENBLAS_CONST BLASINT, OPENBLAS_CONST float, OPENBLAS_CONST float *, OPENBLAS_CONST BLASINT, OPENBLAS_CONST float,
float *, OPENBLAS_CONST BLASINT); float *, OPENBLAS_CONST BLASINT);
/* double-precision floating matrix-matrix multiplication */ /* double-precision floating matrix-matrix multiplication */
extern "C" void (*XBLAS_DGEMM)(OPENBLAS_CONST enum CBLAS_ORDER, OPENBLAS_CONST enum CBLAS_TRANSPOSE, OPENBLAS_CONST enum CBLAS_TRANSPOSE, extern "C" void XBLAS_DGEMM(OPENBLAS_CONST enum CBLAS_ORDER, OPENBLAS_CONST enum CBLAS_TRANSPOSE, OPENBLAS_CONST enum CBLAS_TRANSPOSE,
OPENBLAS_CONST BLASINT, OPENBLAS_CONST BLASINT, OPENBLAS_CONST BLASINT, OPENBLAS_CONST double, OPENBLAS_CONST BLASINT, OPENBLAS_CONST BLASINT, OPENBLAS_CONST BLASINT, OPENBLAS_CONST double,
OPENBLAS_CONST double *, OPENBLAS_CONST BLASINT, OPENBLAS_CONST double *, OPENBLAS_CONST BLASINT,
OPENBLAS_CONST double *, OPENBLAS_CONST BLASINT, OPENBLAS_CONST double, OPENBLAS_CONST double *, OPENBLAS_CONST BLASINT, OPENBLAS_CONST double,
...@@ -88,24 +106,33 @@ where X and Y are vectors with m and n elements respectively, ...@@ -88,24 +106,33 @@ where X and Y are vectors with m and n elements respectively,
E.g., if we are using CblasRowMajor, the leading dimension is the number of columns of A. E.g., if we are using CblasRowMajor, the leading dimension is the number of columns of A.
*/ */
extern "C" void (*XBLAS_SGER)(OPENBLAS_CONST enum CBLAS_ORDER, OPENBLAS_CONST BLASINT M, OPENBLAS_CONST BLASINT N, OPENBLAS_CONST float alpha, extern "C" void XBLAS_SGER(OPENBLAS_CONST enum CBLAS_ORDER, OPENBLAS_CONST BLASINT M, OPENBLAS_CONST BLASINT N, OPENBLAS_CONST float alpha,
OPENBLAS_CONST float *Y, OPENBLAS_CONST BLASINT, OPENBLAS_CONST float *, OPENBLAS_CONST BLASINT, OPENBLAS_CONST float *Y, OPENBLAS_CONST BLASINT, OPENBLAS_CONST float *, OPENBLAS_CONST BLASINT,
float *, OPENBLAS_CONST BLASINT); float *, OPENBLAS_CONST BLASINT);
/* double-precision floating vector-vector multiplication (rank-1) */ /* double-precision floating vector-vector multiplication (rank-1) */
extern "C" void (*XBLAS_DGER)(OPENBLAS_CONST enum CBLAS_ORDER, OPENBLAS_CONST BLASINT M, OPENBLAS_CONST BLASINT N, OPENBLAS_CONST double alpha, extern "C" void XBLAS_DGER(OPENBLAS_CONST enum CBLAS_ORDER, OPENBLAS_CONST BLASINT M, OPENBLAS_CONST BLASINT N, OPENBLAS_CONST double alpha,
OPENBLAS_CONST double *Y, OPENBLAS_CONST BLASINT, OPENBLAS_CONST double *, OPENBLAS_CONST BLASINT, OPENBLAS_CONST double *Y, OPENBLAS_CONST BLASINT, OPENBLAS_CONST double *, OPENBLAS_CONST BLASINT,
double *, OPENBLAS_CONST BLASINT); double *, OPENBLAS_CONST BLASINT);
/*
single-precision floating scalar-vector multiply-add (AXPY): y = a * x + y,
where x and y are vectors with n elements and a is a scalar
*/
extern "C" void XBLAS_SAXPY(OPENBLAS_CONST BLASINT n, OPENBLAS_CONST float a, OPENBLAS_CONST float *x, OPENBLAS_CONST BLASINT incx, OPENBLAS_CONST float *y, OPENBLAS_CONST BLASINT incy);
/* double-precision floating scalar-vector multiply-add (AXPY): y = a * x + y */
extern "C" void XBLAS_DAXPY(OPENBLAS_CONST BLASINT n, OPENBLAS_CONST double a, OPENBLAS_CONST double *x, OPENBLAS_CONST BLASINT incx, OPENBLAS_CONST double *y, OPENBLAS_CONST BLASINT incy);
/* set the number of threads */ /* set the number of threads */
extern "C" void (*XBLAS_SET_THREAD_NUM)(int); extern "C" void XBLAS_SET_THREAD_NUM(int);
/* get the number of threads */ /* get the number of threads */
//extern "C" int (*XBLAS_GET_THREAD_NUM)(); //extern "C" int (*XBLAS_GET_THREAD_NUM)();
/* get the number of physical processors (cores).*/ /* get the number of physical processors (cores).*/
extern "C" int (*XBLAS_GET_CORE_NUM)(); extern "C" int XBLAS_GET_CORE_NUM();
/* get the CPU corename */ /* get the CPU corename */
//extern "C" char * (*XBLAS_GET_CORE_NAME)(); //extern "C" char * (*XBLAS_GET_CORE_NAME)();
...@@ -113,58 +140,6 @@ extern "C" int (*XBLAS_GET_CORE_NUM)(); ...@@ -113,58 +140,6 @@ extern "C" int (*XBLAS_GET_CORE_NUM)();
/* get the parallelization type used by OpenBLAS */ /* get the parallelization type used by OpenBLAS */
//extern "C" int (*XBLAS_GET_PARALLEL_TYPE)(void); //extern "C" int (*XBLAS_GET_PARALLEL_TYPE)(void);
/* linux systems */
#ifndef _WIN32
/* cblas functions that are imported from the lib. See cblas.h in OpenBlas for more information */
extern "C" void cblas_sgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB,
OPENBLAS_CONST BLASINT M, OPENBLAS_CONST BLASINT N, OPENBLAS_CONST BLASINT K, OPENBLAS_CONST float alpha,
OPENBLAS_CONST float *A, OPENBLAS_CONST BLASINT lda,
OPENBLAS_CONST float *B, OPENBLAS_CONST BLASINT ldb,
OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST BLASINT ldc);
extern "C" void cblas_dgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB,
OPENBLAS_CONST BLASINT M, OPENBLAS_CONST BLASINT N, OPENBLAS_CONST BLASINT K, OPENBLAS_CONST double alpha,
OPENBLAS_CONST double *A, OPENBLAS_CONST BLASINT lda,
OPENBLAS_CONST double *B, OPENBLAS_CONST BLASINT ldb,
OPENBLAS_CONST double beta, double *C, OPENBLAS_CONST BLASINT ldc);
extern "C" void cblas_sger (OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST BLASINT M, OPENBLAS_CONST BLASINT N, OPENBLAS_CONST float alpha,
OPENBLAS_CONST float *X, OPENBLAS_CONST BLASINT incX, OPENBLAS_CONST float *Y, OPENBLAS_CONST BLASINT incY,
float *A, OPENBLAS_CONST BLASINT lda);
extern "C" void cblas_dger (OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST BLASINT M, OPENBLAS_CONST BLASINT N, OPENBLAS_CONST double alpha,
OPENBLAS_CONST double *X, OPENBLAS_CONST BLASINT incX, OPENBLAS_CONST double *Y, OPENBLAS_CONST BLASINT incY,
double *A, OPENBLAS_CONST BLASINT lda);
#if defined(OPENBLAS)
/* better control of multi-threading */
extern "C" void openblas_set_num_threads(int num_threads);
extern "C" void goto_set_num_threads(int num_threads);
//extern "C" int openblas_get_num_threads(void);
extern "C" int openblas_get_num_procs(void);
//extern "C" char* openblas_get_config(void);
//extern "C" char* openblas_get_corename(void);
//extern "C" int openblas_get_parallel(void);
#endif
#endif
#if defined(MKL)
/* better control of multi-threading */
//_Mkl_Api(void,MKL_Set_Num_Threads,(int nth))
//_Mkl_Api(int,MKL_Get_Max_Threads,(void))
extern "C" void MKL_Set_Num_Threads(int num_threads);
extern "C" int MKL_Get_Max_Threads();
#define mkl_set_num_threads MKL_Set_Num_Threads
#define mkl_get_max_num_threads MKL_Get_Max_Threads
//extern "C" void mkl_set_num_threads(int num_threads);
//extern "C" void omp_set_num_threads(int num_threads);
//extern "C" int mkl_get_max_num_threads();
#endif
#if defined(CUDA_BLAS) #if defined(CUDA_BLAS)
...@@ -186,24 +161,8 @@ extern void BLASMatrixMULD(int deviceID, double * a, double * b, double * c, int ...@@ -186,24 +161,8 @@ extern void BLASMatrixMULD(int deviceID, double * a, double * b, double * c, int
#endif #endif
#endif
#ifdef _WIN32
#include "windows.h"
extern HINSTANCE hBLASDll;
#else
#endif #endif
/* load some stuff for BLAS */
extern void LoadBLAS(const char * dllFileName);
/* unload the libs */
extern void UnloadBLAS();
} /* end of the nts (NiuTrans.Tensor) namespace */ } /* end of the nts (NiuTrans.Tensor) namespace */
#endif #endif
...@@ -160,8 +160,10 @@ extern bool useCUDA; ...@@ -160,8 +160,10 @@ extern bool useCUDA;
/* BLAS interfaces */ /* BLAS interfaces */
#ifdef DOUBELPRICSION #ifdef DOUBELPRICSION
#define GEMM XBLAS_DGEMM #define GEMM XBLAS_DGEMM
#define AXPY XBLAS_DAXPY
#else #else
#define GEMM XBLAS_SGEMM #define GEMM XBLAS_SGEMM
#define AXPY XBLAS_SAXPY
#endif #endif
extern void InitGlobalAll(); extern void InitGlobalAll();
......
...@@ -300,6 +300,9 @@ void XLink::MakeLink(const XTensor * t1, const XTensor * t2, XTensor * h, int id ...@@ -300,6 +300,9 @@ void XLink::MakeLink(const XTensor * t1, const XTensor * t2, XTensor * h, int id
if(h == NULL) if(h == NULL)
return; return;
if (!t1->enableGrad)
return;
TensorList list(2); TensorList list(2);
list.Add((XTensor*)t1); list.Add((XTensor*)t1);
list.Add((XTensor*)t2); list.Add((XTensor*)t2);
...@@ -320,6 +323,9 @@ void XLink::MakeLink(const XTensor * t1, const XTensor * t2, const XTensor * t3, ...@@ -320,6 +323,9 @@ void XLink::MakeLink(const XTensor * t1, const XTensor * t2, const XTensor * t3,
if (h == NULL) if (h == NULL)
return; return;
if (!t1->enableGrad || !t2->enableGrad)
return;
TensorList list(3); TensorList list(3);
list.Add((XTensor*)t1); list.Add((XTensor*)t1);
list.Add((XTensor*)t2); list.Add((XTensor*)t2);
...@@ -370,6 +376,9 @@ create a hyper edge with a input tensors and a list of output tensors ...@@ -370,6 +376,9 @@ create a hyper edge with a input tensors and a list of output tensors
*/ */
void XLink::MakeLink(XTensor * t, TensorList * list, int id) void XLink::MakeLink(XTensor * t, TensorList * list, int id)
{ {
if (!t->enableGrad)
return;
/* forward */ /* forward */
for(int i = 0; i < list->count; i++){ for(int i = 0; i < list->count; i++){
XTensor * h = (XTensor*)list->GetItem(i); XTensor * h = (XTensor*)list->GetItem(i);
......
...@@ -23,15 +23,11 @@ ...@@ -23,15 +23,11 @@
* *
*/ */
#include "XList.h" #include "time.h"
#include "XMem.h" #include "XMem.h"
#include "XList.h"
#include "XGlobal.h" #include "XGlobal.h"
#include <ctime>
#include <utility>
#include <algorithm>
/* the nts (NiuTrans.Tensor) namespace */ /* the nts (NiuTrans.Tensor) namespace */
namespace nts { namespace nts {
...@@ -78,6 +74,7 @@ TensorListBase<T>::TensorListBase(int myMaxNum, XMem* myMem) ...@@ -78,6 +74,7 @@ TensorListBase<T>::TensorListBase(int myMaxNum, XMem* myMem)
template <typename T> template <typename T>
TensorListBase<T>::~TensorListBase() TensorListBase<T>::~TensorListBase()
{ {
if(items && mem)
delete[] items; delete[] items;
} }
...@@ -101,7 +98,13 @@ void TensorListBase<T>::Add(T&& item) ...@@ -101,7 +98,13 @@ void TensorListBase<T>::Add(T&& item)
maxNum = maxNum * 2 + 1; maxNum = maxNum * 2 + 1;
} }
items[count++] = item; items[count++] = item;
}
/* return the number of elements currently stored in the list
   << return - the element count (note: returned as size_t while the
      internal counter is an int) */
template<typename T>
size_t TensorListBase<T>::Size()
{
return count;
}
/* /*
...@@ -131,7 +134,7 @@ add a number of items into the list ...@@ -131,7 +134,7 @@ add a number of items into the list
>> inputItemCount - number of input items >> inputItemCount - number of input items
*/ */
template <typename T> template <typename T>
void TensorListBase<T>::Add(T* inputItems, int inputItemCount) void TensorListBase<T>::Add(const T* inputItems, int inputItemCount)
{ {
if (count + inputItemCount >= maxNum) { if (count + inputItemCount >= maxNum) {
int newMaxNum = (count + inputItemCount) * 2 + 1; int newMaxNum = (count + inputItemCount) * 2 + 1;
...@@ -207,10 +210,10 @@ void TensorListBase<T>::Insert(int pos, T&& item) ...@@ -207,10 +210,10 @@ void TensorListBase<T>::Insert(int pos, T&& item)
template <typename T> template <typename T>
T& TensorListBase<T>::GetItem(int i) const T& TensorListBase<T>::GetItem(int i) const
{ {
CheckNTErrors(i >= -1 && i < count, "Index of a list item is out of scope!"); CheckNTErrors(i >= -count && i < count, "Index of a list item is out of scope!");
CheckNTErrors(count > 0, "Cannt index the item in an empty list!"); CheckNTErrors(count > 0, "Cannt index the item in an empty list!");
if (i == -1) if (i < 0)
return items[count - 1]; return items[count + i];
else else
return items[i]; return items[i];
} }
...@@ -227,7 +230,7 @@ template<typename T> ...@@ -227,7 +230,7 @@ template<typename T>
inline void TensorListBase<T>::SetItem(int i, T&& item) inline void TensorListBase<T>::SetItem(int i, T&& item)
{ {
if (i >= 0 && i < count) if (i >= 0 && i < count)
items[i] = std::move(item); items[i] = item;
} }
/* /*
...@@ -246,6 +249,26 @@ inline int TensorListBase<T>::FindFirst(const T& item) ...@@ -246,6 +249,26 @@ inline int TensorListBase<T>::FindFirst(const T& item)
return -1; return -1;
} }
/* specialization for Example: two examples match when their id fields are equal
   >> item - the example whose id we search for
   << return - index of the first element with the same id, or -1 if none */
template <>
inline int TensorListBase<Example>::FindFirst(const Example& item)
{
    int pos = 0;
    while (pos < count) {
        if (items[pos].id == item.id)
            return pos;
        ++pos;
    }
    return -1;
}
/* specialization for Result: two results match when their id fields are equal
   >> item - the result whose id we search for
   << return - index of the first element with the same id, or -1 if none */
template <>
inline int TensorListBase<Result>::FindFirst(const Result& item)
{
    int pos = 0;
    while (pos < count) {
        if (items[pos].id == item.id)
            return pos;
        ++pos;
    }
    return -1;
}
/* clear the data array */ /* clear the data array */
template <typename T> template <typename T>
void TensorListBase<T>::Clear() void TensorListBase<T>::Clear()
...@@ -295,6 +318,17 @@ void TensorListBase<T>::Remove(int i) ...@@ -295,6 +318,17 @@ void TensorListBase<T>::Remove(int i)
count--; count--;
} }
/* 
reserve space for n data entries; only valid on a list whose buffer
has not been allocated yet (otherwise it silently does nothing)
>> n - number of entries to reserve room for
*/
template<typename T>
void TensorListBase<T>::Reserve(int n)
{
    if (items) {
        /* reserve failed: the buffer already exists */
        return;
    }

    items = new T[n];
    /* fix: keep the capacity bookkeeping consistent with Add(), which
       compares count against maxNum -- the original left maxNum unchanged,
       so the very next Add() would reallocate and defeat the reservation */
    maxNum = n;
}
/* /*
copy the list copy the list
>> myMem - memory pool used for allocating the data in the new list >> myMem - memory pool used for allocating the data in the new list
...@@ -349,6 +383,8 @@ template struct TensorListBase<long>; ...@@ -349,6 +383,8 @@ template struct TensorListBase<long>;
template struct TensorListBase<float>; template struct TensorListBase<float>;
template struct TensorListBase<short>; template struct TensorListBase<short>;
template struct TensorListBase<XTensor*>; template struct TensorListBase<XTensor*>;
template struct TensorListBase<Result>;
template struct TensorListBase<Example>;
template struct TensorListBase<void*>; template struct TensorListBase<void*>;
} /* end of the nts (NiuTrans.Tensor) namespace */ } /* end of the nts (NiuTrans.Tensor) namespace */
\ No newline at end of file
...@@ -66,11 +66,14 @@ public: ...@@ -66,11 +66,14 @@ public:
/* add an item into the list */ /* add an item into the list */
void Add(T&& item); void Add(T&& item);
/* return number of elements */
size_t Size();
/* add an item into the list */ /* add an item into the list */
void Add(const T& item); void Add(const T& item);
/* add a number of items into the list */ /* add a number of items into the list */
void Add(T* inputItems, int inputItemCount); void Add(const T* inputItems, int inputItemCount);
/* append a list to the current list */ /* append a list to the current list */
void AddList(TensorListBase* l); void AddList(TensorListBase* l);
...@@ -105,6 +108,9 @@ public: ...@@ -105,6 +108,9 @@ public:
/* remove the item at position i */ /* remove the item at position i */
void Remove(int i); void Remove(int i);
/* reserve space for data entry */
void Reserve(int n);
/* copy the list */ /* copy the list */
TensorListBase* Copy(XMem* myMem); TensorListBase* Copy(XMem* myMem);
...@@ -112,22 +118,33 @@ public: ...@@ -112,22 +118,33 @@ public:
void Shuffle(int nround = 10, int beg = -1, int len = 0); void Shuffle(int nround = 10, int beg = -1, int len = 0);
/* short */ /* short */
T& operator[] (int i) { T& operator[] (int i) { return GetItem(i); };
return GetItem(i);
};
T& Get(int i) { return GetItem(i); }; T& Get(int i) { return GetItem(i); };
void Set(int i, T item) { SetItem(i, item); }; void Set(int i, T item) { SetItem(i, item); };
}; };
struct XTensor; struct XTensor;
typedef TensorListBase<void*> XList;
typedef TensorListBase<int> IntList; typedef TensorListBase<int> IntList;
typedef TensorListBase<char> CharList; typedef TensorListBase<char> CharList;
typedef TensorListBase<char*> StrList; typedef TensorListBase<char*> StrList;
typedef TensorListBase<long> LongList; typedef TensorListBase<long> LongList;
typedef TensorListBase<float> FloatList; typedef TensorListBase<float> FloatList;
typedef TensorListBase<short> ShortList; typedef TensorListBase<short> ShortList;
typedef TensorListBase<void*> XList;
struct Example {
int id;
IntList data;
};
struct Result {
int id;
IntList data;
};
typedef TensorListBase<Result> ResultList;
typedef TensorListBase<Example> ExampleList;
typedef TensorListBase<XTensor*> TensorList; typedef TensorListBase<XTensor*> TensorList;
} /* end of the nts (NiuTrans.Tensor) namespace */ } /* end of the nts (NiuTrans.Tensor) namespace */
......
...@@ -51,7 +51,9 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -51,7 +51,9 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#define MATH_MASK MATH_DIVDIM + 1 #define MATH_MASK MATH_DIVDIM + 1
#define MATH_MATRIXMUL MATH_MASK + 1 #define MATH_MATRIXMUL MATH_MASK + 1
#define MATH_MATRIXMULBATCHED MATH_MATRIXMUL + 1 #define MATH_MATRIXMULBATCHED MATH_MATRIXMUL + 1
#define MATH_MULTIPLY MATH_MATRIXMULBATCHED + 1 #define MATH_MAX MATH_MATRIXMULBATCHED + 1
#define MATH_MIN MATH_MAX + 1
#define MATH_MULTIPLY MATH_MIN + 1
#define MATH_MULTIPLYDIM MATH_MULTIPLY + 1 #define MATH_MULTIPLYDIM MATH_MULTIPLY + 1
#define MATH_MULTIPLYBROADCAST MATH_MULTIPLYDIM + 1 #define MATH_MULTIPLYBROADCAST MATH_MULTIPLYDIM + 1
#define MATH_NEGATE MATH_MULTIPLYBROADCAST + 1 #define MATH_NEGATE MATH_MULTIPLYBROADCAST + 1
......
...@@ -280,7 +280,7 @@ void XTensor::Init() ...@@ -280,7 +280,7 @@ void XTensor::Init()
isTmp = false; isTmp = false;
isGrad = false; isGrad = false;
isVar = false; isVar = false;
enableGrad = false; enableGrad = true;
visitMark = 0; visitMark = 0;
grad = NULL; grad = NULL;
} }
...@@ -397,7 +397,7 @@ XTensor& XTensor::operator= (const XTensor& tensor) ...@@ -397,7 +397,7 @@ XTensor& XTensor::operator= (const XTensor& tensor)
CheckNTErrors(outgo.tailNum == 0, "The node has outgoing edge to other nodes!"); CheckNTErrors(outgo.tailNum == 0, "The node has outgoing edge to other nodes!");
/* create tensor links for the new tensor */ /* create tensor links for the new tensor */
XLink::Replace(&tensor, this); XLink::Copy(&tensor, this);
} }
return *this; return *this;
...@@ -445,7 +445,7 @@ XTensor& XTensor::operator= (const XTensor&& tensor) ...@@ -445,7 +445,7 @@ XTensor& XTensor::operator= (const XTensor&& tensor)
This is VERY tricky and there might be better solutions :) */ This is VERY tricky and there might be better solutions :) */
*tensor.dataP = NULL; *tensor.dataP = NULL;
XLink::Replace(&tensor, this); XLink::Copy(&tensor, this);
return *this; return *this;
} }
...@@ -720,7 +720,7 @@ int XTensor::GetSize() const ...@@ -720,7 +720,7 @@ int XTensor::GetSize() const
} }
/* get the size of the memory space used */ /* get the size of the memory space used */
int XTensor::GetDataSizeInChar() int XTensor::GetDataSizeInChar() const
{ {
if(isSparse){ if(isSparse){
int num = int(unitNum * denseRatio + 1); int num = int(unitNum * denseRatio + 1);
...@@ -738,7 +738,7 @@ get unit size in terms of "dataType" ...@@ -738,7 +738,7 @@ get unit size in terms of "dataType"
>> myDataType - type of unit >> myDataType - type of unit
<< return - unit size << return - unit size
*/ */
int XTensor::GetUnitSize(TENSOR_DATA_TYPE myDataType) int XTensor::GetUnitSize(TENSOR_DATA_TYPE myDataType) const
{ {
if(myDataType == X_INT) if(myDataType == X_INT)
return sizeof(int); return sizeof(int);
...@@ -758,7 +758,7 @@ get offset (2D) ...@@ -758,7 +758,7 @@ get offset (2D)
>> row - index of demension 0 >> row - index of demension 0
>> col - index of demension 1 >> col - index of demension 1
*/ */
MTYPE XTensor::GetOffset2D(int row, int col) MTYPE XTensor::GetOffset2D(int row, int col) const
{ {
CheckNTErrors(order == 2, "Cannot get a 2d cell for a tensor whose order is not 2!"); CheckNTErrors(order == 2, "Cannot get a 2d cell for a tensor whose order is not 2!");
CheckNTErrors(row >= 0 && row < dimSize[0], "dimension 0 is out of range!"); CheckNTErrors(row >= 0 && row < dimSize[0], "dimension 0 is out of range!");
...@@ -773,7 +773,7 @@ get offset (3D) ...@@ -773,7 +773,7 @@ get offset (3D)
>> d1 - index of demension 1 >> d1 - index of demension 1
>> d2 - index of demension 2 >> d2 - index of demension 2
*/ */
MTYPE XTensor::GetOffset3D(int d0, int d1, int d2) MTYPE XTensor::GetOffset3D(int d0, int d1, int d2) const
{ {
CheckNTErrors(order == 3, "Cannot get a 3d cell for a tensor whose order is not 2!"); CheckNTErrors(order == 3, "Cannot get a 3d cell for a tensor whose order is not 2!");
CheckNTErrors(d0 >= 0 && d0 < dimSize[0], "dimension 0 is out of range!"); CheckNTErrors(d0 >= 0 && d0 < dimSize[0], "dimension 0 is out of range!");
...@@ -856,6 +856,16 @@ void XTensor::Rand(int rNum, int cNum) ...@@ -856,6 +856,16 @@ void XTensor::Rand(int rNum, int cNum)
_SetDataRand(this, rNum, cNum); _SetDataRand(this, rNum, cNum);
} }
/* generate data items in an arithmetic sequence over [lower, upper) with a given step
>> lower - first value of the sequence
>> upper - exclusive upper bound (not included)
>> step - difference between two adjacent items
*/
void XTensor::Range(DTYPE lower, DTYPE upper, DTYPE step)
{
_SetDataRange(this, lower, upper, step);
}
/* /*
set the tensor items by a uniform distribution in range [lower, upper] set the tensor items by a uniform distribution in range [lower, upper]
>> lower - lower value of the range >> lower - lower value of the range
...@@ -929,9 +939,11 @@ set the tensor items by a normal distribution ...@@ -929,9 +939,11 @@ set the tensor items by a normal distribution
void XTensor::SetDataRandn(DTYPE mean, DTYPE standardDeviation) void XTensor::SetDataRandn(DTYPE mean, DTYPE standardDeviation)
{ {
// TODO: cuda code!!!!!!! // TODO: cuda code!!!!!!!
if (data == NULL) if (data == NULL)
return; return;
// srand((unsigned)time(0));
void * d = NULL; void * d = NULL;
if (dataType == X_FLOAT) { if (dataType == X_FLOAT) {
d = new float[unitNum]; d = new float[unitNum];
...@@ -986,7 +998,7 @@ void XTensor::SetDataBatchedWithValues(MTYPE * offsets, void * values, int num) ...@@ -986,7 +998,7 @@ void XTensor::SetDataBatchedWithValues(MTYPE * offsets, void * values, int num)
>> num - number of data items >> num - number of data items
>> beg - where we start this in the data array of the tensor >> beg - where we start this in the data array of the tensor
*/ */
bool XTensor::CheckData(const void * d, int num, int beg) bool XTensor::CheckData(const void * d, int num, int beg) const
{ {
if (data == NULL || d == NULL) if (data == NULL || d == NULL)
return false; return false;
...@@ -1030,7 +1042,7 @@ bool IsFloatEqual(DTYPE a, DTYPE b, float absError, float relError) ...@@ -1030,7 +1042,7 @@ bool IsFloatEqual(DTYPE a, DTYPE b, float absError, float relError)
} }
/* check whether the data array is the same as the "answer" */ /* check whether the data array is the same as the "answer" */
bool XTensor::CheckData(const void * d, int num, float tolerance, int beg) bool XTensor::CheckData(const void * d, int num, float tolerance, int beg) const
{ {
if (data == NULL || d == NULL) if (data == NULL || d == NULL)
return false; return false;
...@@ -1106,7 +1118,7 @@ get the value of a cell with the index ...@@ -1106,7 +1118,7 @@ get the value of a cell with the index
>> size - size of the index >> size - size of the index
<< return - cell value << return - cell value
*/ */
DTYPE XTensor::Get(int index[], int size) DTYPE XTensor::Get(int index[], int size) const
{ {
CheckNTErrors(dataType == DEFAULT_DTYPE, "The tensor is not in the default type."); CheckNTErrors(dataType == DEFAULT_DTYPE, "The tensor is not in the default type.");
...@@ -1118,7 +1130,7 @@ get the value of a cell with its offset ...@@ -1118,7 +1130,7 @@ get the value of a cell with its offset
>> offset - offset in the array >> offset - offset in the array
<< return - cell value << return - cell value
*/ */
DTYPE XTensor::Get(int offset) DTYPE XTensor::Get(int offset) const
{ {
CheckNTErrors(dataType == DEFAULT_DTYPE, "The tensor is not in the default type."); CheckNTErrors(dataType == DEFAULT_DTYPE, "The tensor is not in the default type.");
CheckNTErrors(offset >= 0 && offset < unitNum, "Invalid index!"); CheckNTErrors(offset >= 0 && offset < unitNum, "Invalid index!");
...@@ -1170,7 +1182,7 @@ get the value of a cell in a 1d tensor in default type ...@@ -1170,7 +1182,7 @@ get the value of a cell in a 1d tensor in default type
>> i - idex >> i - idex
<< return - value of cell(i) in float << return - value of cell(i) in float
*/ */
DTYPE XTensor::Get1D(int i) DTYPE XTensor::Get1D(int i) const
{ {
CheckNTErrors((order == 1), "Cannot get a 2d cell for a tensor whose order is not 2!"); CheckNTErrors((order == 1), "Cannot get a 2d cell for a tensor whose order is not 2!");
CheckNTErrors((i >= 0 && i < dimSize[0]), "dimension 0 is out of range!"); CheckNTErrors((i >= 0 && i < dimSize[0]), "dimension 0 is out of range!");
...@@ -1207,7 +1219,7 @@ get the value of a cell in a 3d tensor ...@@ -1207,7 +1219,7 @@ get the value of a cell in a 3d tensor
>> d1 - index of dimension 1 >> d1 - index of dimension 1
>> d2 - index of dimension 2 >> d2 - index of dimension 2
*/ */
DTYPE XTensor::Get3D(int d0, int d1, int d2) DTYPE XTensor::Get3D(int d0, int d1, int d2) const
{ {
CheckNTErrors((order == 3), "Cannot get a 2d cell for a tensor whose order is not 2!"); CheckNTErrors((order == 3), "Cannot get a 2d cell for a tensor whose order is not 2!");
CheckNTErrors((d0 >= 0 && d0 < dimSize[0]), "dimension 0 is out of range!"); CheckNTErrors((d0 >= 0 && d0 < dimSize[0]), "dimension 0 is out of range!");
...@@ -1225,7 +1237,7 @@ DTYPE XTensor::Get3D(int d0, int d1, int d2) ...@@ -1225,7 +1237,7 @@ DTYPE XTensor::Get3D(int d0, int d1, int d2)
get the int value of a cell by its offset get the int value of a cell by its offset
>> offset - offset of the item >> offset - offset of the item
*/ */
int XTensor::GetInt(int offset) int XTensor::GetInt(int offset) const
{ {
CheckNTErrors(dataType == X_INT, "The tensor is not in the integer type."); CheckNTErrors(dataType == X_INT, "The tensor is not in the integer type.");
CheckNTErrors(offset >= 0 && offset < unitNum, "Invalid index!"); CheckNTErrors(offset >= 0 && offset < unitNum, "Invalid index!");
...@@ -1242,7 +1254,7 @@ get the value of a cell in a 1d tensor in int type ...@@ -1242,7 +1254,7 @@ get the value of a cell in a 1d tensor in int type
>> i - index >> i - index
<< return - value of cell(i) in int << return - value of cell(i) in int
*/ */
int XTensor::Get1DInt(int i) int XTensor::Get1DInt(int i) const
{ {
CheckNTErrors(order == 1, "Cannot get a 2d cell for a tensor whose order is not 2!"); CheckNTErrors(order == 1, "Cannot get a 2d cell for a tensor whose order is not 2!");
CheckNTErrors(i >= 0 && i < dimSize[0], "dimension 0 is out of range!"); CheckNTErrors(i >= 0 && i < dimSize[0], "dimension 0 is out of range!");
...@@ -1260,7 +1272,7 @@ get the value of a cell in a 2d tensor in int type ...@@ -1260,7 +1272,7 @@ get the value of a cell in a 2d tensor in int type
>> mi - column index >> mi - column index
<< return - value of cell(ni, mi) in int << return - value of cell(ni, mi) in int
*/ */
int XTensor::Get2DInt(int ni, int mi) int XTensor::Get2DInt(int ni, int mi) const
{ {
CheckNTErrors(order == 2, "Cannot get a 2d cell for a tensor whose order is not 2!"); CheckNTErrors(order == 2, "Cannot get a 2d cell for a tensor whose order is not 2!");
CheckNTErrors(ni >= 0 && ni < dimSize[0], "dimension 0 is out of range!"); CheckNTErrors(ni >= 0 && ni < dimSize[0], "dimension 0 is out of range!");
...@@ -1280,7 +1292,7 @@ get the value of a cell in a 3d tensor in int type ...@@ -1280,7 +1292,7 @@ get the value of a cell in a 3d tensor in int type
>> d2 - index of dimension 2 >> d2 - index of dimension 2
<< return - value of cell(d0, d1, d2) in int << return - value of cell(d0, d1, d2) in int
*/ */
int XTensor::Get3DInt(int d0, int d1, int d2) int XTensor::Get3DInt(int d0, int d1, int d2) const
{ {
CheckNTErrors(order == 3, "Cannot get a 2d cell for a tensor whose order is not 2!"); CheckNTErrors(order == 3, "Cannot get a 2d cell for a tensor whose order is not 2!");
CheckNTErrors(d0 >= 0 && d0 < dimSize[0], "dimension 0 is out of range!"); CheckNTErrors(d0 >= 0 && d0 < dimSize[0], "dimension 0 is out of range!");
...@@ -1299,7 +1311,7 @@ get the value of a cell in the sparse tensor ...@@ -1299,7 +1311,7 @@ get the value of a cell in the sparse tensor
>> i - i-th tuple in the tuple list of the sparse tensor >> i - i-th tuple in the tuple list of the sparse tensor
<< return - value of the tuple << return - value of the tuple
*/ */
DTYPE XTensor::GetInSparse(int i) DTYPE XTensor::GetInSparse(int i) const
{ {
CheckNTErrors(i >= 0 && i < unitNum, "Index is out of range!"); CheckNTErrors(i >= 0 && i < unitNum, "Index is out of range!");
CheckNTErrors(dataType == DEFAULT_DTYPE, "The tensor is not in default type."); CheckNTErrors(dataType == DEFAULT_DTYPE, "The tensor is not in default type.");
...@@ -1315,7 +1327,7 @@ get the key value of a tuple in a sparse tensor ...@@ -1315,7 +1327,7 @@ get the key value of a tuple in a sparse tensor
>> i - i-th tuple in the tuple list of the sparse tensor >> i - i-th tuple in the tuple list of the sparse tensor
<< return - key of the tuple << return - key of the tuple
*/ */
int XTensor::GetKeyInSparse(int i) int XTensor::GetKeyInSparse(int i) const
{ {
CheckNTErrors(i >= 0 && i < unitNum, "Index is out of range!"); CheckNTErrors(i >= 0 && i < unitNum, "Index is out of range!");
CheckNTErrors(dataType == DEFAULT_DTYPE, "The tensor is not in default type."); CheckNTErrors(dataType == DEFAULT_DTYPE, "The tensor is not in default type.");
...@@ -1528,7 +1540,7 @@ increase the value of a cell in a 2d tensor ...@@ -1528,7 +1540,7 @@ increase the value of a cell in a 2d tensor
} }
/* get the number of non-zero elements (in a sparse tensor) */ /* get the number of non-zero elements (in a sparse tensor) */
int XTensor::GetNonzeroSize() int XTensor::GetNonzeroSize() const
{ {
if(!isSparse){ if(!isSparse){
XPRINT(1, stderr, "WARNING! Counting non-zero elements in a dense tensor might be slow!\n"); XPRINT(1, stderr, "WARNING! Counting non-zero elements in a dense tensor might be slow!\n");
...@@ -1893,148 +1905,6 @@ void XTensor::Dump(FILE * file, const char * label, const int n, const int beg, ...@@ -1893,148 +1905,6 @@ void XTensor::Dump(FILE * file, const char * label, const int n, const int beg,
} }
} }
void * RecursionData(XTensor * s, int dim, int * index, void * d, FILE * file)
{
if (dim == s->order - 2) {
/* print index */
printf("Index: ");
for (int i = 0; i < s->order-2; i++)
printf("[%d]", index[i]);
int dimSize1 = s->dimSize[dim];
int dimSize2 = s->dimSize[dim+1];
printf(" %d * %d\n", dimSize1, dimSize2);
/* print 2D data */
if (s->dataType == X_FLOAT) {
float * data = (float*)d;
for (int i = 0; i < dimSize1; i++) {
printf("\t");
for (int j = 0; j < dimSize2; j++)
fprintf(file, "%e ", *data++);
fprintf(file, "\n");
}
d = (float*)d + dimSize1 *dimSize2;
}
else if (s->dataType == X_INT) {
int * data = (int*)d;
for (int i = 0; i < dimSize1; i++) {
printf("\t");
for (int j = 0; j < dimSize2; j++)
fprintf(file, "%d ", *data++);
fprintf(file, "\n");
}
d = (int*)d + dimSize1 *dimSize2;
}
else
ShowNTErrors("TODO!");
return d;
}
/* recursion for deeper dimsion */
int levelSize = s->dimSize[dim];
for (int k = 0; k < levelSize; k++) {
index[dim] = k;
d = RecursionData(s, dim+1, index, d, file);
}
return d;
}
/*
dump data to a file
>> file - where to domp the data
>> label - label of the tensor
>> n - number of items to dump
>> beg - the first item id
>> verbose - verbose level
*/
void XTensor::DumpFormat(FILE * file, const char * label, const int n, const int beg, const int verbose)
{
if (verbose > verboseLevel)
return;
void * d = data;
bool isNewData = false;
#ifdef USE_CUDA
if (devID >= 0) {
CudaGPUToCPUFlush(this);
d = dataHost;
isNewData = true;
}
#endif
if (d == NULL) {
if (isSparse) {
int num = 0;
for (int i = 0; i < order; i++)
num *= dimSizeRDI[i];
num = int(num * denseRatio + 1);
int tupleSize = sizeof(int) + sizeof(DTYPE);
int size = sizeof(int) + tupleSize*(num);
d = new char[size];
memset(d, 0, size);
}
else {
d = new char[unitNum * unitSize];
memset(d, 0, unitNum * unitSize);
}
isNewData = true;
}
if (label != NULL)
fprintf(file, "%s ", label);
if(isInit){
fprintf(file, "id=%d ", id);
fprintf(file, "order=%d dimsize=", order);
for (int i = 0; i < order; i++) {
fprintf(file, "%d", dimSize[i]);
if (i < order - 1)
fprintf(file, ",");
}
}
else{
fprintf(file, "order=-1 dimsize=-1");
}
fprintf(file, " dtype=%s dense=%f\n", GetDataTypeName(dataType), denseRatio);
if(!isInit){
fprintf(file, "NULL");
}
if (order == 1) {
for (int i = 0; i < unitNum; i++) {
if (dataType == X_FLOAT)
fprintf(file, "%e ", ((float*)d)[i]);
else if (dataType == X_INT)
fprintf(file, "%d ", ((int*)d)[i]);
else
ShowNTErrors("TODO!");
}
printf("\n");
}
/* print multi-dimensional tensor */
else {
int * index = new int[order];
RecursionData(this, 0, index, d, file);
delete[] index;
}
fprintf(file, "\n");
if (isNewData) {
delete[](char*)d;
#ifdef USE_CUDA
if (devID >= 0)
dataHost = NULL;
#endif
}
}
/* /*
dump data to a file dump data to a file
>> tensor - the tensor for dumping >> tensor - the tensor for dumping
...@@ -2052,6 +1922,26 @@ void XTensor::Dump(const XTensor * tensor, FILE * file, const char * label, cons ...@@ -2052,6 +1922,26 @@ void XTensor::Dump(const XTensor * tensor, FILE * file, const char * label, cons
} }
/* /*
dump data to a binary file
>> file - where to dump the data
*/
void XTensor::BinaryDump(FILE* file)
{
XTensor tmp;
InitTensorOnCPU(&tmp, this);
_CopyValues(this, &tmp);
switch (dataType) {
case X_INT: {
fwrite(tmp.data, sizeof(int), unitNum, file);
}
default: {
fwrite(tmp.data, sizeof(float), unitNum, file);
}
}
}
/*
read data from a file read data from a file
>> file - where to load the data >> file - where to load the data
>> label - label of the tensor >> label - label of the tensor
...@@ -2164,6 +2054,30 @@ void XTensor::Read(FILE * file, const char * label) ...@@ -2164,6 +2054,30 @@ void XTensor::Read(FILE * file, const char * label)
} }
/* /*
read data from a binary file
>>> file - the file stream pointer
>>> offset - the distance from the start to this tensor
*/
void XTensor::BinaryRead(FILE* file, size_t offset)
{
fseek(file, offset, 0);
switch (dataType) {
case X_INT: {
int * d = new int[unitNum];
fread(d, sizeof(int), unitNum, file);
SetData(d, unitNum);
delete[] d;
}
default: {
float * d = new float[unitNum];
fread(d, sizeof(float), unitNum, file);
SetData(d, unitNum);
delete[] d;
}
}
}
/*
flush the data to the target device flush the data to the target device
>> targetMem - memory pool on the target device >> targetMem - memory pool on the target device
*/ */
...@@ -2327,7 +2241,7 @@ initialize a dense tensor V2 ...@@ -2327,7 +2241,7 @@ initialize a dense tensor V2
void InitTensorV2(XTensor * tensor, void InitTensorV2(XTensor * tensor,
const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType, const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType,
const int myDevID) const int myDevID, const bool isEnableGrad)
{ {
if (tensor->mem == NULL) { if (tensor->mem == NULL) {
XMem * myMem = GMems.GetMem(myDevID); XMem * myMem = GMems.GetMem(myDevID);
...@@ -2359,6 +2273,7 @@ void InitTensorV2(XTensor * tensor, ...@@ -2359,6 +2273,7 @@ void InitTensorV2(XTensor * tensor,
if(allocated) if(allocated)
XTensor::AllocateData(tensor); XTensor::AllocateData(tensor);
} }
tensor->enableGrad = isEnableGrad;
} }
/* /*
...@@ -2392,12 +2307,12 @@ initialize a dense tensor V2 ...@@ -2392,12 +2307,12 @@ initialize a dense tensor V2
*/ */
void InitTensor1DV2(XTensor * tensor, const int num, void InitTensor1DV2(XTensor * tensor, const int num,
const TENSOR_DATA_TYPE myDataType, const int myDevID) const TENSOR_DATA_TYPE myDataType, const int myDevID, const bool isEnableGrad)
{ {
int dims[1]; int dims[1];
dims[0] = num; dims[0] = num;
InitTensorV2(tensor, 1, dims, myDataType, myDevID); InitTensorV2(tensor, 1, dims, myDataType, myDevID, isEnableGrad);
} }
/* /*
...@@ -2434,13 +2349,13 @@ initialize a dense matrix V2 ...@@ -2434,13 +2349,13 @@ initialize a dense matrix V2
*/ */
void InitTensor2DV2(XTensor * tensor, const int rowNum, const int colNum, void InitTensor2DV2(XTensor * tensor, const int rowNum, const int colNum,
const TENSOR_DATA_TYPE myDataType, const int myDevID) const TENSOR_DATA_TYPE myDataType, const int myDevID, const bool isEnableGrad)
{ {
int dims[2]; int dims[2];
dims[0] = rowNum; dims[0] = rowNum;
dims[1] = colNum; dims[1] = colNum;
InitTensorV2(tensor, 2, dims, myDataType, myDevID); InitTensorV2(tensor, 2, dims, myDataType, myDevID, isEnableGrad);
} }
/* /*
...@@ -2480,14 +2395,14 @@ initialize a dense 3d tensor V2 ...@@ -2480,14 +2395,14 @@ initialize a dense 3d tensor V2
*/ */
void InitTensor3DV2(XTensor * tensor, const int d0, const int d1, const int d2, void InitTensor3DV2(XTensor * tensor, const int d0, const int d1, const int d2,
const TENSOR_DATA_TYPE myDataType, const int myDevID) const TENSOR_DATA_TYPE myDataType, const int myDevID, const bool isEnableGrad)
{ {
int dims[3]; int dims[3];
dims[0] = d0; dims[0] = d0;
dims[1] = d1; dims[1] = d1;
dims[2] = d2; dims[2] = d2;
InitTensorV2(tensor, 3, dims, myDataType, myDevID); InitTensorV2(tensor, 3, dims, myDataType, myDevID, isEnableGrad);
} }
/* /*
...@@ -2530,7 +2445,7 @@ initialize a dense 4d tensor V2 ...@@ -2530,7 +2445,7 @@ initialize a dense 4d tensor V2
*/ */
void InitTensor4DV2(XTensor * tensor, const int d0, const int d1, const int d2, const int d3, void InitTensor4DV2(XTensor * tensor, const int d0, const int d1, const int d2, const int d3,
const TENSOR_DATA_TYPE myDataType, const int myDevID) const TENSOR_DATA_TYPE myDataType, const int myDevID, const bool isEnableGrad)
{ {
int dims[4]; int dims[4];
dims[0] = d0; dims[0] = d0;
...@@ -2538,7 +2453,7 @@ void InitTensor4DV2(XTensor * tensor, const int d0, const int d1, const int d2, ...@@ -2538,7 +2453,7 @@ void InitTensor4DV2(XTensor * tensor, const int d0, const int d1, const int d2,
dims[2] = d2; dims[2] = d2;
dims[3] = d3; dims[3] = d3;
InitTensorV2(tensor, 4, dims, myDataType, myDevID); InitTensorV2(tensor, 4, dims, myDataType, myDevID, isEnableGrad);
} }
/* /*
...@@ -2584,7 +2499,7 @@ initialize a dense 5d tensor V2 ...@@ -2584,7 +2499,7 @@ initialize a dense 5d tensor V2
*/ */
void InitTensor5DV2(XTensor * tensor, const int d0, const int d1, const int d2, const int d3, const int d4, void InitTensor5DV2(XTensor * tensor, const int d0, const int d1, const int d2, const int d3, const int d4,
const TENSOR_DATA_TYPE myDataType, const int myDevID) const TENSOR_DATA_TYPE myDataType, const int myDevID, const bool isEnableGrad)
{ {
int dims[5]; int dims[5];
dims[0] = d0; dims[0] = d0;
...@@ -2593,7 +2508,7 @@ void InitTensor5DV2(XTensor * tensor, const int d0, const int d1, const int d2, ...@@ -2593,7 +2508,7 @@ void InitTensor5DV2(XTensor * tensor, const int d0, const int d1, const int d2,
dims[3] = d3; dims[3] = d3;
dims[4] = d4; dims[4] = d4;
InitTensorV2(tensor, 5, dims, myDataType, myDevID); InitTensorV2(tensor, 5, dims, myDataType, myDevID, isEnableGrad);
} }
/* /*
...@@ -2685,10 +2600,12 @@ generate a dense XTensor V2 ...@@ -2685,10 +2600,12 @@ generate a dense XTensor V2
*/ */
XTensor * NewTensorV2(const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType, XTensor * NewTensorV2(const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType,
const int myDevID) const int myDevID, const bool isEnableGrad)
{ {
XMem * myMem = GMems.GetMem(myDevID); XMem * myMem = GMems.GetMem(myDevID);
return new XTensor(myOrder, myDimSize, myDataType, 1.0F, myDevID, myMem); XTensor * tensor = new XTensor(myOrder, myDimSize, myDataType, 1.0F, myDevID, myMem);
tensor->enableGrad = isEnableGrad;
return tensor;
} }
/* /*
...@@ -2714,6 +2631,9 @@ XTensor * NewTensorBuf(const int myOrder, const int * myDimSize, ...@@ -2714,6 +2631,9 @@ XTensor * NewTensorBuf(const int myOrder, const int * myDimSize,
XTensor * tensor = NewTensor(myOrder, dims, myDataType, myDenseRatio, devID, myMem); XTensor * tensor = NewTensor(myOrder, dims, myDataType, myDenseRatio, devID, myMem);
if (tensor->unitNum * tensor->unitSize == 176657664) {
tensor->Dump(stderr, "", 200);
}
if(myMem != NULL) if(myMem != NULL)
tensor->data = myMem->AllocBuf(myMem->devID, tensor->unitNum * tensor->unitSize); tensor->data = myMem->AllocBuf(myMem->devID, tensor->unitNum * tensor->unitSize);
else else
...@@ -2732,14 +2652,14 @@ generate a dense XTensor which allocates data on the buffer V2 ...@@ -2732,14 +2652,14 @@ generate a dense XTensor which allocates data on the buffer V2
*/ */
XTensor * NewTensorBufV2(const int myOrder, const int * myDimSize, XTensor * NewTensorBufV2(const int myOrder, const int * myDimSize,
const TENSOR_DATA_TYPE myDataType, const int devID) const TENSOR_DATA_TYPE myDataType, const int devID, const bool isEnableGrad)
{ {
int dims[MAX_TENSOR_DIM_NUM]; int dims[MAX_TENSOR_DIM_NUM];
memcpy(dims, myDimSize, sizeof(int) * myOrder); memcpy(dims, myDimSize, sizeof(int) * myOrder);
dims[0] = -abs(dims[0]); dims[0] = -abs(dims[0]);
XTensor * tensor = NewTensorV2(myOrder, dims, myDataType, devID); XTensor * tensor = NewTensorV2(myOrder, dims, myDataType, devID, isEnableGrad);
if (tensor->unitNum * tensor->unitSize == 176657664) { if (tensor->unitNum * tensor->unitSize == 176657664) {
tensor->Dump(stderr, "", 200); tensor->Dump(stderr, "", 200);
...@@ -2771,10 +2691,10 @@ generate a XTensor which allocates data on the buffer V2 ...@@ -2771,10 +2691,10 @@ generate a XTensor which allocates data on the buffer V2
>> reference - reference tensor >> reference - reference tensor
>> devID - device id >> devID - device id
*/ */
XTensor * NewTensorBufV2(const XTensor * reference, int devID) XTensor * NewTensorBufV2(const XTensor * reference, int devID, const bool isEnableGrad)
{ {
return NewTensorBufV2(reference->order, reference->dimSize, return NewTensorBufV2(reference->order, reference->dimSize,
reference->dataType, devID); reference->dataType, devID, isEnableGrad);
} }
/* /*
...@@ -2806,12 +2726,12 @@ generate a dense vector V2 ...@@ -2806,12 +2726,12 @@ generate a dense vector V2
*/ */
XTensor * NewTensor1DV2(const int num, XTensor * NewTensor1DV2(const int num,
const TENSOR_DATA_TYPE myDataType, const int myDevID) const TENSOR_DATA_TYPE myDataType, const int myDevID, const bool isEnableGrad)
{ {
int dims[1]; int dims[1];
dims[0] = num; dims[0] = num;
return NewTensorV2(1, dims, myDataType, myDevID); return NewTensorV2(1, dims, myDataType, myDevID, isEnableGrad);
} }
/* /*
...@@ -2846,13 +2766,13 @@ generate a dense matrix V2 ...@@ -2846,13 +2766,13 @@ generate a dense matrix V2
*/ */
XTensor * NewTensor2DV2(const int rowNum, const int colNum, XTensor * NewTensor2DV2(const int rowNum, const int colNum,
const TENSOR_DATA_TYPE myDataType, const int myDevID) const TENSOR_DATA_TYPE myDataType, const int myDevID, const bool isEnableGrad)
{ {
int dims[2]; int dims[2];
dims[0] = rowNum; dims[0] = rowNum;
dims[1] = colNum; dims[1] = colNum;
return NewTensorV2(2, dims, myDataType, myDevID); return NewTensorV2(2, dims, myDataType, myDevID, isEnableGrad);
} }
/* /*
...@@ -2890,14 +2810,14 @@ generate a dense 3d tensor V2 ...@@ -2890,14 +2810,14 @@ generate a dense 3d tensor V2
*/ */
XTensor * NewTensor3DV2(const int d0, const int d1, const int d2, XTensor * NewTensor3DV2(const int d0, const int d1, const int d2,
const TENSOR_DATA_TYPE myDataType, const int myDevID) const TENSOR_DATA_TYPE myDataType, const int myDevID, const bool isEnableGrad)
{ {
int dims[3]; int dims[3];
dims[0] = d0; dims[0] = d0;
dims[1] = d1; dims[1] = d1;
dims[2] = d2; dims[2] = d2;
return NewTensorV2(3, dims, myDataType, myDevID); return NewTensorV2(3, dims, myDataType, myDevID, isEnableGrad);
} }
/* /*
...@@ -2938,7 +2858,7 @@ generate a dense 4d tensor V2 ...@@ -2938,7 +2858,7 @@ generate a dense 4d tensor V2
*/ */
XTensor * NewTensor4DV2(const int d0, const int d1, const int d2, const int d3, XTensor * NewTensor4DV2(const int d0, const int d1, const int d2, const int d3,
const TENSOR_DATA_TYPE myDataType, const int myDevID) const TENSOR_DATA_TYPE myDataType, const int myDevID, const bool isEnableGrad)
{ {
int dims[4]; int dims[4];
dims[0] = d0; dims[0] = d0;
...@@ -2946,7 +2866,7 @@ XTensor * NewTensor4DV2(const int d0, const int d1, const int d2, const int d3, ...@@ -2946,7 +2866,7 @@ XTensor * NewTensor4DV2(const int d0, const int d1, const int d2, const int d3,
dims[2] = d2; dims[2] = d2;
dims[3] = d3; dims[3] = d3;
return NewTensorV2(4, dims, myDataType, myDevID); return NewTensorV2(4, dims, myDataType, myDevID, isEnableGrad);
} }
/* /*
...@@ -2990,7 +2910,7 @@ generate a dense 5d tensor V2 ...@@ -2990,7 +2910,7 @@ generate a dense 5d tensor V2
*/ */
XTensor * NewTensor5DV2(const int d0, const int d1, const int d2, const int d3, const int d4, XTensor * NewTensor5DV2(const int d0, const int d1, const int d2, const int d3, const int d4,
const TENSOR_DATA_TYPE myDataType, const int myDevID) const TENSOR_DATA_TYPE myDataType, const int myDevID, const bool isEnableGrad)
{ {
int dims[5]; int dims[5];
dims[0] = d0; dims[0] = d0;
...@@ -2999,7 +2919,17 @@ XTensor * NewTensor5DV2(const int d0, const int d1, const int d2, const int d3, ...@@ -2999,7 +2919,17 @@ XTensor * NewTensor5DV2(const int d0, const int d1, const int d2, const int d3,
dims[3] = d3; dims[3] = d3;
dims[4] = d4; dims[4] = d4;
return NewTensorV2(5, dims, myDataType, myDevID); return NewTensorV2(5, dims, myDataType, myDevID, isEnableGrad);
}
XTensor * NewTensorRange(int lower, int upper, int step, const TENSOR_DATA_TYPE myDataType, const int myDevID, const bool isEnableGrad)
{
int size = abs(upper - lower);
int unitNum = ceil(1.0 * size / abs(step));
XTensor * tensor = NewTensor1DV2(unitNum, myDataType, myDevID, isEnableGrad);
tensor->Range(lower, upper, step);
return tensor;
} }
/* /*
......
...@@ -290,16 +290,16 @@ public: ...@@ -290,16 +290,16 @@ public:
int GetSize() const; int GetSize() const;
/* get size of the memory used */ /* get size of the memory used */
int GetDataSizeInChar(); int GetDataSizeInChar() const;
/* get unit size in terms of "dataType" */ /* get unit size in terms of "dataType" */
int GetUnitSize(TENSOR_DATA_TYPE myDataType); int GetUnitSize(TENSOR_DATA_TYPE myDataType) const;
/* get offset (2D) */ /* get offset (2D) */
MTYPE GetOffset2D(int row, int col); MTYPE GetOffset2D(int row, int col) const;
/* get offset (3D) */ /* get offset (3D) */
MTYPE GetOffset3D(int d0, int d1, int d2); MTYPE GetOffset3D(int d0, int d1, int d2) const;
/* a tensor with all entries of 0 */ /* a tensor with all entries of 0 */
void SetZeroAll(XStream * stream = NULL); void SetZeroAll(XStream * stream = NULL);
...@@ -310,6 +310,9 @@ public: ...@@ -310,6 +310,9 @@ public:
/* generate data items with a uniform distribution in [0, 1] */ /* generate data items with a uniform distribution in [0, 1] */
void Rand(int rNum, int cNum); void Rand(int rNum, int cNum);
/* generate data items with a range by start, end and the step */
void Range(DTYPE lower, DTYPE upper, DTYPE step);
/* set tensor items by a uniform distribution */ /* set tensor items by a uniform distribution */
void SetDataRand(DTYPE lower = 0.0F, DTYPE upper = 1.0F); void SetDataRand(DTYPE lower = 0.0F, DTYPE upper = 1.0F);
...@@ -323,10 +326,10 @@ public: ...@@ -323,10 +326,10 @@ public:
void SetDataBatchedWithValues(MTYPE * offsets, void * values, int num); void SetDataBatchedWithValues(MTYPE * offsets, void * values, int num);
/* check whether the data array is the same as the answer */ /* check whether the data array is the same as the answer */
bool CheckData(const void * answer, int num, int beg = 0); bool CheckData(const void * answer, int num, int beg = 0) const;
/* check whether the data array is the same as the answer */ /* check whether the data array is the same as the answer */
bool CheckData(const void * answer, int num, float tolerance, int beg = 0); bool CheckData(const void * answer, int num, float tolerance, int beg = 0) const;
/* set the pointer to "data" */ /* set the pointer to "data" */
void SetDataPointer(); void SetDataPointer();
...@@ -335,40 +338,40 @@ public: ...@@ -335,40 +338,40 @@ public:
void SetAscendingOrder(int dim); void SetAscendingOrder(int dim);
/* get the value of a cell with the index */ /* get the value of a cell with the index */
DTYPE Get(int index[], int size = -1); DTYPE Get(int index[], int size = -1) const;
/* get the value of a cell with the offset */ /* get the value of a cell with the offset */
DTYPE Get(int offset); DTYPE Get(int offset) const;
/* get the pointer to a cell */ /* get the pointer to a cell */
void * GetCell(int index[], int size = -1) const; void * GetCell(int index[], int size = -1) const;
/* get the default type value of a cell in a 1d tensor */ /* get the default type value of a cell in a 1d tensor */
DTYPE Get1D(int i); DTYPE Get1D(int i) const;
/* get the default type value of a cell in a 2d tensor */ /* get the default type value of a cell in a 2d tensor */
DTYPE Get2D(int ni, int mi) const; DTYPE Get2D(int ni, int mi) const;
/* get the default type value of a cell in a 3d tensor */ /* get the default type value of a cell in a 3d tensor */
DTYPE Get3D(int d0, int d1, int d2); DTYPE Get3D(int d0, int d1, int d2) const;
/* get the int value of a cell by its offset */ /* get the int value of a cell by its offset */
int GetInt(int offset); int GetInt(int offset) const;
/* get the int value of a cell in a 1d tensor */ /* get the int value of a cell in a 1d tensor */
int Get1DInt(int i); int Get1DInt(int i) const;
/* get the int value of a cell in a 2d tensor */ /* get the int value of a cell in a 2d tensor */
int Get2DInt(int ni, int mi); int Get2DInt(int ni, int mi) const;
/* get the int value of a cell in a 3d tensor */ /* get the int value of a cell in a 3d tensor */
int Get3DInt(int d0, int d1, int d2); int Get3DInt(int d0, int d1, int d2) const;
/* get the value of a cell in a sparse tensor */ /* get the value of a cell in a sparse tensor */
DTYPE GetInSparse(int i); DTYPE GetInSparse(int i) const;
/* get the key value of a tuple in a sparse tensor */ /* get the key value of a tuple in a sparse tensor */
int GetKeyInSparse(int i); int GetKeyInSparse(int i) const;
/* set the value of a cell */ /* set the value of a cell */
bool Set(DTYPE value, int index[], int size = -1); bool Set(DTYPE value, int index[], int size = -1);
...@@ -404,7 +407,7 @@ public: ...@@ -404,7 +407,7 @@ public:
bool Add2D(DTYPE value, int ni, int mi); bool Add2D(DTYPE value, int ni, int mi);
/* get the number of non-zero elements (in a sparse tensor) */ /* get the number of non-zero elements (in a sparse tensor) */
int GetNonzeroSize(); int GetNonzeroSize() const;
/* set the tensor as "temporary" */ /* set the tensor as "temporary" */
void SetTMPFlag(bool myIsTmp = true); void SetTMPFlag(bool myIsTmp = true);
...@@ -430,15 +433,18 @@ public: ...@@ -430,15 +433,18 @@ public:
void Dump(FILE * file, const char * label = NULL, const int n = -1, const int beg = 0, const int verbose = 0); void Dump(FILE * file, const char * label = NULL, const int n = -1, const int beg = 0, const int verbose = 0);
/* dump data to a file */ /* dump data to a file */
void DumpFormat(FILE * file, const char * label = NULL, const int n = -1, const int beg = 0, const int verbose = 0);
/* dump data to a file */
static static
void Dump(const XTensor * tensor, FILE * file, const char * label = NULL, const int n = -1, const int beg = 0, const int verbose = 0); void Dump(const XTensor * tensor, FILE * file, const char * label = NULL, const int n = -1, const int beg = 0, const int verbose = 0);
/* dump data to a binary file */
void BinaryDump(FILE * file);
/* read data from a file */ /* read data from a file */
void Read(FILE * file, const char * label = NULL); void Read(FILE * file, const char * label = NULL);
/* read data from a binary file */
void BinaryRead(FILE * file, size_t offset);
/* flush the data to the target device */ /* flush the data to the target device */
void FlushToMem(XMem * targetMem); void FlushToMem(XMem * targetMem);
...@@ -469,7 +475,7 @@ void InitTensor(XTensor * tensor, ...@@ -469,7 +475,7 @@ void InitTensor(XTensor * tensor,
/* initialize a dense XTensor V2 */ /* initialize a dense XTensor V2 */
void InitTensorV2(XTensor * tensor, void InitTensorV2(XTensor * tensor,
const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1); const int myDevID = -1, const bool isEnableGrad = true);
/* initialize a dense vector */ /* initialize a dense vector */
void InitTensor1D(XTensor * tensor, const int num, void InitTensor1D(XTensor * tensor, const int num,
...@@ -477,7 +483,7 @@ void InitTensor1D(XTensor * tensor, const int num, ...@@ -477,7 +483,7 @@ void InitTensor1D(XTensor * tensor, const int num,
/* initialize a dense vector V2 */ /* initialize a dense vector V2 */
void InitTensor1DV2(XTensor * tensor, const int num, void InitTensor1DV2(XTensor * tensor, const int num,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1); const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = true);
/* initialize a dense matrix */ /* initialize a dense matrix */
void InitTensor2D(XTensor * tensor, const int rowNum, const int colNum, void InitTensor2D(XTensor * tensor, const int rowNum, const int colNum,
...@@ -485,7 +491,7 @@ void InitTensor2D(XTensor * tensor, const int rowNum, const int colNum, ...@@ -485,7 +491,7 @@ void InitTensor2D(XTensor * tensor, const int rowNum, const int colNum,
/* initialize a dense matrix V2 */ /* initialize a dense matrix V2 */
void InitTensor2DV2(XTensor * tensor, const int rowNum, const int colNum, void InitTensor2DV2(XTensor * tensor, const int rowNum, const int colNum,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1); const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = true);
/* initialize a dense 3d tensor */ /* initialize a dense 3d tensor */
void InitTensor3D(XTensor * tensor, const int d0, const int d1, const int d2, void InitTensor3D(XTensor * tensor, const int d0, const int d1, const int d2,
...@@ -493,7 +499,7 @@ void InitTensor3D(XTensor * tensor, const int d0, const int d1, const int d2, ...@@ -493,7 +499,7 @@ void InitTensor3D(XTensor * tensor, const int d0, const int d1, const int d2,
/* initialize a dense 3d tensor V2 */ /* initialize a dense 3d tensor V2 */
void InitTensor3DV2(XTensor * tensor, const int d0, const int d1, const int d2, void InitTensor3DV2(XTensor * tensor, const int d0, const int d1, const int d2,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1); const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = true);
/* initialize a dense 4d tensor */ /* initialize a dense 4d tensor */
void InitTensor4D(XTensor * tensor, const int d0, const int d1, const int d2, const int d3, void InitTensor4D(XTensor * tensor, const int d0, const int d1, const int d2, const int d3,
...@@ -501,7 +507,7 @@ void InitTensor4D(XTensor * tensor, const int d0, const int d1, const int d2, co ...@@ -501,7 +507,7 @@ void InitTensor4D(XTensor * tensor, const int d0, const int d1, const int d2, co
/* initialize a dense 4d tensor V2 */ /* initialize a dense 4d tensor V2 */
void InitTensor4DV2(XTensor * tensor, const int d0, const int d1, const int d2, const int d3, void InitTensor4DV2(XTensor * tensor, const int d0, const int d1, const int d2, const int d3,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1); const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = true);
/* initialize a dense 5d tensor */ /* initialize a dense 5d tensor */
void InitTensor5D(XTensor * tensor, const int d0, const int d1, const int d2, const int d3, const int d4, void InitTensor5D(XTensor * tensor, const int d0, const int d1, const int d2, const int d3, const int d4,
...@@ -509,7 +515,7 @@ void InitTensor5D(XTensor * tensor, const int d0, const int d1, const int d2, co ...@@ -509,7 +515,7 @@ void InitTensor5D(XTensor * tensor, const int d0, const int d1, const int d2, co
/* initialize a dense 5d tensor V2 */ /* initialize a dense 5d tensor V2 */
void InitTensor5DV2(XTensor * tensor, const int d0, const int d1, const int d2, const int d3, const int d4, void InitTensor5DV2(XTensor * tensor, const int d0, const int d1, const int d2, const int d3, const int d4,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1); const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = true);
/* initialize a tensor with a reference tensor */ /* initialize a tensor with a reference tensor */
void InitTensor(XTensor * tensor, const XTensor * reference); void InitTensor(XTensor * tensor, const XTensor * reference);
...@@ -529,7 +535,7 @@ XTensor * NewTensor(const int myOrder, const int * myDimSize, const TENSOR_DATA_ ...@@ -529,7 +535,7 @@ XTensor * NewTensor(const int myOrder, const int * myDimSize, const TENSOR_DATA_
/* generate a dense XTensor V2 */ /* generate a dense XTensor V2 */
XTensor * NewTensorV2(const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType = X_FLOAT, XTensor * NewTensorV2(const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1); const int myDevID = -1, const bool isEnableGrad = true);
/* generate a XTensor which allocates data on the buffer */ /* generate a XTensor which allocates data on the buffer */
XTensor * NewTensorBuf(const int myOrder, const int * myDimSize, XTensor * NewTensorBuf(const int myOrder, const int * myDimSize,
...@@ -538,20 +544,20 @@ XTensor * NewTensorBuf(const int myOrder, const int * myDimSize, ...@@ -538,20 +544,20 @@ XTensor * NewTensorBuf(const int myOrder, const int * myDimSize,
/* generate a dense XTensor which allocates data on the buffer V2 */ /* generate a dense XTensor which allocates data on the buffer V2 */
XTensor * NewTensorBufV2(const int myOrder, const int * myDimSize, XTensor * NewTensorBufV2(const int myOrder, const int * myDimSize,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1); const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = true);
/* generate a XTensor which allocates data on the buffer */ /* generate a XTensor which allocates data on the buffer */
XTensor * NewTensorBuf(const XTensor * reference, int devID, XMem * myMem); XTensor * NewTensorBuf(const XTensor * reference, int devID, XMem * myMem);
/* generate a XTensor which allocates data on the buffer V2 */ /* generate a XTensor which allocates data on the buffer V2 */
XTensor * NewTensorBufV2(const XTensor * reference, int devID); XTensor * NewTensorBufV2(const XTensor * reference, int devID, const bool isEnableGrad = true);
/* generate a dense vector */ /* generate a dense vector */
XTensor * NewTensor1D(const int num, const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, XTensor * NewTensor1D(const int num, const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1,
XMem * myMem = NULL); XMem * myMem = NULL);
/* generate a dense vector V2 */ /* generate a dense vector V2 */
XTensor * NewTensor1DV2(const int num, const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1); XTensor * NewTensor1DV2(const int num, const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = true);
/* generate a dense matrix */ /* generate a dense matrix */
XTensor * NewTensor2D(const int rowNum, const int colNum, XTensor * NewTensor2D(const int rowNum, const int colNum,
...@@ -561,7 +567,7 @@ XTensor * NewTensor2D(const int rowNum, const int colNum, ...@@ -561,7 +567,7 @@ XTensor * NewTensor2D(const int rowNum, const int colNum,
/* generate a dense matrix V2 */ /* generate a dense matrix V2 */
XTensor * NewTensor2DV2(const int rowNum, const int colNum, XTensor * NewTensor2DV2(const int rowNum, const int colNum,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1); const int myDevID = -1, const bool isEnableGrad = true);
/* generate a dense 3d tensor */ /* generate a dense 3d tensor */
XTensor * NewTensor3D(const int d0, const int d1, const int d2, XTensor * NewTensor3D(const int d0, const int d1, const int d2,
...@@ -571,7 +577,7 @@ XTensor * NewTensor3D(const int d0, const int d1, const int d2, ...@@ -571,7 +577,7 @@ XTensor * NewTensor3D(const int d0, const int d1, const int d2,
/* generate a dense 3d tensor V2 */ /* generate a dense 3d tensor V2 */
XTensor * NewTensor3DV2(const int d0, const int d1, const int d2, XTensor * NewTensor3DV2(const int d0, const int d1, const int d2,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1); const int myDevID = -1, const bool isEnableGrad = true);
/* generate a dense 4d tensor */ /* generate a dense 4d tensor */
XTensor * NewTensor4D(const int d0, const int d1, const int d2, const int d3, XTensor * NewTensor4D(const int d0, const int d1, const int d2, const int d3,
...@@ -581,7 +587,7 @@ XTensor * NewTensor4D(const int d0, const int d1, const int d2, const int d3, ...@@ -581,7 +587,7 @@ XTensor * NewTensor4D(const int d0, const int d1, const int d2, const int d3,
/* generate a dense 4d tensor V2 */ /* generate a dense 4d tensor V2 */
XTensor * NewTensor4DV2(const int d0, const int d1, const int d2, const int d3, XTensor * NewTensor4DV2(const int d0, const int d1, const int d2, const int d3,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1); const int myDevID = -1, const bool isEnableGrad = true);
/* generate a dense 5d tensor */ /* generate a dense 5d tensor */
XTensor * NewTensor5D(const int d0, const int d1, const int d2, const int d3, const int d4, XTensor * NewTensor5D(const int d0, const int d1, const int d2, const int d3, const int d4,
...@@ -591,7 +597,10 @@ XTensor * NewTensor5D(const int d0, const int d1, const int d2, const int d3, co ...@@ -591,7 +597,10 @@ XTensor * NewTensor5D(const int d0, const int d1, const int d2, const int d3, co
/* generate a dense 5d tensor V2 */ /* generate a dense 5d tensor V2 */
XTensor * NewTensor5DV2(const int d0, const int d1, const int d2, const int d3, const int d4, XTensor * NewTensor5DV2(const int d0, const int d1, const int d2, const int d3, const int d4,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1); const int myDevID = -1, const bool isEnableGrad = true);
/* generate a dense vector by range */
XTensor * NewTensorRange(int lower, int upper, int step, const TENSOR_DATA_TYPE myDataType = X_INT, const int myDevID = -1, const bool isEnableGrad = true);
/* generate a copy of XTensor (with a reference to a given tensor) */ /* generate a copy of XTensor (with a reference to a given tensor) */
XTensor * NewTensor(const XTensor * a, bool isFilledData = true); XTensor * NewTensor(const XTensor * a, bool isFilledData = true);
......
...@@ -215,19 +215,23 @@ XTensor Div(const XTensor &a, const XTensor &b, DTYPE alpha, int leadingDim) ...@@ -215,19 +215,23 @@ XTensor Div(const XTensor &a, const XTensor &b, DTYPE alpha, int leadingDim)
_Div(&a, &b, &c, alpha, leadingDim); _Div(&a, &b, &c, alpha, leadingDim);
/* tensor connections */ /* tensor connections */
if (a.enableGrad && b.enableGrad) {
XLink::MakeLink(&a, &b, &c, MATH_DIV); XLink::MakeLink(&a, &b, &c, MATH_DIV);
XLink::AddParamToHead(&c, alpha); XLink::AddParamToHead(&c, alpha);
XLink::AddParamToHeadInt(&c, leadingDim); XLink::AddParamToHeadInt(&c, leadingDim);
} }
}
else if(n >= 0 && n < a.order){ else if(n >= 0 && n < a.order){
/* call _DivDim function */ /* call _DivDim function */
_DivDim(&a, &b, &c, n, alpha); _DivDim(&a, &b, &c, n, alpha);
/* tensor connections */ /* tensor connections */
if (a.enableGrad && b.enableGrad) {
XLink::MakeLink(&a, &b, &c, MATH_DIVDIM); XLink::MakeLink(&a, &b, &c, MATH_DIVDIM);
XLink::AddParamToHeadInt(&c, n); XLink::AddParamToHeadInt(&c, n);
XLink::AddParamToHead(&c, alpha); XLink::AddParamToHead(&c, alpha);
} }
}
else{ else{
ShowNTErrors("Something is wrong!"); ShowNTErrors("Something is wrong!");
} }
...@@ -261,7 +265,7 @@ void Div(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int leadin ...@@ -261,7 +265,7 @@ void Div(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int leadin
/* call _Div function */ /* call _Div function */
_Div(&a, &b, &c, 0, leadingDim); _Div(&a, &b, &c, 0, leadingDim);
if (c.enableGrad) { if (a.enableGrad && b.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_DIV); XLink::MakeLink(&a, &b, &c, MATH_DIV);
XLink::AddParamToHead(&c, alpha); XLink::AddParamToHead(&c, alpha);
...@@ -272,7 +276,7 @@ void Div(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int leadin ...@@ -272,7 +276,7 @@ void Div(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int leadin
/* call _DivDim function */ /* call _DivDim function */
_DivDim(&a, &b, &c, n, alpha); _DivDim(&a, &b, &c, n, alpha);
if (c.enableGrad) { if (a.enableGrad && b.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_DIVDIM); XLink::MakeLink(&a, &b, &c, MATH_DIVDIM);
XLink::AddParamToHeadInt(&c, n); XLink::AddParamToHeadInt(&c, n);
......
...@@ -164,9 +164,11 @@ XTensor DivDim(const XTensor &a, const XTensor &b, int n, DTYPE alpha) ...@@ -164,9 +164,11 @@ XTensor DivDim(const XTensor &a, const XTensor &b, int n, DTYPE alpha)
_DivDim(&a, &b, &c, n, alpha); _DivDim(&a, &b, &c, n, alpha);
/* tensor connections */ /* tensor connections */
if (a.enableGrad && b.enableGrad) {
XLink::MakeLink(&a, &b, &c, MATH_DIVDIM); XLink::MakeLink(&a, &b, &c, MATH_DIVDIM);
XLink::AddParamToHeadInt(&c, n); XLink::AddParamToHeadInt(&c, n);
XLink::AddParamToHead(&c, alpha); XLink::AddParamToHead(&c, alpha);
}
return c; return c;
} }
...@@ -193,7 +195,7 @@ void DivDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE alpha) ...@@ -193,7 +195,7 @@ void DivDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE alpha)
/* call _Div function */ /* call _Div function */
_DivDim(&a, &b, &c, n, alpha); _DivDim(&a, &b, &c, n, alpha);
if (c.enableGrad == true) { if (a.enableGrad && b.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_DIVDIM); XLink::MakeLink(&a, &b, &c, MATH_DIVDIM);
XLink::AddParamToHeadInt(&c, n); XLink::AddParamToHeadInt(&c, n);
......
...@@ -155,8 +155,10 @@ XTensor Mask(const XTensor &a, const XTensor &mask, DTYPE alpha) ...@@ -155,8 +155,10 @@ XTensor Mask(const XTensor &a, const XTensor &mask, DTYPE alpha)
_Mask(&a, &mask, &c, alpha); _Mask(&a, &mask, &c, alpha);
/* tensor connections */ /* tensor connections */
if (a.enableGrad) {
XLink::MakeLink(&a, &mask, &c, MATH_MASK); XLink::MakeLink(&a, &mask, &c, MATH_MASK);
XLink::AddParamToHead(&c, alpha); XLink::AddParamToHead(&c, alpha);
}
return c; return c;
} }
...@@ -176,7 +178,7 @@ void Mask(const XTensor &a, const XTensor &mask, XTensor &c, DTYPE alpha) ...@@ -176,7 +178,7 @@ void Mask(const XTensor &a, const XTensor &mask, XTensor &c, DTYPE alpha)
/* call _Mask function */ /* call _Mask function */
_Mask(&a, &mask, &c, alpha); _Mask(&a, &mask, &c, alpha);
if (c.enableGrad) { if (a.enableGrad) {
XLink::MakeLink(&a, &mask, &c, MATH_MASK); XLink::MakeLink(&a, &mask, &c, MATH_MASK);
XLink::AddParamToHead(&c, alpha); XLink::AddParamToHead(&c, alpha);
} }
......
...@@ -296,10 +296,12 @@ XTensor MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA, ...@@ -296,10 +296,12 @@ XTensor MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA,
_MatrixMul(&a, transposedA, &b, transposedB, &c, alpha, 0, parallelRunner); _MatrixMul(&a, transposedA, &b, transposedB, &c, alpha, 0, parallelRunner);
/* tensor connections */ /* tensor connections */
if (a.enableGrad && b.enableGrad) {
XLink::MakeLink(&a, &b, &c, MATH_MATRIXMUL); XLink::MakeLink(&a, &b, &c, MATH_MATRIXMUL);
XLink::AddParamToHeadTrans(&c, transposedA); XLink::AddParamToHeadTrans(&c, transposedA);
XLink::AddParamToHeadTrans(&c, transposedB); XLink::AddParamToHeadTrans(&c, transposedB);
XLink::AddParamToHead(&c, alpha); XLink::AddParamToHead(&c, alpha);
}
/* destroy variables */ /* destroy variables */
delete[] dimSize; delete[] dimSize;
...@@ -344,7 +346,7 @@ void MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA, ...@@ -344,7 +346,7 @@ void MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA,
/* call _MatrixMul function */ /* call _MatrixMul function */
_MatrixMul(&a, transposedA, &b, transposedB, &c, alpha, beta, parallelRunner); _MatrixMul(&a, transposedA, &b, transposedB, &c, alpha, beta, parallelRunner);
if (c.enableGrad) { if (a.enableGrad && b.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_MATRIXMUL); XLink::MakeLink(&a, &b, &c, MATH_MATRIXMUL);
XLink::AddParamToHeadTrans(&c, transposedA); XLink::AddParamToHeadTrans(&c, transposedA);
...@@ -393,10 +395,12 @@ XTensor MatrixMul(const XTensor &a, const XTensor &b, ...@@ -393,10 +395,12 @@ XTensor MatrixMul(const XTensor &a, const XTensor &b,
_MatrixMul(&a, X_NOTRANS, &b, X_NOTRANS, &c, alpha, 0, parallelRunner); _MatrixMul(&a, X_NOTRANS, &b, X_NOTRANS, &c, alpha, 0, parallelRunner);
/* tensor connections */ /* tensor connections */
if (a.enableGrad && b.enableGrad) {
XLink::MakeLink(&a, &b, &c, MATH_MATRIXMUL); XLink::MakeLink(&a, &b, &c, MATH_MATRIXMUL);
XLink::AddParamToHeadTrans(&c, X_NOTRANS); XLink::AddParamToHeadTrans(&c, X_NOTRANS);
XLink::AddParamToHeadTrans(&c, X_NOTRANS); XLink::AddParamToHeadTrans(&c, X_NOTRANS);
XLink::AddParamToHead(&c, alpha); XLink::AddParamToHead(&c, alpha);
}
/* destroy variables */ /* destroy variables */
delete[] dimSize; delete[] dimSize;
...@@ -440,7 +444,7 @@ void MatrixMul(const XTensor &a, const XTensor &b, XTensor &c, ...@@ -440,7 +444,7 @@ void MatrixMul(const XTensor &a, const XTensor &b, XTensor &c,
/* call _MatrixMul function */ /* call _MatrixMul function */
_MatrixMul(&a, X_NOTRANS, &b, X_NOTRANS, &c, alpha, 0, parallelRunner); _MatrixMul(&a, X_NOTRANS, &b, X_NOTRANS, &c, alpha, 0, parallelRunner);
if (c.enableGrad) { if (a.enableGrad && b.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_MATRIXMUL); XLink::MakeLink(&a, &b, &c, MATH_MATRIXMUL);
XLink::AddParamToHeadTrans(&c, X_NOTRANS); XLink::AddParamToHeadTrans(&c, X_NOTRANS);
......
...@@ -82,10 +82,11 @@ void _MatrixMul2D(const XTensor * a, MATRIX_TRANS_TYPE transposedA, ...@@ -82,10 +82,11 @@ void _MatrixMul2D(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
b->dataType == DEFAULT_DTYPE && b->dataType == DEFAULT_DTYPE &&
c->dataType == DEFAULT_DTYPE) c->dataType == DEFAULT_DTYPE)
{ {
if (useBLAS) #if defined(USE_BLAS)
_MatrixMULCPU(a, transposedA, b, transposedB, c, alpha, beta); _MatrixMULCPU(a, transposedA, b, transposedB, c, alpha, beta);
else #else
_MatrixMul2DParallel(a, transposedA, b, transposedB, c, alpha, beta, parallelRunner); _MatrixMul2DParallel(a, transposedA, b, transposedB, c, alpha, beta, parallelRunner);
#endif
} }
else { else {
// TODO!! // TODO!!
......
...@@ -199,10 +199,7 @@ void _MatrixMulBatchedCPU(const XTensor * a, MATRIX_TRANS_TYPE transposedA, ...@@ -199,10 +199,7 @@ void _MatrixMulBatchedCPU(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
bi->data = (char*)b->data + i * bRealBlockSize; bi->data = (char*)b->data + i * bRealBlockSize;
ci->data = (char*)c->data + i * cRealBlockSize; ci->data = (char*)c->data + i * cRealBlockSize;
#ifdef USE_BLAS #ifdef USE_BLAS
if (useBLAS)
_MatrixMULCPU(ai, transposedA, bi, transposedB, ci, alpha, beta); _MatrixMULCPU(ai, transposedA, bi, transposedB, ci, alpha, beta);
else
_MatrixMul2D(ai, transposedA, bi, transposedB, ci, alpha, beta);
#else #else
_MatrixMul2D(ai, transposedA, bi, transposedB, ci, alpha, beta); _MatrixMul2D(ai, transposedA, bi, transposedB, ci, alpha, beta);
#endif #endif
...@@ -262,10 +259,7 @@ void _MatrixMulBatchedCPU(const TensorList * a, MATRIX_TRANS_TYPE transposedA, ...@@ -262,10 +259,7 @@ void _MatrixMulBatchedCPU(const TensorList * a, MATRIX_TRANS_TYPE transposedA,
CheckNTErrors((bi->order == 2), "2d tensor (i.e., matrix) is required!"); CheckNTErrors((bi->order == 2), "2d tensor (i.e., matrix) is required!");
CheckNTErrors((ci->order == 2), "2d tensor (i.e., matrix) is required!"); CheckNTErrors((ci->order == 2), "2d tensor (i.e., matrix) is required!");
#ifdef USE_BLAS #ifdef USE_BLAS
if (useBLAS)
_MatrixMULCPU(ai, transposedA, bi, transposedB, ci, alpha, beta); _MatrixMULCPU(ai, transposedA, bi, transposedB, ci, alpha, beta);
else
_MatrixMul2D(ai, transposedA, bi, transposedB, ci, alpha, beta);
#else #else
_MatrixMul2D(ai, transposedA, bi, transposedB, ci, alpha, beta); _MatrixMul2D(ai, transposedA, bi, transposedB, ci, alpha, beta);
#endif #endif
...@@ -320,10 +314,12 @@ XTensor MatrixMulBatched(const XTensor &a, MATRIX_TRANS_TYPE transposedA, const ...@@ -320,10 +314,12 @@ XTensor MatrixMulBatched(const XTensor &a, MATRIX_TRANS_TYPE transposedA, const
_MatrixMulBatched(&a, transposedA, &b, transposedB, &c, alpha, 0, parallelRunner); _MatrixMulBatched(&a, transposedA, &b, transposedB, &c, alpha, 0, parallelRunner);
/* tensor connections */ /* tensor connections */
if (a.enableGrad && b.enableGrad) {
XLink::MakeLink(&a, &b, &c, MATH_MATRIXMULBATCHED); XLink::MakeLink(&a, &b, &c, MATH_MATRIXMULBATCHED);
XLink::AddParamToHeadTrans(&c, transposedA); XLink::AddParamToHeadTrans(&c, transposedA);
XLink::AddParamToHeadTrans(&c, transposedB); XLink::AddParamToHeadTrans(&c, transposedB);
XLink::AddParamToHead(&c, alpha); XLink::AddParamToHead(&c, alpha);
}
/* destroy variables */ /* destroy variables */
delete[] dimSize; delete[] dimSize;
...@@ -376,10 +372,12 @@ XTensor MatrixMulBatched(const XTensor &a, const XTensor &b, ...@@ -376,10 +372,12 @@ XTensor MatrixMulBatched(const XTensor &a, const XTensor &b,
_MatrixMulBatched(&a, X_NOTRANS, &b, X_NOTRANS, &c, alpha, 0, parallelRunner); _MatrixMulBatched(&a, X_NOTRANS, &b, X_NOTRANS, &c, alpha, 0, parallelRunner);
/* tensor connections */ /* tensor connections */
if (a.enableGrad && b.enableGrad) {
XLink::MakeLink(&a, &b, &c, MATH_MATRIXMULBATCHED); XLink::MakeLink(&a, &b, &c, MATH_MATRIXMULBATCHED);
XLink::AddParamToHeadTrans(&c, X_NOTRANS); XLink::AddParamToHeadTrans(&c, X_NOTRANS);
XLink::AddParamToHeadTrans(&c, X_NOTRANS); XLink::AddParamToHeadTrans(&c, X_NOTRANS);
XLink::AddParamToHead(&c, alpha); XLink::AddParamToHead(&c, alpha);
}
/* destroy variables */ /* destroy variables */
delete[] dimSize; delete[] dimSize;
......
...@@ -118,11 +118,87 @@ XTensor MulAndShift(const XTensor &x, const XTensor &w, const XTensor &b, ...@@ -118,11 +118,87 @@ XTensor MulAndShift(const XTensor &x, const XTensor &w, const XTensor &b,
} }
/* tensor connections */ /* tensor connections */
if (w.enableGrad && b.enableGrad) {
XLink::MakeLink(&x, &w, &b, &c, MATH_MULANDSHIFT); XLink::MakeLink(&x, &w, &b, &c, MATH_MULANDSHIFT);
XLink::AddParamToHeadInt(&c, n); XLink::AddParamToHeadInt(&c, n);
XLink::AddParamToHeadTrans(&c, X_NOTRANS); XLink::AddParamToHeadTrans(&c, X_NOTRANS);
XLink::AddParamToHeadTrans(&c, X_NOTRANS); XLink::AddParamToHeadTrans(&c, X_NOTRANS);
//XLink::AddParamToHead(&c, beta); }
/* destroy variables */
delete[] dimSize;
DelTensorBuf(tmp);
return c;
}
/*
operation c = x * w + b MulAndShift
>> x - tensor x
>> w - tensor w
>> b - tensor b
>> parallelRunner - parallel processing module
<< return - the result of matrix multiplication
*/
XTensor MulAndShift(const XTensor& x, MATRIX_TRANS_TYPE transposedA,
const XTensor& w, MATRIX_TRANS_TYPE transposedB,
const XTensor& b, DTYPE alpha, XPRunner* parallelRunner)
{
CheckNTErrors(x.dataType == w.dataType, "Input tensors should have the same data type!");
CheckNTErrors(x.order >= 2 && w.order >= 2, "Input tensors must have a order >= 2!");
int xn = transposedA == X_TRANS ? x.dimSizeRDI[0] : x.dimSizeRDI[1];
int xm = transposedA == X_TRANS ? x.dimSizeRDI[1] : x.dimSizeRDI[0];
int wn = transposedB == X_TRANS ? w.dimSizeRDI[0] : w.dimSizeRDI[1];
int wm = transposedB == X_TRANS ? w.dimSizeRDI[1] : w.dimSizeRDI[0];
int order = x.order + w.order - 2;
int sub = 0;
int * dimSize = new int[order];
for (int i = 2; i < x.order; i++)
dimSize[sub++] = x.dimSizeRDI[x.order + 1 - i];
for (int i = 2; i < w.order; i++)
dimSize[sub++] = w.dimSizeRDI[w.order + 1 - i];
dimSize[sub++] = xn;
dimSize[sub++] = wm;
float dr = (!x.isSparse || !w.isSparse) ? 1.0F : MAX(x.denseRatio, w.denseRatio);
XTensor * tmp = NewTensorBuf(order, dimSize, x.dataType, dr, x.devID, x.mem);
/* call _MatrixMul function */
_MatrixMul(&x, transposedA, &w, transposedB, tmp, alpha, 0, parallelRunner);
XTensor c(tmp);
c.SetTMPFlag();
int n = GetSumIndex(tmp, b);
if (n == -1) {
/* call _Sum function */
_Sum(tmp, &b, &c);
// TODO!!
ShowNTErrors("TODO!");
}
else if (n >= 0 && n < tmp->order) {
/* call _SumDim function */
_SumDim(tmp, &b, &c, n);
}
else {
ShowNTErrors("Something is wrong!");
}
/* tensor connections */
if (w.enableGrad && b.enableGrad) {
XLink::MakeLink(&x, &w, &b, &c, MATH_MULANDSHIFT);
XLink::AddParamToHeadInt(&c, n);
XLink::AddParamToHeadTrans(&c, transposedA);
XLink::AddParamToHeadTrans(&c, transposedB);
}
/* destroy variables */ /* destroy variables */
delete[] dimSize; delete[] dimSize;
......
...@@ -31,6 +31,9 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -31,6 +31,9 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
XTensor MulAndShift(const XTensor &x, const XTensor &w, const XTensor &b, XTensor MulAndShift(const XTensor &x, const XTensor &w, const XTensor &b,
DTYPE alpha = (DTYPE)1.0, XPRunner * parallelRunner = NULL); DTYPE alpha = (DTYPE)1.0, XPRunner * parallelRunner = NULL);
XTensor MulAndShift(const XTensor &x, MATRIX_TRANS_TYPE transposedA,
const XTensor &w, MATRIX_TRANS_TYPE transposedB,
const XTensor &b, DTYPE alpha = (DTYPE)1.0, XPRunner * parallelRunner = NULL);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -216,19 +216,23 @@ XTensor Multiply(const XTensor &a, const XTensor &b, DTYPE alpha, int leadingDim ...@@ -216,19 +216,23 @@ XTensor Multiply(const XTensor &a, const XTensor &b, DTYPE alpha, int leadingDim
_Multiply(&a, &b, &c, 0, leadingDim); _Multiply(&a, &b, &c, 0, leadingDim);
/* tensor connections */ /* tensor connections */
if (a.enableGrad && b.enableGrad) {
XLink::MakeLink(&a, &b, &c, MATH_MULTIPLY); XLink::MakeLink(&a, &b, &c, MATH_MULTIPLY);
XLink::AddParamToHead(&c, alpha); XLink::AddParamToHead(&c, alpha);
XLink::AddParamToHeadInt(&c, leadingDim); XLink::AddParamToHeadInt(&c, leadingDim);
} }
}
else if(n >= 0 && n < a.order){ else if(n >= 0 && n < a.order){
/* call _MultiplyDim function */ /* call _MultiplyDim function */
_MultiplyDim(&a, &b, &c, n, alpha); _MultiplyDim(&a, &b, &c, n, alpha);
/* tensor connections */ /* tensor connections */
if (a.enableGrad && b.enableGrad) {
XLink::MakeLink(&a, &b, &c, MATH_MULTIPLYDIM); XLink::MakeLink(&a, &b, &c, MATH_MULTIPLYDIM);
XLink::AddParamToHeadInt(&c, n); XLink::AddParamToHeadInt(&c, n);
XLink::AddParamToHead(&c, alpha); XLink::AddParamToHead(&c, alpha);
} }
}
else{ else{
ShowNTErrors("Something is wrong!"); ShowNTErrors("Something is wrong!");
} }
...@@ -262,7 +266,7 @@ void Multiply(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int l ...@@ -262,7 +266,7 @@ void Multiply(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int l
/* call _Multiply function */ /* call _Multiply function */
_Multiply(&a, &b, &c, 0, leadingDim); _Multiply(&a, &b, &c, 0, leadingDim);
if (c.enableGrad) { if (a.enableGrad && b.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_MULTIPLY); XLink::MakeLink(&a, &b, &c, MATH_MULTIPLY);
XLink::AddParamToHead(&c, alpha); XLink::AddParamToHead(&c, alpha);
...@@ -273,7 +277,7 @@ void Multiply(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int l ...@@ -273,7 +277,7 @@ void Multiply(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int l
/* call _MultiplyDim function */ /* call _MultiplyDim function */
_MultiplyDim(&a, &b, &c, n, alpha); _MultiplyDim(&a, &b, &c, n, alpha);
if (c.enableGrad) { if (a.enableGrad && b.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_MULTIPLYDIM); XLink::MakeLink(&a, &b, &c, MATH_MULTIPLYDIM);
XLink::AddParamToHeadInt(&c, n); XLink::AddParamToHeadInt(&c, n);
......
...@@ -180,9 +180,11 @@ XTensor MultiplyDim(const XTensor &a, const XTensor &b, int n) ...@@ -180,9 +180,11 @@ XTensor MultiplyDim(const XTensor &a, const XTensor &b, int n)
_MultiplyDim(&a, &b, &c, n, 0); _MultiplyDim(&a, &b, &c, n, 0);
/* tensor connections */ /* tensor connections */
if (a.enableGrad && b.enableGrad) {
XLink::MakeLink(&a, &b, &c, MATH_MULTIPLYDIM); XLink::MakeLink(&a, &b, &c, MATH_MULTIPLYDIM);
XLink::AddParamToHeadInt(&c, n); XLink::AddParamToHeadInt(&c, n);
XLink::AddParamToHead(&c, 0); XLink::AddParamToHead(&c, 0);
}
return c; return c;
} }
...@@ -208,7 +210,7 @@ void MultiplyDim(const XTensor &a, const XTensor &b, XTensor &c, int n) ...@@ -208,7 +210,7 @@ void MultiplyDim(const XTensor &a, const XTensor &b, XTensor &c, int n)
/* call _Multiply function */ /* call _Multiply function */
_MultiplyDim(&a, &b, &c, n, 0); _MultiplyDim(&a, &b, &c, n, 0);
if (c.enableGrad) { if (a.enableGrad && b.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_MULTIPLYDIM); XLink::MakeLink(&a, &b, &c, MATH_MULTIPLYDIM);
XLink::AddParamToHeadInt(&c, n); XLink::AddParamToHeadInt(&c, n);
...@@ -350,8 +352,10 @@ XTensor MultiplyBroadcast(const XTensor &a, const XTensor &b) ...@@ -350,8 +352,10 @@ XTensor MultiplyBroadcast(const XTensor &a, const XTensor &b)
_MultiplyBroadcast(&a, &b, &c, 0); _MultiplyBroadcast(&a, &b, &c, 0);
/* tensor connections */ /* tensor connections */
if (a.enableGrad && b.enableGrad) {
XLink::MakeLink(&a, &b, &c, MATH_MULTIPLYBROADCAST); XLink::MakeLink(&a, &b, &c, MATH_MULTIPLYBROADCAST);
XLink::AddParamToHead(&c, 0); XLink::AddParamToHead(&c, 0);
}
return c; return c;
} }
...@@ -374,7 +378,7 @@ void MultiplyBroadcast(const XTensor &a, const XTensor &b, XTensor &c) ...@@ -374,7 +378,7 @@ void MultiplyBroadcast(const XTensor &a, const XTensor &b, XTensor &c)
/* call _SumBroadcast function */ /* call _SumBroadcast function */
_MultiplyBroadcast(&a, &b, &c, 0); _MultiplyBroadcast(&a, &b, &c, 0);
if (c.enableGrad) { if (a.enableGrad && b.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_MULTIPLYBROADCAST); XLink::MakeLink(&a, &b, &c, MATH_MULTIPLYBROADCAST);
XLink::AddParamToHead(&c, 0); XLink::AddParamToHead(&c, 0);
......
...@@ -190,18 +190,22 @@ XTensor Sub(const XTensor &a, const XTensor &b, DTYPE beta) ...@@ -190,18 +190,22 @@ XTensor Sub(const XTensor &a, const XTensor &b, DTYPE beta)
_Sub(&a, &b, &c, beta); _Sub(&a, &b, &c, beta);
/* tensor connections */ /* tensor connections */
if (a.enableGrad && b.enableGrad) {
XLink::MakeLink(&a, &b, &c, MATH_SUB); XLink::MakeLink(&a, &b, &c, MATH_SUB);
XLink::AddParamToHead(&c, beta); XLink::AddParamToHead(&c, beta);
} }
}
else if(n >= 0 && n < a.order){ else if(n >= 0 && n < a.order){
/* call _SubDim function */ /* call _SubDim function */
_SubDim(&a, &b, &c, n, beta); _SubDim(&a, &b, &c, n, beta);
/* tensor connections */ /* tensor connections */
if (a.enableGrad && b.enableGrad) {
XLink::MakeLink(&a, &b, &c, MATH_SUBDIM); XLink::MakeLink(&a, &b, &c, MATH_SUBDIM);
XLink::AddParamToHeadInt(&c, n); XLink::AddParamToHeadInt(&c, n);
XLink::AddParamToHead(&c, beta); XLink::AddParamToHead(&c, beta);
} }
}
else{ else{
ShowNTErrors("Something is wrong!"); ShowNTErrors("Something is wrong!");
} }
...@@ -229,7 +233,7 @@ void Sub(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta) ...@@ -229,7 +233,7 @@ void Sub(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta)
/* call _Sub function */ /* call _Sub function */
_Sub(&a, &b, &c, beta); _Sub(&a, &b, &c, beta);
if (c.enableGrad) { if (a.enableGrad && b.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_SUB); XLink::MakeLink(&a, &b, &c, MATH_SUB);
XLink::AddParamToHead(&c, beta); XLink::AddParamToHead(&c, beta);
...@@ -239,7 +243,7 @@ void Sub(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta) ...@@ -239,7 +243,7 @@ void Sub(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta)
/* call _SubDim function */ /* call _SubDim function */
_SubDim(&a, &b, &c, n, beta); _SubDim(&a, &b, &c, n, beta);
if (c.enableGrad) { if (a.enableGrad && b.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_SUBDIM); XLink::MakeLink(&a, &b, &c, MATH_SUBDIM);
XLink::AddParamToHeadInt(&c, n); XLink::AddParamToHeadInt(&c, n);
......
...@@ -164,9 +164,11 @@ XTensor SubDim(const XTensor &a, const XTensor &b, int n, DTYPE beta) ...@@ -164,9 +164,11 @@ XTensor SubDim(const XTensor &a, const XTensor &b, int n, DTYPE beta)
_SubDim(&a, &b, &c, n, beta); _SubDim(&a, &b, &c, n, beta);
/* tensor connections */ /* tensor connections */
if (a.enableGrad && b.enableGrad) {
XLink::MakeLink(&a, &b, &c, MATH_SUBDIM); XLink::MakeLink(&a, &b, &c, MATH_SUBDIM);
XLink::AddParamToHeadInt(&c, n); XLink::AddParamToHeadInt(&c, n);
XLink::AddParamToHead(&c, beta); XLink::AddParamToHead(&c, beta);
}
return c; return c;
} }
...@@ -193,7 +195,7 @@ void SubDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE beta) ...@@ -193,7 +195,7 @@ void SubDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE beta)
/* call _Sub function */ /* call _Sub function */
_SubDim(&a, &b, &c, n, beta); _SubDim(&a, &b, &c, n, beta);
if (c.enableGrad) { if (a.enableGrad && b.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_SUBDIM); XLink::MakeLink(&a, &b, &c, MATH_SUBDIM);
XLink::AddParamToHeadInt(&c, n); XLink::AddParamToHeadInt(&c, n);
......
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include "../../XTensor.h" #include "../../XTensor.h"
#include "../../XName.h" #include "../../XName.h"
#include "../../XUtility.h" #include "../../XUtility.h"
#include "../../XBLAS.h"
#include "../movement/CopyValues.h" #include "../movement/CopyValues.h"
#include "Sum.h" #include "Sum.h"
#include "Sum.cuh" #include "Sum.cuh"
...@@ -84,7 +85,34 @@ void _Sum(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta) ...@@ -84,7 +85,34 @@ void _Sum(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta)
DTYPE * ap = (DTYPE*)a->data; DTYPE * ap = (DTYPE*)a->data;
DTYPE * bp = (DTYPE*)b->data; DTYPE * bp = (DTYPE*)b->data;
DTYPE * cp = (DTYPE*)c->data; DTYPE * cp = (DTYPE*)c->data;
/* when c != a, OpenBLAS needs to copy a to c first. This operation
slow down the speed, so just use OpenBLAS when c == a */
#if defined(USE_BLAS)
if( c == a){
AXPY(a->unitNum,beta,bp,1,cp,1);
} else{
int num = a->unitNum;
if (num % 4 == 0) {
for (int i = 0; i < num; i += 4) {
cp[i] = ap[i] + bp[i] * beta;
cp[i + 1] = ap[i + 1] + bp[i + 1] * beta;
cp[i + 2] = ap[i + 2] + bp[i + 2] * beta;
cp[i + 3] = ap[i + 3] + bp[i + 3] * beta;
}
}
else if (num % 2 == 0) {
for (int i = 0; i < num; i += 2) {
cp[i] = ap[i] + bp[i] * beta;
cp[i + 1] = ap[i + 1] + bp[i + 1] * beta;
}
}
else {
for (int i = 0; i < num; i++) {
cp[i] = ap[i] + bp[i] * beta;
}
}
}
#else
/* unrolling */ /* unrolling */
int num = a->unitNum; int num = a->unitNum;
if (num % 4 == 0) { if (num % 4 == 0) {
...@@ -106,6 +134,7 @@ void _Sum(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta) ...@@ -106,6 +134,7 @@ void _Sum(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta)
cp[i] = ap[i] + bp[i] * beta; cp[i] = ap[i] + bp[i] * beta;
} }
} }
#endif
} }
else { else {
// TODO!! // TODO!!
...@@ -195,18 +224,22 @@ XTensor Sum(const XTensor &a, const XTensor &b, DTYPE beta) ...@@ -195,18 +224,22 @@ XTensor Sum(const XTensor &a, const XTensor &b, DTYPE beta)
_Sum(&a, &b, &c, beta); _Sum(&a, &b, &c, beta);
/* tensor connections */ /* tensor connections */
if (a.enableGrad && b.enableGrad) {
XLink::MakeLink(&a, &b, &c, MATH_SUM); XLink::MakeLink(&a, &b, &c, MATH_SUM);
XLink::AddParamToHead(&c, beta); XLink::AddParamToHead(&c, beta);
} }
}
else if(n >= 0 && n < a.order){ else if(n >= 0 && n < a.order){
/* call _SumDim function */ /* call _SumDim function */
_SumDim(&a, &b, &c, n, beta); _SumDim(&a, &b, &c, n, beta);
/* tensor connections */ /* tensor connections */
if (a.enableGrad && b.enableGrad) {
XLink::MakeLink(&a, &b, &c, MATH_SUMDIM); XLink::MakeLink(&a, &b, &c, MATH_SUMDIM);
XLink::AddParamToHeadInt(&c, n); XLink::AddParamToHeadInt(&c, n);
XLink::AddParamToHead(&c, beta); XLink::AddParamToHead(&c, beta);
} }
}
else{ else{
ShowNTErrors("Something is wrong!"); ShowNTErrors("Something is wrong!");
} }
...@@ -233,8 +266,8 @@ void Sum(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta) ...@@ -233,8 +266,8 @@ void Sum(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta)
/* call _Sum function */ /* call _Sum function */
_Sum(&a, &b, &c, beta); _Sum(&a, &b, &c, beta);
if (c.enableGrad) {
/* tensor connections */ /* tensor connections */
if (a.enableGrad && b.enableGrad) {
XLink::MakeLink(&a, &b, &c, MATH_SUM); XLink::MakeLink(&a, &b, &c, MATH_SUM);
XLink::AddParamToHead(&c, beta); XLink::AddParamToHead(&c, beta);
} }
...@@ -243,8 +276,8 @@ void Sum(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta) ...@@ -243,8 +276,8 @@ void Sum(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta)
/* call _SumDim function */ /* call _SumDim function */
_SumDim(&a, &b, &c, n, beta); _SumDim(&a, &b, &c, n, beta);
if (c.enableGrad) {
/* tensor connections */ /* tensor connections */
if (a.enableGrad && b.enableGrad) {
XLink::MakeLink(&a, &b, &c, MATH_SUMDIM); XLink::MakeLink(&a, &b, &c, MATH_SUMDIM);
XLink::AddParamToHeadInt(&c, n); XLink::AddParamToHeadInt(&c, n);
XLink::AddParamToHead(&c, beta); XLink::AddParamToHead(&c, beta);
......
...@@ -181,9 +181,11 @@ XTensor SumDim(const XTensor &a, const XTensor &b, int n, DTYPE beta) ...@@ -181,9 +181,11 @@ XTensor SumDim(const XTensor &a, const XTensor &b, int n, DTYPE beta)
_SumDim(&a, &b, &c, n, beta); _SumDim(&a, &b, &c, n, beta);
/* tensor connections */ /* tensor connections */
if (a.enableGrad && b.enableGrad) {
XLink::MakeLink(&a, &b, &c, MATH_SUMDIM); XLink::MakeLink(&a, &b, &c, MATH_SUMDIM);
XLink::AddParamToHeadInt(&c, n); XLink::AddParamToHeadInt(&c, n);
XLink::AddParamToHead(&c, beta); XLink::AddParamToHead(&c, beta);
}
return c; return c;
} }
...@@ -210,7 +212,7 @@ void SumDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE beta) ...@@ -210,7 +212,7 @@ void SumDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE beta)
/* call _SumDim function */ /* call _SumDim function */
_SumDim(&a, &b, &c, n, beta); _SumDim(&a, &b, &c, n, beta);
if (c.enableGrad) { if (a.enableGrad && b.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_SUMDIM); XLink::MakeLink(&a, &b, &c, MATH_SUMDIM);
XLink::AddParamToHeadInt(&c, n); XLink::AddParamToHeadInt(&c, n);
...@@ -353,8 +355,10 @@ XTensor SumBroadcast(const XTensor &a, const XTensor &b, DTYPE beta) ...@@ -353,8 +355,10 @@ XTensor SumBroadcast(const XTensor &a, const XTensor &b, DTYPE beta)
_SumBroadcast(&a, &b, &c, beta); _SumBroadcast(&a, &b, &c, beta);
/* tensor connections */ /* tensor connections */
if (a.enableGrad && b.enableGrad) {
XLink::MakeLink(&a, &b, &c, MATH_SUMBROADCAST); XLink::MakeLink(&a, &b, &c, MATH_SUMBROADCAST);
XLink::AddParamToHead(&c, beta); XLink::AddParamToHead(&c, beta);
}
return c; return c;
} }
...@@ -377,7 +381,7 @@ void SumBroadcast(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta) ...@@ -377,7 +381,7 @@ void SumBroadcast(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta)
/* call _SumBroadcast function */ /* call _SumBroadcast function */
_SumBroadcast(&a, &b, &c, beta); _SumBroadcast(&a, &b, &c, beta);
if (c.enableGrad) { if (a.enableGrad && b.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_SUMBROADCAST); XLink::MakeLink(&a, &b, &c, MATH_SUMBROADCAST);
XLink::AddParamToHead(&c, beta); XLink::AddParamToHead(&c, beta);
......
...@@ -121,6 +121,7 @@ XTensor ConvertDataType(const XTensor & input, TENSOR_DATA_TYPE dataType) ...@@ -121,6 +121,7 @@ XTensor ConvertDataType(const XTensor & input, TENSOR_DATA_TYPE dataType)
_ConvertDataType(&input, &output); _ConvertDataType(&input, &output);
/* tensor connection */ /* tensor connection */
if(input.enableGrad)
XLink::MakeLink(&input, NULL, &output, GETANDSET_CONVERTDATATYPE); XLink::MakeLink(&input, NULL, &output, GETANDSET_CONVERTDATATYPE);
return output; return output;
...@@ -136,7 +137,7 @@ void ConvertDataType(const XTensor & input, XTensor & output, TENSOR_DATA_TYPE d ...@@ -136,7 +137,7 @@ void ConvertDataType(const XTensor & input, XTensor & output, TENSOR_DATA_TYPE d
_ConvertDataType(&input, &output); _ConvertDataType(&input, &output);
/* tensor connection */ /* tensor connection */
if (output.enableGrad) if (input.enableGrad)
XLink::MakeLink(&input, NULL, &output, GETANDSET_CONVERTDATATYPE); XLink::MakeLink(&input, NULL, &output, GETANDSET_CONVERTDATATYPE);
} }
......
...@@ -32,65 +32,43 @@ convert onehot tensor to index tensor ...@@ -32,65 +32,43 @@ convert onehot tensor to index tensor
>> index - index tensor, which value is an integer num >> index - index tensor, which value is an integer num
>> size - the last dimension size of the onehot tensor >> size - the last dimension size of the onehot tensor
*/ */
void _OnehotToIndex(XTensor * onehot, XTensor * index, int dim) void _OnehotToIndex(const XTensor * onehot, XTensor * index, int size)
{ {
dim = (dim < 0 ? onehot->GetDim(-1) : dim); CheckNTErrors(onehot->GetDim(-1) == size, "Illegal tensor dimension!");
CheckNTErrors(onehot->order == index->order + 1, "Illegal tensor order!"); CheckNTErrors(onehot->order == index->order + 1, "Illegal tensor order!");
CheckNTErrors(dim < onehot->order, "Illegal speficied dimension!")
CheckNTErrors(onehot->dataType == X_INT, "The onehot tensor must be in X_INT!") CheckNTErrors(onehot->dataType == X_INT, "The onehot tensor must be in X_INT!")
CheckNTErrors(index->dataType == X_INT, "The index tensor must be in X_INT!") CheckNTErrors(index->dataType == X_INT, "The index tensor must be in X_INT!")
for (int i = 0; i < index->order; i++) { for (int i = 0; i < index->order; i++)
if (i < dim) {
CheckNTErrors(index->GetDim(i) == onehot->GetDim(i), "Illegal tensor order!"); CheckNTErrors(index->GetDim(i) == onehot->GetDim(i), "Illegal tensor order!");
}
else {
CheckNTErrors(index->GetDim(i) == onehot->GetDim(i + 1), "Illegal tensor order!");
}
}
#ifdef USE_CUDA #ifdef USE_CUDA
if(onehot->devID >= 0 && index->devID >= 0) { if(onehot->devID >= 0 && index->devID >= 0) {
_CudaOnehotToIndex(onehot, index, dim); _CudaOnehotToIndex(onehot, index, size);
return; return;
} }
#endif #endif
int blockNum = 1; int blockNum = index->unitNum;
int blockSize = 1; int stride = size;
int dimSize = 1;
int stride = 1;
for (int i = 0; i < dim; i++)
blockNum *= onehot->GetDim(i);
blockSize = onehot->unitNum / blockNum;
dimSize = onehot->GetDim(dim);
for (int i = dim + 1; i < onehot->order; i++)
stride *= onehot->GetDim(i);
int * onehotData = (int *)onehot->data; int * onehotData = (int *)onehot->data;
int * indexData = (int *)index->data; int * indexData = (int *)index->data;
for (int i = 0; i < blockNum; i++) { for (int i = 0; i < blockNum; i++) {
for (int j = 0; j < stride; j++) { int * od = onehotData + i * stride;
int * od = onehotData + i * blockSize + j;
int * index = indexData + i * stride + j;
int record = -1; int record = -1;
for (int j = 0; j < dimSize; j++) { for (int j = 0; j < stride; j++) {
if (od[j*stride] != 0) { if (od[j] != 0) {
if (record == -1) if (record == -1)
record = j; record = j;
else else
ShowNTErrors("The value of onehot tensor is illegal!"); ShowNTErrors("The value of onehot tensor is illegal!");
} }
} }
*index = record; indexData[i] = record;
}
} }
} }
/* /*
...@@ -101,7 +79,7 @@ make a new tensor to keep the result and return it ...@@ -101,7 +79,7 @@ make a new tensor to keep the result and return it
>> size - the last dimension size of the onehot tensor >> size - the last dimension size of the onehot tensor
<< return - the index tensor << return - the index tensor
*/ */
XTensor OnehotToIndex(XTensor & onehot, int size) XTensor OnehotToIndex(const XTensor & onehot, int size)
{ {
CheckNTErrors(onehot.GetDim(-1) == size, "Illegal tensor dimension!"); CheckNTErrors(onehot.GetDim(-1) == size, "Illegal tensor dimension!");
CheckNTErrors(onehot.dataType == X_INT, "The onehot tensor must be in X_INT!") CheckNTErrors(onehot.dataType == X_INT, "The onehot tensor must be in X_INT!")
...@@ -123,10 +101,9 @@ convert index tensor to onehot tensor ...@@ -123,10 +101,9 @@ convert index tensor to onehot tensor
>> size - the last dimension size of the onehot tensor >> size - the last dimension size of the onehot tensor
*/ */
void _IndexToOnehot(const XTensor * index, XTensor * onehot, void _IndexToOnehot(const XTensor * index, XTensor * onehot,
float labelSmoothingP) int size, float labelSmoothingP)
{ {
int size = onehot->GetDim(-1); CheckNTErrors(onehot->GetDim(-1) == size, "Illegal tensor dimension!");
CheckNTErrors(onehot->order == index->order + 1, "Illegal tensor order!"); CheckNTErrors(onehot->order == index->order + 1, "Illegal tensor order!");
//CheckNTErrors(onehot->dataType == X_INT, "The onehot tensor must be in X_INT!") //CheckNTErrors(onehot->dataType == X_INT, "The onehot tensor must be in X_INT!")
CheckNTErrors(index->dataType == X_INT, "The index tensor must be in X_INT!") CheckNTErrors(index->dataType == X_INT, "The index tensor must be in X_INT!")
...@@ -171,7 +148,7 @@ make a new tensor to keep the result and return it ...@@ -171,7 +148,7 @@ make a new tensor to keep the result and return it
>> confidence - labelsmoothing >> confidence - labelsmoothing
<< return - the onehot tensor << return - the onehot tensor
*/ */
XTensor IndexToOnehot(XTensor & index, int size, float labelSmoothingP) XTensor IndexToOnehot(const XTensor & index, int size, float labelSmoothingP)
{ {
CheckNTErrors(index.dataType == X_INT, "The onehot tensor must be in X_INT!") CheckNTErrors(index.dataType == X_INT, "The onehot tensor must be in X_INT!")
...@@ -184,7 +161,7 @@ XTensor IndexToOnehot(XTensor & index, int size, float labelSmoothingP) ...@@ -184,7 +161,7 @@ XTensor IndexToOnehot(XTensor & index, int size, float labelSmoothingP)
dim[order] = size; dim[order] = size;
InitTensor(&onehot, index.order + 1, dim, X_FLOAT, 1.0F, index.devID, index.mem); InitTensor(&onehot, index.order + 1, dim, X_FLOAT, 1.0F, index.devID, index.mem);
_IndexToOnehot(&index, &onehot, labelSmoothingP); _IndexToOnehot(&index, &onehot, size, labelSmoothingP);
delete[] dim; delete[] dim;
......
...@@ -61,7 +61,7 @@ convert onehot tensor to index tensor (cuda version) ...@@ -61,7 +61,7 @@ convert onehot tensor to index tensor (cuda version)
>> index - index tensor, which value is an integer num >> index - index tensor, which value is an integer num
>> size - the last dimension size of the onehot tensor >> size - the last dimension size of the onehot tensor
*/ */
void _CudaOnehotToIndex(XTensor * onehot, XTensor * index, int size) void _CudaOnehotToIndex(const XTensor * onehot, XTensor * index, int size)
{ {
int devID = onehot->devID; int devID = onehot->devID;
......
...@@ -27,10 +27,11 @@ ...@@ -27,10 +27,11 @@
namespace nts{ // namespace nts(NiuTrans.Tensor) namespace nts{ // namespace nts(NiuTrans.Tensor)
/* convert onehot tensor to index tensor (cuda version) */ /* convert onehot tensor to index tensor (cuda version) */
void _CudaOnehotToIndex(XTensor * onehot, XTensor * index, int size); void _CudaOnehotToIndex(const XTensor * onehot, XTensor * index, int size);
/* convert index tensor to onehot tensor (cuda version) */ /* convert index tensor to onehot tensor (cuda version) */
void _CudaIndexToOnehot(const XTensor * index, XTensor * onehot, int size, float confidence, float lowconfidence); void _CudaIndexToOnehot(const XTensor * index, XTensor * onehot,
int size, float confidence, float lowconfidence);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -27,19 +27,18 @@ ...@@ -27,19 +27,18 @@
namespace nts{ // namespace nts(NiuTrans.Tensor) namespace nts{ // namespace nts(NiuTrans.Tensor)
/* convert onehot tensor to index tensor */ /* convert onehot tensor to index tensor */
void _OnehotToIndex(XTensor * onehot, XTensor * index, int dim); void _OnehotToIndex(const XTensor * onehot, XTensor * index, int size);
/* convert onehot tensor to index tensor (return an XTensor structure) /* convert onehot tensor to index tensor (return an XTensor structure)
make a new tensor to keep the result and return it */ make a new tensor to keep the result and return it */
XTensor OnehotToIndex(XTensor & onehot, int size); XTensor OnehotToIndex(const XTensor & onehot, int num);
/* convert index tensor to onehot tensor */ /* convert index tensor to onehot tensor */
void _IndexToOnehot(const XTensor * index, XTensor * onehot, void _IndexToOnehot(const XTensor * index, XTensor * onehot, int size, float labelSmoothingP);
float labelSmoothingP = 0.0F);
/* convert index tensor to onehot tensor (return an XTensor structure) /* convert index tensor to onehot tensor (return an XTensor structure)
make a new tensor to keep the result and return it */ make a new tensor to keep the result and return it */
XTensor IndexToOnehot(XTensor & index, int size, float labelSmoothingP = 0.0F); XTensor IndexToOnehot(const XTensor & index, int num, float labelSmoothingP);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -117,10 +117,12 @@ XTensor SelectRange(const XTensor &a, int dim, int low, int high) ...@@ -117,10 +117,12 @@ XTensor SelectRange(const XTensor &a, int dim, int low, int high)
_SelectRange(&a, &c, dim, low, high); _SelectRange(&a, &c, dim, low, high);
/* tensor connection */ /* tensor connection */
if (a.enableGrad) {
XLink::MakeLink(&a, NULL, &c, GETANDSET_SELECT); XLink::MakeLink(&a, NULL, &c, GETANDSET_SELECT);
XLink::AddParamToHeadInt(&c, dim); XLink::AddParamToHeadInt(&c, dim);
XLink::AddParamToHeadInt(&c, low); XLink::AddParamToHeadInt(&c, low);
XLink::AddParamToHeadInt(&c, high); XLink::AddParamToHeadInt(&c, high);
}
/* destroy variables */ /* destroy variables */
delete[] dimSize; delete[] dimSize;
......
...@@ -526,6 +526,43 @@ void _SetDataRand(XTensor * tensor, DTYPE lower, DTYPE upper) ...@@ -526,6 +526,43 @@ void _SetDataRand(XTensor * tensor, DTYPE lower, DTYPE upper)
} }
} }
/* generate data items as an arithmetic sequence over [lower, upper) with a step
>> tensor - the 1-D tensor whose data array would be initialized
>> lower - the first value of the sequence
>> upper - the exclusive end of the sequence
>> step - the (non-zero) difference between two adjacent items
*/
void _SetDataRange(XTensor * tensor, DTYPE lower, DTYPE upper, DTYPE step)
{
    CheckNTErrors((tensor->order == 1), "Tensor must be 1 dimension!");
    CheckNTErrors((step != 0), "The step must be non-zero!");

    /* compute the true length according to the (lower, upper, step) */
    DTYPE size = (DTYPE)fabs(upper - lower);
    int num = (int)ceil(size / fabs(step));
    CheckNTErrors((tensor->unitNum == num), "Unit number of the tensor is not matched.");

    /* fill a typed host array with the sequence, then hand it to the tensor.
       NOTE: the buffer must be deleted through its real type — the original
       code did `delete[]` on a void*, which is undefined behavior in C++ */
    if (tensor->dataType == X_INT) {
        int * data = new int[num];
        for (int i = 0; i < num; i++)
            data[i] = (int)(lower + i * step);
        tensor->SetData(data, num);
        delete[] data;
    }
    else if (tensor->dataType == X_FLOAT) {
        float * data = new float[num];
        for (int i = 0; i < num; i++)
            data[i] = (float)(lower + i * step);
        tensor->SetData(data, num);
        delete[] data;
    }
    else {
        ShowNTErrors("TODO!");
    }
}
/* /*
generate data items with a uniform distribution in [lower, upper] and set generate data items with a uniform distribution in [lower, upper] and set
the item to a pre-defined value if the item >= p, set the item to 0 otherwise the item to a pre-defined value if the item >= p, set the item to 0 otherwise
......
...@@ -69,6 +69,9 @@ void _SetDataRand(XTensor * tensor, int rNum, int cNum); ...@@ -69,6 +69,9 @@ void _SetDataRand(XTensor * tensor, int rNum, int cNum);
/* generate data items with a uniform distribution in [lower, upper] */ /* generate data items with a uniform distribution in [lower, upper] */
void _SetDataRand(XTensor * tensor, DTYPE lower, DTYPE upper); void _SetDataRand(XTensor * tensor, DTYPE lower, DTYPE upper);
/* generate data items with a range by start, end and the step */
void _SetDataRange(XTensor * tensor, DTYPE lower, DTYPE upper, DTYPE step);
/* generate data items with a uniform distribution in [lower, upper] and set /* generate data items with a uniform distribution in [lower, upper] and set
the item to a pre-defined value if the item >= p, set the item to 0 otherwise */ the item to a pre-defined value if the item >= p, set the item to 0 otherwise */
void _SetDataRandP(XTensor * tensor, DTYPE lower, DTYPE upper, DTYPE p, DTYPE value); void _SetDataRandP(XTensor * tensor, DTYPE lower, DTYPE upper, DTYPE p, DTYPE value);
......
...@@ -167,7 +167,9 @@ XTensor funcName(const XTensor &a, T num) ...@@ -167,7 +167,9 @@ XTensor funcName(const XTensor &a, T num)
XTensor b(&a); \ XTensor b(&a); \
b.SetTMPFlag(); \ b.SetTMPFlag(); \
_funcName(&a, &b, num); \ _funcName(&a, &b, num); \
if(a.enableGrad){ \
XLink::MakeLink(&a, NULL, &b, operationId); \ XLink::MakeLink(&a, NULL, &b, operationId); \
} \
XLink::AddParamToHead(&b, num); \ XLink::AddParamToHead(&b, num); \
return b; \ return b; \
} \ } \
...@@ -183,7 +185,7 @@ void funcName(const XTensor &a, XTensor &b, T num) ...@@ -183,7 +185,7 @@ void funcName(const XTensor &a, XTensor &b, T num)
InitTensor(&b, &a); \ InitTensor(&b, &a); \
} \ } \
_funcName(&a, &b, num); \ _funcName(&a, &b, num); \
if (b.enableGrad) { \ if (a.enableGrad) { \
XLink::MakeLink(&a, NULL, &b, operationId); \ XLink::MakeLink(&a, NULL, &b, operationId); \
XLink::AddParamToHead(&b, num); \ XLink::AddParamToHead(&b, num); \
} \ } \
......
...@@ -99,9 +99,11 @@ XTensor Clip(const XTensor & a, DTYPE lower, DTYPE upper) ...@@ -99,9 +99,11 @@ XTensor Clip(const XTensor & a, DTYPE lower, DTYPE upper)
_Clip(&a, &b, lower, upper); _Clip(&a, &b, lower, upper);
/* tensor connections */ /* tensor connections */
if (a.enableGrad) {
XLink::MakeLink(&a, NULL, &b, MATH_CLIP); XLink::MakeLink(&a, NULL, &b, MATH_CLIP);
XLink::AddParamToHead(&b, lower); XLink::AddParamToHead(&b, lower);
XLink::AddParamToHead(&b, upper); XLink::AddParamToHead(&b, upper);
}
return b; return b;
} }
...@@ -115,8 +117,8 @@ void Clip(const XTensor & a, XTensor & b, DTYPE lower, DTYPE upper) ...@@ -115,8 +117,8 @@ void Clip(const XTensor & a, XTensor & b, DTYPE lower, DTYPE upper)
/* call _Clip function */ /* call _Clip function */
_Clip(&a, &b, lower, upper); _Clip(&a, &b, lower, upper);
if (b.enableGrad) {
/* tensor connections */ /* tensor connections */
if (a.enableGrad) {
XLink::MakeLink(&a, NULL, &b, MATH_CLIP); XLink::MakeLink(&a, NULL, &b, MATH_CLIP);
XLink::AddParamToHead(&b, lower); XLink::AddParamToHead(&b, lower);
XLink::AddParamToHead(&b, upper); XLink::AddParamToHead(&b, upper);
......
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
*/ */
#include "../../XTensor.h" #include "../../XTensor.h"
#include "../../XDevice.h"
#include "../../XName.h" #include "../../XName.h"
#include "Compare.h" #include "Compare.h"
#include "Compare.cuh" #include "Compare.cuh"
...@@ -123,4 +124,95 @@ SIMPLE_COMPARE_FUNCTION_ME(NotEqualMe, _NotEqual) ...@@ -123,4 +124,95 @@ SIMPLE_COMPARE_FUNCTION_ME(NotEqualMe, _NotEqual)
SIMPLE_COMPARE_FUNCTION(NotEqual, _NotEqual, MATH_NOTEQUAL) SIMPLE_COMPARE_FUNCTION(NotEqual, _NotEqual, MATH_NOTEQUAL)
SIMPLE_COMPARE_FUNCTION_VOID(NotEqual, _NotEqual, MATH_NOTEQUAL) SIMPLE_COMPARE_FUNCTION_VOID(NotEqual, _NotEqual, MATH_NOTEQUAL)
/* define the macros separately, specifying the respective function names */
#ifdef USE_CUDA
/* element-wise max/min (pointer interface), GPU-capable build:
   dispatches to the CUDA implementation when a lives on a device,
   otherwise computes on the host with a flat loop over a->unitNum */
#define _SIMPLE_MAX_MIN_FUNCTION(_funcName, _cudaFuncName, origFunc) \
void _funcName(const XTensor * a, const XTensor * b, XTensor * c) \
{ \
    CheckNTErrors((XTensor::IsSameShaped(a, b, c)), \
                  "Input and output tensors should have the same type!"); \
    CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!"); \
    CheckDev(a->devID, b->devID); \
    CheckDev(a->devID, c->devID); \
    /* run it on GPUs */ \
    if (a->devID >= 0) { \
        _cudaFuncName(a, b, c); \
        return; \
    } \
    DTYPE * da = (DTYPE*)a->data; \
    DTYPE * db = (DTYPE*)b->data; \
    DTYPE * dc = (DTYPE*)c->data; \
    for (int i = 0; i < a->unitNum; i++) \
        dc[i] = (DTYPE)origFunc(da[i], db[i]); \
}
#else
/* CPU-only build: same host loop; a device-resident tensor is an error.
   NOTE(review): this branch calls CheckDev(a, b) with tensor pointers while
   the CUDA branch passes devIDs — confirm both CheckDev overloads exist */
#define _SIMPLE_MAX_MIN_FUNCTION(_funcName, origFunc) \
void _funcName(const XTensor * a, const XTensor * b, XTensor *c) \
{ \
    CheckNTErrors((XTensor::IsSameShaped(a, b, c)), \
                  "Input and output tensors should have the same type!"); \
    CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!"); \
    CheckDev(a, b); \
    CheckDev(a, c); \
    /* device tensors are not supported in a CPU-only build */ \
    if (a->devID >= 0) { \
        ShowNTErrors("No GPU devices support!") \
    } \
    DTYPE * da = (DTYPE*)a->data; \
    DTYPE * db = (DTYPE*)b->data; \
    DTYPE * dc = (DTYPE*)c->data; \
    for (int i = 0; i < a->unitNum; i++) \
        dc[i] = (DTYPE)origFunc(da[i], db[i]); \
}
#endif
/* in-place variant (pointer interface): the element-wise result overwrites tensor a */
#define _SIMPLE_MAX_MIN_FUNCTION_ME(inplaceFunc, baseFunc) \
void inplaceFunc(XTensor * a, const XTensor * b) \
{ \
    baseFunc(a, b, a); \
}
/* in-place variant (reference interface): the element-wise result overwrites tensor a */
#define SIMPLE_MAX_MIN_FUNCTION_ME(inplaceFunc, baseFunc) \
void inplaceFunc(XTensor & a, const XTensor & b) \
{ \
    baseFunc(&a, &b, &a); \
}
/* element-wise max/min returning a new TMP tensor shaped like a.
   NOTE(review): operationId is accepted but never used — no XLink is made,
   so these ops do not join the gradient network; confirm this is intended
   (sibling compare functions pass their operationId to XLink::MakeLink) */
#define SIMPLE_MAX_MIN_FUNCTION(funcName, _funcName, operationId) \
XTensor funcName(const XTensor & a, const XTensor & b) \
{ \
    XTensor c(&a); \
    c.SetTMPFlag(); \
    _funcName(&a, &b, &c); \
    return c; \
}
/* element-wise max/min with the result placed in the output tensor c;
   c is (re)initialized to a's shape when uninitialized or mis-shaped.
   FIX: c must be taken by reference — the original took `XTensor c` by value,
   so InitTensor and the result were applied to a local copy, and the
   definition never matched the prototype declared in Compare.h
   (void Max(const XTensor &, const XTensor &, XTensor &)). */
#define SIMPLE_MAX_MIN_FUNCTION_VOID(funcName, _funcName, operationId) \
void funcName(const XTensor &a, const XTensor &b, XTensor &c) \
{ \
    if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) { \
        InitTensor(&c, &a); \
    } \
    _funcName(&a, &b, &c); \
}
#ifdef USE_CUDA
/* core pointer-interface implementations (dispatch to CUDA kernels on device) */
_SIMPLE_MAX_MIN_FUNCTION(_Max, _CudaMax, max)
_SIMPLE_MAX_MIN_FUNCTION(_Min, _CudaMin, min)
#else
/* core pointer-interface implementations (host-only build) */
_SIMPLE_MAX_MIN_FUNCTION(_Max, max)
_SIMPLE_MAX_MIN_FUNCTION(_Min, min)
#endif

/* in-place, returning, and output-parameter wrappers around _Max */
_SIMPLE_MAX_MIN_FUNCTION_ME(_MaxMe, _Max)
SIMPLE_MAX_MIN_FUNCTION_ME(MaxMe, _Max)
SIMPLE_MAX_MIN_FUNCTION(Max, _Max, MATH_MAX)
SIMPLE_MAX_MIN_FUNCTION_VOID(Max, _Max, MATH_MAX)

/* in-place, returning, and output-parameter wrappers around _Min */
_SIMPLE_MAX_MIN_FUNCTION_ME(_MinMe, _Min)
SIMPLE_MAX_MIN_FUNCTION_ME(MinMe, _Min)
SIMPLE_MAX_MIN_FUNCTION(Min, _Min, MATH_MIN)
SIMPLE_MAX_MIN_FUNCTION_VOID(Min, _Min, MATH_MIN)
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
...@@ -89,6 +89,53 @@ void _Cuda##funcName(const XTensor * a, XTensor * b, DTYPE number) \ ...@@ -89,6 +89,53 @@ void _Cuda##funcName(const XTensor * a, XTensor * b, DTYPE number) \
SIMPLE_COMPARE_FUNCTION_GPU(Equal, cudaIsEqual) SIMPLE_COMPARE_FUNCTION_GPU(Equal, cudaIsEqual)
SIMPLE_COMPARE_FUNCTION_GPU(NotEqual, cudaIsNotEqual) SIMPLE_COMPARE_FUNCTION_GPU(NotEqual, cudaIsNotEqual)
/*
element-wise max/min on GPU (cuda version).
The host wrapper sizes a 1D grid via GDevs.GetCudaThread over a->unitNum items
and guards the current device with ProtectCudaDev/BacktoCudaDev; each thread
handles one element with an i < size bounds check.
NOTE(review): the __half kernel body is an empty stub — for X_FLOAT16 inputs
the launch "succeeds" but c is left unmodified; confirm this is intended or
make it an error.
NOTE(review): no cudaGetLastError() check follows the kernel launches.
*/
#define SIMPLE_MAX_MIN_FUNCTION_GPU(funcName, origFunc) \
__global__ \
void Kernel##funcName(DTYPE * a, DTYPE * b, DTYPE * c, int size) \
{ \
    int i = blockDim.x * blockIdx.x + threadIdx.x; \
    \
    if (i < size) \
        c[i] = (DTYPE)origFunc(a[i], b[i]); \
} \
__global__ \
void Kernel##funcName(__half * a, __half * b, __half * c, int size) \
{ \
    return; \
} \
void _Cuda##funcName(const XTensor * a, const XTensor * b, XTensor * c) \
{ \
    \
    int gridSize[3]; \
    int blockSize[3]; \
    \
    GDevs.GetCudaThread(a->devID, a->unitNum, gridSize, blockSize); \
    \
    dim3 blocks(gridSize[0]); \
    dim3 threads(blockSize[0]); \
    \
    int devIDBackup; \
    ProtectCudaDev(a->devID, devIDBackup); \
    \
    if (a->dataType == DEFAULT_DTYPE) { \
        Kernel##funcName<<<blocks, threads>>> \
                        ((DTYPE*)a->data, (DTYPE*)b->data, \
                         (DTYPE*)c->data, a->unitNum); \
    } \
    else if (a->dataType == X_FLOAT16) { \
        Kernel##funcName<<<blocks, threads>>> \
                        ((__half*)a->data, (__half*)b->data, \
                         (__half*)c->data, a->unitNum); \
    } \
    else { \
        ShowNTErrors("TODO!"); \
    } \
    \
    BacktoCudaDev(a->devID, devIDBackup); \
}

SIMPLE_MAX_MIN_FUNCTION_GPU(Max, max)
SIMPLE_MAX_MIN_FUNCTION_GPU(Min, min)
#endif // USE_CUDA #endif // USE_CUDA
......
...@@ -34,6 +34,12 @@ void _CudaEqual(const XTensor * a, XTensor * b, DTYPE value); ...@@ -34,6 +34,12 @@ void _CudaEqual(const XTensor * a, XTensor * b, DTYPE value);
/* check whether every entry is not equal to the given value (cuda version) */ /* check whether every entry is not equal to the given value (cuda version) */
void _CudaNotEqual(const XTensor * a, XTensor * b, DTYPE value); void _CudaNotEqual(const XTensor * a, XTensor * b, DTYPE value);
/* return maximum of two tensor for each items (cuda version) */
void _CudaMax(const XTensor * a, const XTensor * b, XTensor *c);
/* return minimum of two tensor for each items (cuda version) */
void _CudaMin(const XTensor * a, const XTensor * b, XTensor *c);
#endif // USE_CUDA #endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -56,6 +56,36 @@ XTensor NotEqual(const XTensor & a, DTYPE value); ...@@ -56,6 +56,36 @@ XTensor NotEqual(const XTensor & a, DTYPE value);
/* check whether every entry is not equal to the given value */ /* check whether every entry is not equal to the given value */
void NotEqual(const XTensor & a, XTensor & b, DTYPE value); void NotEqual(const XTensor & a, XTensor & b, DTYPE value);
/* return maximum of two tensor for each items */
void _Max(const XTensor * a, const XTensor * b, XTensor * c);
/* return maximum of two tensor for each items (do it on site) */
void _MaxMe(XTensor * a, const XTensor * b);
/* return maximum of two tensor for each items (do it on site) */
void MaxMe(XTensor & a, const XTensor & b);
/* return maximum of two tensor for each items (return an XTensor structure) */
XTensor Max(const XTensor & a, const XTensor & b);
/* return maximum of two tensor for each items */
void Max(const XTensor & a, const XTensor & b, XTensor & c);
/* return minimum of two tensor for each items */
void _Min(const XTensor * a, const XTensor * b, XTensor * c);
/* return minimum of two tensor for each items (do it on site) */
void _MinMe(XTensor * a, const XTensor * b);
/* return minimum of two tensor for each items (do it on site) */
void MinMe(XTensor & a, const XTensor & b);
/* return minimum of two tensor for each items (return an XTensor structure) */
XTensor Min(const XTensor & a, const XTensor & b);
/* return minimum of two tensor for each items */
void Min(const XTensor & a, const XTensor & b, XTensor & c);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
#endif // end __COMPARE_H__ #endif // end __COMPARE_H__
\ No newline at end of file
...@@ -173,9 +173,11 @@ XTensor Normalize(const XTensor &input, int dim, ...@@ -173,9 +173,11 @@ XTensor Normalize(const XTensor &input, int dim,
list.Add((XTensor*)&var); list.Add((XTensor*)&var);
list.Add((XTensor*)&a); list.Add((XTensor*)&a);
list.Add((XTensor*)&b); list.Add((XTensor*)&b);
if (input.enableGrad) {
XLink::MakeLink(&list, &output, MATH_NORMALIZE); XLink::MakeLink(&list, &output, MATH_NORMALIZE);
XLink::AddParamToHeadInt(&output, dim); XLink::AddParamToHeadInt(&output, dim);
XLink::AddParamToHead(&output, epsilon); XLink::AddParamToHead(&output, epsilon);
}
return output; return output;
} }
...@@ -208,7 +210,7 @@ void Normalize(const XTensor &input, XTensor &output, int dim, ...@@ -208,7 +210,7 @@ void Normalize(const XTensor &input, XTensor &output, int dim,
/* call _Normalize function */ /* call _Normalize function */
_Normalize(&input, &output, dim, &mean, &var, &a, &b, epsilon); _Normalize(&input, &output, dim, &mean, &var, &a, &b, epsilon);
if (output.enableGrad == true) { if (input.enableGrad == true) {
/* tensor connections */ /* tensor connections */
TensorList list(5); TensorList list(5);
list.Add((XTensor*)&input); list.Add((XTensor*)&input);
......
...@@ -126,9 +126,11 @@ XTensor ScaleAndShift(const XTensor &a, DTYPE scale, DTYPE shift) ...@@ -126,9 +126,11 @@ XTensor ScaleAndShift(const XTensor &a, DTYPE scale, DTYPE shift)
_ScaleAndShift(&a, &b, scale, shift); _ScaleAndShift(&a, &b, scale, shift);
/* tensor connections */ /* tensor connections */
if (a.enableGrad) {
XLink::MakeLink(&a, NULL, &b, MATH_SCALEANDSHIFT); XLink::MakeLink(&a, NULL, &b, MATH_SCALEANDSHIFT);
XLink::AddParamToHead(&b, scale); XLink::AddParamToHead(&b, scale);
XLink::AddParamToHead(&b, shift); XLink::AddParamToHead(&b, shift);
}
return b; return b;
} }
...@@ -152,7 +154,7 @@ void ScaleAndShift(const XTensor & a, XTensor & b, DTYPE scale, DTYPE shift) ...@@ -152,7 +154,7 @@ void ScaleAndShift(const XTensor & a, XTensor & b, DTYPE scale, DTYPE shift)
/* call _ScaleAndShift function */ /* call _ScaleAndShift function */
_ScaleAndShift(&a, &b, scale, shift); _ScaleAndShift(&a, &b, scale, shift);
if (b.enableGrad) { if (a.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, NULL, &b, MATH_SCALEANDSHIFT); XLink::MakeLink(&a, NULL, &b, MATH_SCALEANDSHIFT);
XLink::AddParamToHead(&b, scale); XLink::AddParamToHead(&b, scale);
......
...@@ -151,7 +151,9 @@ XTensor funcName(const XTensor & a) ...@@ -151,7 +151,9 @@ XTensor funcName(const XTensor & a)
XTensor b(&a); \ XTensor b(&a); \
b.SetTMPFlag(); \ b.SetTMPFlag(); \
_funcName(&a, &b); \ _funcName(&a, &b); \
if(a.enableGrad){ \
XLink::MakeLink(&a, NULL, &b, operationId); \ XLink::MakeLink(&a, NULL, &b, operationId); \
} \
return b; \ return b; \
} }
...@@ -162,7 +164,7 @@ void funcName(const XTensor & a, XTensor & b) ...@@ -162,7 +164,7 @@ void funcName(const XTensor & a, XTensor & b)
InitTensor(&b, &a); \ InitTensor(&b, &a); \
} \ } \
_funcName(&a, &b); \ _funcName(&a, &b); \
if (b.enableGrad) { \ if (a.enableGrad) { \
XLink::MakeLink(&a, NULL, &b, operationId); \ XLink::MakeLink(&a, NULL, &b, operationId); \
} \ } \
} }
......
...@@ -258,9 +258,11 @@ XTensor CopyIndexed(const XTensor & s, int dim, ...@@ -258,9 +258,11 @@ XTensor CopyIndexed(const XTensor & s, int dim,
list.Add((XTensor*)&tgtIndex); list.Add((XTensor*)&tgtIndex);
/* tensor connection */ /* tensor connection */
if (s.enableGrad) {
XLink::MakeLink(&list, &t, MOVEMENT_COPYINDEXED); XLink::MakeLink(&list, &t, MOVEMENT_COPYINDEXED);
XLink::AddParamToHeadInt(&t, dim); XLink::AddParamToHeadInt(&t, dim);
XLink::AddParamToHeadInt(&t, copyNum); XLink::AddParamToHeadInt(&t, copyNum);
}
/* destroy variables */ /* destroy variables */
delete[] dimSize; delete[] dimSize;
...@@ -314,12 +316,14 @@ XTensor CopyIndexed(const XTensor &s, int dim, int * srcIndex, int indexSize, in ...@@ -314,12 +316,14 @@ XTensor CopyIndexed(const XTensor &s, int dim, int * srcIndex, int indexSize, in
memcpy(saveTgtIndex, tgtIndex, indexSize * sizeof(int)); memcpy(saveTgtIndex, tgtIndex, indexSize * sizeof(int));
/* tensor connection */ /* tensor connection */
if (s.enableGrad) {
XLink::MakeLink(&s, NULL, &t, MOVEMENT_COPYINDEXED); XLink::MakeLink(&s, NULL, &t, MOVEMENT_COPYINDEXED);
XLink::AddParamToHeadInt(&t, dim); XLink::AddParamToHeadInt(&t, dim);
XLink::AddParamToHeadPointer(&t, saveSrcIndex); XLink::AddParamToHeadPointer(&t, saveSrcIndex);
XLink::AddParamToHeadInt(&t, indexSize); XLink::AddParamToHeadInt(&t, indexSize);
XLink::AddParamToHeadPointer(&t, saveTgtIndex); XLink::AddParamToHeadPointer(&t, saveTgtIndex);
XLink::AddParamToHeadInt(&t, copyNum); XLink::AddParamToHeadInt(&t, copyNum);
}
/* destroy variables */ /* destroy variables */
delete[] dimSize; delete[] dimSize;
......
...@@ -134,7 +134,9 @@ XTensor CopyValues(const XTensor &s, XStream * stream) ...@@ -134,7 +134,9 @@ XTensor CopyValues(const XTensor &s, XStream * stream)
_CopyValues(&s, &t, stream); _CopyValues(&s, &t, stream);
/* tensor connection */ /* tensor connection */
if (s.enableGrad) {
XLink::MakeLink(&s, NULL, &t, MOVEMENT_COPYVALUES); XLink::MakeLink(&s, NULL, &t, MOVEMENT_COPYVALUES);
}
return t; return t;
} }
......
...@@ -93,7 +93,9 @@ XTensor Gather(XTensor &s, XTensor &index) ...@@ -93,7 +93,9 @@ XTensor Gather(XTensor &s, XTensor &index)
_Gather(&s, &t, &index); _Gather(&s, &t, &index);
/* tensor connection */ /* tensor connection */
if (s.enableGrad) {
XLink::MakeLink(&s, &index, &t, MOVEMENT_GATHER); XLink::MakeLink(&s, &index, &t, MOVEMENT_GATHER);
}
return t; return t;
} }
......
...@@ -21,6 +21,8 @@ ...@@ -21,6 +21,8 @@
#include "../../XTensor.h" #include "../../XTensor.h"
#include "../../XName.h" #include "../../XName.h"
#include "../../XBLAS.h"
#include "VectorBuffer.h"
#include "ReduceMax.h" #include "ReduceMax.h"
#include "ReduceMax.cuh" #include "ReduceMax.cuh"
...@@ -76,11 +78,67 @@ void _ReduceMax(const XTensor * input, XTensor * output, int dim) ...@@ -76,11 +78,67 @@ void _ReduceMax(const XTensor * input, XTensor * output, int dim)
} }
blockSize = stride * strideNum; blockSize = stride * strideNum;
if(input->dimSizeRDI[0] % (4 * 32 / sizeof(DTYPE)) == 0 && input->dimSizeRDI[0] >= 32){
int vecBufLength = 32 / sizeof(DTYPE);
if(dimRDI == 0){
//data is contiguous in dim 0
for(int i = 0; i < blockNum; i++){
DTYPE * ip = (DTYPE*)input->data + blockSize * i;
DTYPE * op = (DTYPE*)output->data + i;
VectorBuffer vecBuf[4];
for(int j = 0; j < 4; j++){
vecBuf[j] = VectorBuffer::loadu((DTYPE*)(ip) + j * vecBufLength);
}
for(int j = 1; j < strideNum / 32; j++){
const DTYPE* ptr = (DTYPE*)(ip + j * vecBufLength);
vecBuf[0] = vecBuf[0].maxData(VectorBuffer::loadu(ptr + 0 * vecBufLength));
vecBuf[1] = vecBuf[1].maxData(VectorBuffer::loadu(ptr + 1 * vecBufLength));
vecBuf[2] = vecBuf[2].maxData(VectorBuffer::loadu(ptr + 2 * vecBufLength));
vecBuf[3] = vecBuf[3].maxData(VectorBuffer::loadu(ptr + 3 * vecBufLength));
}
vecBuf[0] = vecBuf[0].maxData(vecBuf[1]);
vecBuf[0] = vecBuf[0].maxData(vecBuf[2]);
vecBuf[0] = vecBuf[0].maxData(vecBuf[3]);
DTYPE maxN = DTYPE_MIN;
for(int k = 0; k < vecBufLength; k++){
maxN = MAX(maxN,vecBuf[0][k]);
}
*op = maxN;
}
} else{
//data is separated
for(int i = 0; i < blockNum; i++){
for(int j = 0; j < input->dimSizeRDI[0] / 32; j++){
DTYPE * ip = (DTYPE*)input->data + blockSize * i;
DTYPE * op = (DTYPE*)output->data + stride * i;
VectorBuffer vecBuf[4];
for(int k = 0; k < 4; k++){
vecBuf[k] = VectorBuffer::loadu((DTYPE*)(ip) + (j * 4 + k) * 32 / sizeof(DTYPE));
}
for(int k = 1; k < strideNum; k++){
DTYPE * ptr = ip + k * stride + (j * 4) * vecBufLength;
vecBuf[0] = vecBuf[0].maxData(VectorBuffer::loadu(ptr + 0 * vecBufLength));
vecBuf[1] = vecBuf[1].maxData(VectorBuffer::loadu(ptr + 1 * vecBufLength));
vecBuf[2] = vecBuf[2].maxData(VectorBuffer::loadu(ptr + 2 * vecBufLength));
vecBuf[3] = vecBuf[3].maxData(VectorBuffer::loadu(ptr + 3 * vecBufLength));
}
for(int k = 0; k < 4; k++){
for(int l = 0; l < vecBufLength; l++)
*(op + j * 32 + 8 * k + l) = vecBuf[k][l];
}
}
}
}
}//run vector buffer
else{
for(int k = 0; k < blockNum; k++){ for(int k = 0; k < blockNum; k++){
DTYPE * ip = (DTYPE*)input->data + blockSize * k; DTYPE * ip = (DTYPE*)input->data + blockSize * k;
DTYPE * op = (DTYPE*)output->data + stride * k; DTYPE * op = (DTYPE*)output->data + stride * k;
for(int i = 0; i < stride; i++){ for(int i = 0; i < stride; i++){
DTYPE max = FLOAT_MIN; DTYPE max = DTYPE_MIN;
DTYPE * ipe = ip + blockSize; DTYPE * ipe = ip + blockSize;
for(DTYPE * ipb = ip + i; ipb < ipe; ipb += stride){ for(DTYPE * ipb = ip + i; ipb < ipe; ipb += stride){
DTYPE v = *ipb; DTYPE v = *ipb;
...@@ -91,6 +149,7 @@ void _ReduceMax(const XTensor * input, XTensor * output, int dim) ...@@ -91,6 +149,7 @@ void _ReduceMax(const XTensor * input, XTensor * output, int dim)
} }
} }
} }
}
} }
/* /*
...@@ -122,8 +181,10 @@ XTensor ReduceMax(const XTensor &input, int dim) ...@@ -122,8 +181,10 @@ XTensor ReduceMax(const XTensor &input, int dim)
_ReduceMax(&input, &output, dim); _ReduceMax(&input, &output, dim);
/* tensor connection */ /* tensor connection */
if (input.enableGrad) {
XLink::MakeLink(&input, NULL, &output, REDUCE_REDUCEMAX); XLink::MakeLink(&input, NULL, &output, REDUCE_REDUCEMAX);
XLink::AddParamToHeadInt(&output, dim); XLink::AddParamToHeadInt(&output, dim);
}
/* destroy variables */ /* destroy variables */
delete[] dimSize; delete[] dimSize;
...@@ -162,7 +223,7 @@ void ReduceMax(const XTensor &input, XTensor &output, int dim) ...@@ -162,7 +223,7 @@ void ReduceMax(const XTensor &input, XTensor &output, int dim)
/* call _ReduceMax function */ /* call _ReduceMax function */
_ReduceMax(&input, &output, dim); _ReduceMax(&input, &output, dim);
if (output.enableGrad) { if (input.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&input, NULL, &output, REDUCE_REDUCEMAX); XLink::MakeLink(&input, NULL, &output, REDUCE_REDUCEMAX);
XLink::AddParamToHeadInt(&output, dim); XLink::AddParamToHeadInt(&output, dim);
......
...@@ -77,8 +77,10 @@ XTensor ReduceMean(const XTensor &input, int dim) ...@@ -77,8 +77,10 @@ XTensor ReduceMean(const XTensor &input, int dim)
_ReduceMean(&input, &output, dim); _ReduceMean(&input, &output, dim);
/* tensor connection */ /* tensor connection */
if (input.enableGrad) {
XLink::MakeLink(&input, NULL, &output, REDUCE_REDUCEMEAN); XLink::MakeLink(&input, NULL, &output, REDUCE_REDUCEMEAN);
XLink::AddParamToHeadInt(&output, dim); XLink::AddParamToHeadInt(&output, dim);
}
/* destroy variables */ /* destroy variables */
delete[] dimSize; delete[] dimSize;
...@@ -119,7 +121,7 @@ void ReduceMean(const XTensor &input, XTensor &output, int dim) ...@@ -119,7 +121,7 @@ void ReduceMean(const XTensor &input, XTensor &output, int dim)
/* call _ReduceMean function */ /* call _ReduceMean function */
_ReduceMean(&input, &output, dim); _ReduceMean(&input, &output, dim);
if (output.enableGrad) { if (input.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&input, NULL, &output, REDUCE_REDUCEMEAN); XLink::MakeLink(&input, NULL, &output, REDUCE_REDUCEMEAN);
XLink::AddParamToHeadInt(&output, dim); XLink::AddParamToHeadInt(&output, dim);
......
...@@ -23,6 +23,9 @@ ...@@ -23,6 +23,9 @@
#include "ReduceSum.h" #include "ReduceSum.h"
#include "ReduceSum.cuh" #include "ReduceSum.cuh"
#include "../../XName.h" #include "../../XName.h"
#include "../../XBLAS.h"
#include "VectorBuffer.h"
#include <iostream>
namespace nts{ // namespace nts(NiuTrans.Tensor) namespace nts{ // namespace nts(NiuTrans.Tensor)
...@@ -82,6 +85,74 @@ void _ReduceSum(const XTensor * input, XTensor * output, int dim, const XTensor ...@@ -82,6 +85,74 @@ void _ReduceSum(const XTensor * input, XTensor * output, int dim, const XTensor
} }
blockSize = stride * strideNum; blockSize = stride * strideNum;
if(input->dimSizeRDI[0] % (4 * 32 / sizeof(DTYPE)) == 0 && input->dimSizeRDI[0] >= 32){
int vecBufLength = 32 / sizeof(DTYPE);
if(dimRDI == 0){
//data is contiguous in dim 0
for(int i = 0; i < blockNum; i++){
// stride = 1
DTYPE * ip = (DTYPE*)input->data + blockSize * i;
DTYPE * op = (DTYPE*)output->data + i;
DTYPE * sp = shift != NULL ? (DTYPE*)shift->data + i : NULL;
DTYPE bias[32 / sizeof(DTYPE)] = {0};
if(shift != NULL){
for(int k = 0; k < 32 / sizeof(DTYPE); k++)
bias[k] = *(sp);
}
VectorBuffer vecBuf[4];
for(int j = 0; j < 4; j++){
vecBuf[j] = VectorBuffer::loadu((DTYPE*)(ip) + j * vecBufLength, isExp, power, bias);
}
for(int j = 1; j < strideNum / 32; j++){
const DTYPE* ptr = (DTYPE*)(ip + j * vecBufLength);
vecBuf[0] = vecBuf[0] + VectorBuffer::loadu(ptr + 0 * vecBufLength, isExp, power, bias);
vecBuf[1] = vecBuf[1] + VectorBuffer::loadu(ptr + 1 * vecBufLength, isExp, power, bias);
vecBuf[2] = vecBuf[2] + VectorBuffer::loadu(ptr + 2 * vecBufLength, isExp, power, bias);
vecBuf[3] = vecBuf[3] + VectorBuffer::loadu(ptr + 3 * vecBufLength, isExp, power, bias);
}
vecBuf[0] = ((vecBuf[0] + vecBuf[1]) + (vecBuf[2] + vecBuf[3]));
DTYPE sum = (DTYPE) 0.0;
for(int k = 0; k < vecBufLength; k++){
sum = sum + vecBuf[0][k];
}
*op = sum;
}
} else{
//data is separated
for(int i = 0; i < blockNum; i++){
for(int j = 0; j < input->dimSizeRDI[0] / 32; j++){
DTYPE * ip = (DTYPE*)input->data + blockSize * i;
DTYPE * op = (DTYPE*)output->data + stride * i;
DTYPE * sp = shift != NULL ? (DTYPE*)shift->data + stride * i : NULL;
DTYPE bias[4 * 32 / sizeof(DTYPE)] = {0};
if(shift != NULL){
for(int k = 0; k < 4 * 32 / sizeof(DTYPE); k++)
bias[k] = *(sp + k);
}
VectorBuffer vecBuf[4];
for(int k = 0; k < 4; k++){
vecBuf[k] = VectorBuffer::loadu((DTYPE*)(ip) + (j * 4 + k) * 32 / sizeof(DTYPE), isExp, power, bias + j * 32 / sizeof(DTYPE));
}
for(int k = 1; k < strideNum; k++){
DTYPE * ptr = ip + k * stride + (j * 4) * vecBufLength;
vecBuf[0] = vecBuf[0] + VectorBuffer::loadu(ptr + 0 * vecBufLength, isExp, power, bias);
vecBuf[1] = vecBuf[1] + VectorBuffer::loadu(ptr + 1 * vecBufLength, isExp, power, bias + 1 * vecBufLength);
vecBuf[2] = vecBuf[2] + VectorBuffer::loadu(ptr + 2 * vecBufLength, isExp, power, bias + 2 * vecBufLength);
vecBuf[3] = vecBuf[3] + VectorBuffer::loadu(ptr + 3 * vecBufLength, isExp, power, bias + 3 * vecBufLength);
}
for(int k = 0; k < 4; k++){
for(int l = 0; l < vecBufLength; l++)
*(op + j * 32 + 8 * k + l) = vecBuf[k][l];
}
}
}
}
}//run vector buffer
else{
for(int k = 0; k < blockNum; k++){ for(int k = 0; k < blockNum; k++){
DTYPE * ip = (DTYPE*)input->data + blockSize * k; DTYPE * ip = (DTYPE*)input->data + blockSize * k;
DTYPE * op = (DTYPE*)output->data + stride * k; DTYPE * op = (DTYPE*)output->data + stride * k;
...@@ -195,6 +266,8 @@ void _ReduceSum(const XTensor * input, XTensor * output, int dim, const XTensor ...@@ -195,6 +266,8 @@ void _ReduceSum(const XTensor * input, XTensor * output, int dim, const XTensor
} }
} }
} }
}
} }
/* /*
...@@ -233,10 +306,12 @@ XTensor ReduceSum(const XTensor &input, int dim, const XTensor &shift, DTYPE pow ...@@ -233,10 +306,12 @@ XTensor ReduceSum(const XTensor &input, int dim, const XTensor &shift, DTYPE pow
_ReduceSum(&input, &output, dim, &shift, power, isExp); _ReduceSum(&input, &output, dim, &shift, power, isExp);
/* tensor connection */ /* tensor connection */
if (input.enableGrad) {
XLink::MakeLink(&input, &shift, &output, REDUCE_REDUCESUM); XLink::MakeLink(&input, &shift, &output, REDUCE_REDUCESUM);
XLink::AddParamToHeadInt(&output, dim); XLink::AddParamToHeadInt(&output, dim);
XLink::AddParamToHead(&output, power); XLink::AddParamToHead(&output, power);
XLink::AddParamToHeadBool(&output, isExp); XLink::AddParamToHeadBool(&output, isExp);
}
/* destroy variables */ /* destroy variables */
delete[] dimSize; delete[] dimSize;
...@@ -268,7 +343,7 @@ void ReduceSum(const XTensor &input, XTensor &output, int dim, const XTensor &sh ...@@ -268,7 +343,7 @@ void ReduceSum(const XTensor &input, XTensor &output, int dim, const XTensor &sh
/* call _ReduceSum function */ /* call _ReduceSum function */
_ReduceSum(&input, &output, dim, &shift, power, isExp); _ReduceSum(&input, &output, dim, &shift, power, isExp);
if (output.enableGrad) { if (input.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&input, &shift, &output, REDUCE_REDUCESUM); XLink::MakeLink(&input, &shift, &output, REDUCE_REDUCESUM);
XLink::AddParamToHeadInt(&output, dim); XLink::AddParamToHeadInt(&output, dim);
...@@ -312,10 +387,12 @@ XTensor ReduceSum(const XTensor &input, int dim, DTYPE power, bool isExp) ...@@ -312,10 +387,12 @@ XTensor ReduceSum(const XTensor &input, int dim, DTYPE power, bool isExp)
_ReduceSum(&input, &output, dim, NULL, power, isExp); _ReduceSum(&input, &output, dim, NULL, power, isExp);
/* tensor connection */ /* tensor connection */
if (input.enableGrad) {
XLink::MakeLink(&input, NULL, &output, REDUCE_REDUCESUM); XLink::MakeLink(&input, NULL, &output, REDUCE_REDUCESUM);
XLink::AddParamToHeadInt(&output, dim); XLink::AddParamToHeadInt(&output, dim);
XLink::AddParamToHead(&output, power); XLink::AddParamToHead(&output, power);
XLink::AddParamToHeadBool(&output, isExp); XLink::AddParamToHeadBool(&output, isExp);
}
/* destroy variables */ /* destroy variables */
delete[] dimSize; delete[] dimSize;
...@@ -361,7 +438,7 @@ void ReduceSum(const XTensor &input, XTensor &output, int dim, DTYPE power, bool ...@@ -361,7 +438,7 @@ void ReduceSum(const XTensor &input, XTensor &output, int dim, DTYPE power, bool
/* call _ReduceSum function */ /* call _ReduceSum function */
_ReduceSum(&input, &output, dim, NULL, power, isExp); _ReduceSum(&input, &output, dim, NULL, power, isExp);
if (output.enableGrad) { if (input.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&input, NULL, &output, REDUCE_REDUCESUM); XLink::MakeLink(&input, NULL, &output, REDUCE_REDUCESUM);
XLink::AddParamToHeadInt(&output, dim); XLink::AddParamToHeadInt(&output, dim);
......
...@@ -73,8 +73,10 @@ XTensor ReduceSumSquared(const XTensor &input, int dim, const XTensor &shift) ...@@ -73,8 +73,10 @@ XTensor ReduceSumSquared(const XTensor &input, int dim, const XTensor &shift)
_ReduceSumSquared(&input, &output, dim, &shift); _ReduceSumSquared(&input, &output, dim, &shift);
/* tensor connection */ /* tensor connection */
if (input.enableGrad) {
XLink::MakeLink(&input, &shift, &output, REDUCE_REDUCESUMSQUARED); XLink::MakeLink(&input, &shift, &output, REDUCE_REDUCESUMSQUARED);
XLink::AddParamToHeadInt(&output, dim); XLink::AddParamToHeadInt(&output, dim);
}
/* destroy variables */ /* destroy variables */
delete[] dimSize; delete[] dimSize;
...@@ -116,7 +118,7 @@ void ReduceSumSquared(const XTensor &input, XTensor &output, int dim, const XTen ...@@ -116,7 +118,7 @@ void ReduceSumSquared(const XTensor &input, XTensor &output, int dim, const XTen
/* call _ReduceSumSquared function */ /* call _ReduceSumSquared function */
_ReduceSumSquared(&input, &output, dim, &shift); _ReduceSumSquared(&input, &output, dim, &shift);
if (output.enableGrad) { if (input.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&input, &shift, &output, REDUCE_REDUCESUMSQUARED); XLink::MakeLink(&input, &shift, &output, REDUCE_REDUCESUMSQUARED);
XLink::AddParamToHeadInt(&output, dim); XLink::AddParamToHeadInt(&output, dim);
......
...@@ -76,8 +76,10 @@ XTensor ReduceVariance(const XTensor &input, int dim, const XTensor &mean) ...@@ -76,8 +76,10 @@ XTensor ReduceVariance(const XTensor &input, int dim, const XTensor &mean)
_ReduceVariance(&input, &output, dim, &mean); _ReduceVariance(&input, &output, dim, &mean);
/* tensor connection */ /* tensor connection */
if (input.enableGrad) {
XLink::MakeLink(&input, &mean, &output, REDUCE_REDUCEVARIANCE); XLink::MakeLink(&input, &mean, &output, REDUCE_REDUCEVARIANCE);
XLink::AddParamToHeadInt(&output, dim); XLink::AddParamToHeadInt(&output, dim);
}
/* destroy variables */ /* destroy variables */
delete[] dimSize; delete[] dimSize;
...@@ -119,7 +121,7 @@ void ReduceVariance(const XTensor &input, XTensor &output, int dim, const XTenso ...@@ -119,7 +121,7 @@ void ReduceVariance(const XTensor &input, XTensor &output, int dim, const XTenso
/* call _ReduceVariance function */ /* call _ReduceVariance function */
_ReduceVariance(&input, &output, dim, &mean); _ReduceVariance(&input, &output, dim, &mean);
if (output.enableGrad) { if (input.enableGrad) {
/* tensor connection */ /* tensor connection */
XLink::MakeLink(&input, &mean, &output, REDUCE_REDUCEVARIANCE); XLink::MakeLink(&input, &mean, &output, REDUCE_REDUCEVARIANCE);
XLink::AddParamToHeadInt(&output, dim); XLink::AddParamToHeadInt(&output, dim);
......
/* NiuTrans.Tensor - an open-source tensor library
 * Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: ZHANG Yuhao (email: zhangyuhao@stu.neu.edu.cn) 2019-07-23
*/
#include "VectorBuffer.h"
namespace nts {
/* number of DTYPE elements held by one 32-byte buffer */
int VectorBuffer::size()
{
    return (int)(32 / sizeof(DTYPE));
}
/* default constructor: the buffer starts zero-filled via the in-class
   initializer of "values" (see VectorBuffer.h), so nothing to do here */
VectorBuffer::VectorBuffer()
{
}
/*
constructor that replicates a single value across the buffer
>> val - the value stored into every element
*/
VectorBuffer::VectorBuffer(DTYPE val)
{
    const int n = size();
    for (int i = 0; i < n; ++i)
        values[i] = val;
}
/*
load size() consecutive elements into a fresh buffer, optionally
transforming each element on the fly. The per-element pipeline is:
    v = ptr[i] - bias[i]   (only when bias != NULL)
    v = v^power            (1.0, 2.0 and 0.5 take fast paths)
    v = exp(v)             (only when isExp)
>> ptr - source data (must provide at least size() readable elements)
>> isExp - whether exp() is applied to each transformed element
>> power - the exponent applied to each (shifted) element
>> bias - optional per-element shift subtracted before the power
<< the loaded (and transformed) buffer
*/
VectorBuffer VectorBuffer::loadu(const DTYPE* ptr, bool isExp , DTYPE power , DTYPE* bias )
{
    const int count = 32 / sizeof(DTYPE);
    VectorBuffer vec;

    for (int i = 0; i != count; ++i) {
        /* shift first, if a bias vector was supplied */
        DTYPE v = (bias == NULL) ? ptr[i] : ptr[i] - bias[i];

        /* apply the power; the common exponents avoid the generic pow() */
        if (power == (DTYPE)2.0)
            v = v * v;
        else if (power == (DTYPE)0.5)
            v = (DTYPE)sqrt(v);
        else if (power != (DTYPE)1.0)
            v = (DTYPE)pow(v, power);

        vec.values[i] = isExp ? (DTYPE)exp(v) : v;
    }

    return vec;
}
/* read-only access to the idx-th element */
const DTYPE& VectorBuffer::operator[](int idx)const
{
    return values[idx];
}
/*
element-wise addition of two buffers.
NOTE(review): the previous implementation accumulated into *this and then
returned a copy, so "c = a + b" silently modified a — surprising semantics
for operator+. The sum is now built in a fresh buffer and the left operand
is left untouched; every call site of the form "x = x + y" (the only form
used by the reduction routines) produces identical results.
>> a - the buffer added to *this
<< a new buffer holding the element-wise sum
*/
VectorBuffer VectorBuffer::operator+(const VectorBuffer &a)
{
    VectorBuffer result;
    for (int i = 0; i != size(); i++) {
        result.values[i] = this->values[i] + a[i];
    }
    return result;
}
/*
element-wise maximum of two buffers.
NOTE(review): the previous implementation wrote the maxima into *this and
returned a copy, mutating the receiver as a side effect. The result is now
computed into a fresh buffer; call sites of the form
"x = x.maxData(y)" (the only form used by _ReduceMax) are unaffected.
>> a - the buffer compared against *this
<< a new buffer where each element is MAX(this->values[i], a[i])
*/
VectorBuffer VectorBuffer::maxData(const VectorBuffer &a) {
    VectorBuffer result;
    for (int i = 0; i != size(); i++) {
        result.values[i] = MAX(a[i], this->values[i]);
    }
    return result;
}
}/* end of the nts (NiuTrans.Tensor) namespace */
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
 * Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: ZHANG Yuhao (email: zhangyuhao@stu.neu.edu.cn) 2019-07-23
*/
//#include <cstring>
#include <math.h>
#include "../../XGlobal.h"
namespace nts {
/* a small fixed-size (32-byte) vector buffer used to manually vectorize the
   inner loops of the CPU reduction routines (_ReduceSum / _ReduceMax) */
class VectorBuffer {
private:
    /* the underlying elements; 32 bytes total, zero-initialized */
    DTYPE values[32 / sizeof(DTYPE)] = { 0 };
public:
    /* number of DTYPE elements per buffer, i.e. 32 / sizeof(DTYPE) */
    static int size();
    /* default constructor (buffer stays zero-filled) */
    VectorBuffer();
    /* constructor that fills every element with val */
    VectorBuffer(DTYPE val);
    /* load size() elements from ptr, optionally applying
       (x - bias)^power and exp() to each element on the fly */
    static VectorBuffer loadu(const DTYPE* ptr, bool isExp = false, DTYPE power = (DTYPE)1.0F, DTYPE* bias = NULL);
    /* read-only element access */
    const DTYPE& operator[](int idx)const;
    /* element-wise addition of two buffers */
    VectorBuffer operator+(const VectorBuffer &a);
    /* element-wise maximum of two buffers */
    VectorBuffer maxData(const VectorBuffer &a);
};
}
\ No newline at end of file
...@@ -99,8 +99,10 @@ XTensor Concatenate(const TensorList &smalls, int dim) ...@@ -99,8 +99,10 @@ XTensor Concatenate(const TensorList &smalls, int dim)
_Merge(&smalls, &big, dim); _Merge(&smalls, &big, dim);
/* tensor connection */ /* tensor connection */
if (tensor->enableGrad) {
XLink::MakeLink(&smalls, &big, SHAPE_MERGE); XLink::MakeLink(&smalls, &big, SHAPE_MERGE);
XLink::AddParamToHeadInt(&big, dim); XLink::AddParamToHeadInt(&big, dim);
}
/* destroy variables */ /* destroy variables */
delete[] dimSize; delete[] dimSize;
...@@ -127,8 +129,10 @@ XTensor Concatenate(const TensorList &smalls, int dim) ...@@ -127,8 +129,10 @@ XTensor Concatenate(const TensorList &smalls, int dim)
_ConcatenateSolely(&smalls, &big, dim); _ConcatenateSolely(&smalls, &big, dim);
/* tensor connection */ /* tensor connection */
if (tensor->enableGrad) {
XLink::MakeLink(&smalls, &big, SHAPE_CONCATENATE); XLink::MakeLink(&smalls, &big, SHAPE_CONCATENATE);
XLink::AddParamToHeadInt(&big, dim); XLink::AddParamToHeadInt(&big, dim);
}
/* destroy variables */ /* destroy variables */
delete[] dimSize; delete[] dimSize;
...@@ -309,8 +313,10 @@ XTensor Concatenate(const XTensor &smallA, const XTensor &smallB, int dim) ...@@ -309,8 +313,10 @@ XTensor Concatenate(const XTensor &smallA, const XTensor &smallB, int dim)
_Merge(&smalls, &big, dim); _Merge(&smalls, &big, dim);
/* tensor connection */ /* tensor connection */
if (tensor->enableGrad) {
XLink::MakeLink(&smalls, &big, SHAPE_MERGE); XLink::MakeLink(&smalls, &big, SHAPE_MERGE);
XLink::AddParamToHeadInt(&big, dim); XLink::AddParamToHeadInt(&big, dim);
}
/* destroy variables */ /* destroy variables */
delete[] dimSize; delete[] dimSize;
...@@ -337,8 +343,10 @@ XTensor Concatenate(const XTensor &smallA, const XTensor &smallB, int dim) ...@@ -337,8 +343,10 @@ XTensor Concatenate(const XTensor &smallA, const XTensor &smallB, int dim)
_ConcatenateSolely(&smalls, &big, dim); _ConcatenateSolely(&smalls, &big, dim);
/* tensor connection */ /* tensor connection */
if (tensor->enableGrad) {
XLink::MakeLink(&smalls, &big, SHAPE_CONCATENATE); XLink::MakeLink(&smalls, &big, SHAPE_CONCATENATE);
XLink::AddParamToHeadInt(&big, dim); XLink::AddParamToHeadInt(&big, dim);
}
/* destroy variables */ /* destroy variables */
delete[] dimSize; delete[] dimSize;
......
...@@ -222,9 +222,11 @@ XTensor Merge(const XTensor &s, int whereToMerge, int leadingDim) ...@@ -222,9 +222,11 @@ XTensor Merge(const XTensor &s, int whereToMerge, int leadingDim)
_Merge(&s, &t, whereToMerge, leadingDim); _Merge(&s, &t, whereToMerge, leadingDim);
/* tensor connections */ /* tensor connections */
if (s.enableGrad) {
XLink::MakeLink(&s, NULL, &t, SHAPE_MERGE); XLink::MakeLink(&s, NULL, &t, SHAPE_MERGE);
XLink::AddParamToHeadInt(&t, whereToMerge); XLink::AddParamToHeadInt(&t, whereToMerge);
XLink::AddParamToHeadInt(&t, leadingDim); XLink::AddParamToHeadInt(&t, leadingDim);
}
/* destroy variables */ /* destroy variables */
delete[] dimSize; delete[] dimSize;
...@@ -261,7 +263,7 @@ void Merge(const XTensor &s, XTensor &t, int whereToMerge, int leadingDim) ...@@ -261,7 +263,7 @@ void Merge(const XTensor &s, XTensor &t, int whereToMerge, int leadingDim)
/* call _Merge function */ /* call _Merge function */
_Merge(&s, &t, whereToMerge, leadingDim); _Merge(&s, &t, whereToMerge, leadingDim);
if (t.enableGrad) { if (s.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&s, NULL, &t, SHAPE_MERGE); XLink::MakeLink(&s, NULL, &t, SHAPE_MERGE);
XLink::AddParamToHeadInt(&t, whereToMerge); XLink::AddParamToHeadInt(&t, whereToMerge);
...@@ -412,8 +414,10 @@ XTensor Merge(const TensorList &smalls, int whereToMerge) ...@@ -412,8 +414,10 @@ XTensor Merge(const TensorList &smalls, int whereToMerge)
_Merge(&smalls, &big, whereToMerge); _Merge(&smalls, &big, whereToMerge);
/* tensor connections */ /* tensor connections */
if (tensor->enableGrad) {
XLink::MakeLink(&smalls, &big, SHAPE_MERGE_LIST); XLink::MakeLink(&smalls, &big, SHAPE_MERGE_LIST);
XLink::AddParamToHeadInt(&big, whereToMerge); XLink::AddParamToHeadInt(&big, whereToMerge);
}
/* destroy variables */ /* destroy variables */
delete[] dimSize; delete[] dimSize;
...@@ -453,8 +457,10 @@ XTensor Merge(const XTensor &smallA, const XTensor &smallB, int whereToMerge) ...@@ -453,8 +457,10 @@ XTensor Merge(const XTensor &smallA, const XTensor &smallB, int whereToMerge)
_Merge(&smalls, &big, whereToMerge); _Merge(&smalls, &big, whereToMerge);
/* tensor connections */ /* tensor connections */
if (smallA.enableGrad) {
XLink::MakeLink(&smalls, &big, SHAPE_MERGE_LIST); XLink::MakeLink(&smalls, &big, SHAPE_MERGE_LIST);
XLink::AddParamToHeadInt(&big, whereToMerge); XLink::AddParamToHeadInt(&big, whereToMerge);
}
/* destroy variables */ /* destroy variables */
delete[] dimSize; delete[] dimSize;
......
...@@ -43,7 +43,9 @@ XTensor Reshape(XTensor &s, int order, int * dimSize) ...@@ -43,7 +43,9 @@ XTensor Reshape(XTensor &s, int order, int * dimSize)
t.Reshape(order, dimSize); t.Reshape(order, dimSize);
/* tensor connections */ /* tensor connections */
if (s.enableGrad) {
XLink::MakeLink(&s, NULL, &t, SHAPE_RESHAPE); XLink::MakeLink(&s, NULL, &t, SHAPE_RESHAPE);
}
return t; return t;
} }
...@@ -57,7 +59,7 @@ void Reshape(XTensor &s, XTensor &t, int order, int * dimSize) ...@@ -57,7 +59,7 @@ void Reshape(XTensor &s, XTensor &t, int order, int * dimSize)
/* call Reshape function */ /* call Reshape function */
t.Reshape(order, dimSize); t.Reshape(order, dimSize);
if (t.enableGrad) { if (s.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&s, NULL, &t, SHAPE_RESHAPE); XLink::MakeLink(&s, NULL, &t, SHAPE_RESHAPE);
} }
......
...@@ -217,9 +217,11 @@ XTensor Split(const XTensor &s, int whereToSplit, int splitNum) ...@@ -217,9 +217,11 @@ XTensor Split(const XTensor &s, int whereToSplit, int splitNum)
_Split(&s, &t, whereToSplit, splitNum); _Split(&s, &t, whereToSplit, splitNum);
/* tensor connections */ /* tensor connections */
if (s.enableGrad) {
XLink::MakeLink(&s, NULL, &t, SHAPE_SPLIT); XLink::MakeLink(&s, NULL, &t, SHAPE_SPLIT);
XLink::AddParamToHeadInt(&t, whereToSplit); XLink::AddParamToHeadInt(&t, whereToSplit);
XLink::AddParamToHeadInt(&t, splitNum); XLink::AddParamToHeadInt(&t, splitNum);
}
/* destroy variables */ /* destroy variables */
delete[] dimSize; delete[] dimSize;
...@@ -251,7 +253,7 @@ void Split(const XTensor &s, XTensor &t, int whereToSplit, int splitNum) ...@@ -251,7 +253,7 @@ void Split(const XTensor &s, XTensor &t, int whereToSplit, int splitNum)
/* call _Split function */ /* call _Split function */
_Split(&s, &t, whereToSplit, splitNum); _Split(&s, &t, whereToSplit, splitNum);
if (t.enableGrad) { if (s.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&s, NULL, &t, SHAPE_SPLIT); XLink::MakeLink(&s, NULL, &t, SHAPE_SPLIT);
XLink::AddParamToHeadInt(&t, whereToSplit); XLink::AddParamToHeadInt(&t, whereToSplit);
...@@ -409,6 +411,8 @@ void Split(const XTensor &big, TensorList &smalls, int whereToSplit, int splitNu ...@@ -409,6 +411,8 @@ void Split(const XTensor &big, TensorList &smalls, int whereToSplit, int splitNu
/* tensor connections */ /* tensor connections */
for(int i = 0; i < smalls.count; i++){ for(int i = 0; i < smalls.count; i++){
XTensor * s = (XTensor*)smalls.Get(i); XTensor * s = (XTensor*)smalls.Get(i);
if (s->enableGrad) {
XLink::MakeLink(&big, NULL, s, SHAPE_SPLIT_LIST); XLink::MakeLink(&big, NULL, s, SHAPE_SPLIT_LIST);
XLink::AddParamToHeadInt(s, whereToSplit); XLink::AddParamToHeadInt(s, whereToSplit);
...@@ -416,6 +420,7 @@ void Split(const XTensor &big, TensorList &smalls, int whereToSplit, int splitNu ...@@ -416,6 +420,7 @@ void Split(const XTensor &big, TensorList &smalls, int whereToSplit, int splitNu
block, rather than the total number of the splits */ block, rather than the total number of the splits */
XLink::AddParamToHeadInt(s, i); XLink::AddParamToHeadInt(s, i);
} }
}
} }
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
...@@ -121,7 +121,9 @@ XTensor Squeeze(XTensor & source, int leadingDim) ...@@ -121,7 +121,9 @@ XTensor Squeeze(XTensor & source, int leadingDim)
_Squeeze(&source, &target, leadingDim); _Squeeze(&source, &target, leadingDim);
/* tensor connections */ /* tensor connections */
if (source.enableGrad) {
XLink::MakeLink(&source, NULL, &target, SHAPE_SQUEEZE); XLink::MakeLink(&source, NULL, &target, SHAPE_SQUEEZE);
}
return target; return target;
} }
...@@ -135,7 +137,7 @@ void Squeeze(XTensor & source, XTensor & target, int leadingDim) ...@@ -135,7 +137,7 @@ void Squeeze(XTensor & source, XTensor & target, int leadingDim)
/* call _Squeeze function */ /* call _Squeeze function */
_Squeeze(&source, &target, leadingDim); _Squeeze(&source, &target, leadingDim);
if (target.enableGrad) { if (source.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&source, NULL, &target, SHAPE_SQUEEZE); XLink::MakeLink(&source, NULL, &target, SHAPE_SQUEEZE);
} }
......
...@@ -144,9 +144,11 @@ XTensor Transpose(const XTensor &a, const int i, const int j) ...@@ -144,9 +144,11 @@ XTensor Transpose(const XTensor &a, const int i, const int j)
_Transpose(&a, &b, i, j); _Transpose(&a, &b, i, j);
/* tensor connection */ /* tensor connection */
if (a.enableGrad) {
XLink::MakeLink(&a, NULL, &b, SHAPE_TRANSPOSE); XLink::MakeLink(&a, NULL, &b, SHAPE_TRANSPOSE);
XLink::AddParamToHeadInt(&b, i); XLink::AddParamToHeadInt(&b, i);
XLink::AddParamToHeadInt(&b, j); XLink::AddParamToHeadInt(&b, j);
}
/* destroy variables */ /* destroy variables */
delete[] dimSize; delete[] dimSize;
......
...@@ -156,9 +156,11 @@ XTensor Unsqueeze(const XTensor &a, int dim, int dSize) ...@@ -156,9 +156,11 @@ XTensor Unsqueeze(const XTensor &a, int dim, int dSize)
_Unsqueeze(&a, &b, dim, dSize); _Unsqueeze(&a, &b, dim, dSize);
/* tensor connections */ /* tensor connections */
if (a.enableGrad) {
XLink::MakeLink(&a, NULL, &b, SHAPE_UNSQUEEZE); XLink::MakeLink(&a, NULL, &b, SHAPE_UNSQUEEZE);
XLink::AddParamToHeadInt(&b, dim); XLink::AddParamToHeadInt(&b, dim);
XLink::AddParamToHeadInt(&b, dSize); XLink::AddParamToHeadInt(&b, dSize);
}
/* destroy variables */ /* destroy variables */
delete[] dimSize; delete[] dimSize;
...@@ -191,7 +193,7 @@ void Unsqueeze(const XTensor &a, XTensor &b, int dim, int dSize) ...@@ -191,7 +193,7 @@ void Unsqueeze(const XTensor &a, XTensor &b, int dim, int dSize)
/* call _Unsqueeze function */ /* call _Unsqueeze function */
_Unsqueeze(&a, &b, dim, dSize); _Unsqueeze(&a, &b, dim, dSize);
if (b.enableGrad) { if (a.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, NULL, &b, SHAPE_UNSQUEEZE); XLink::MakeLink(&a, NULL, &b, SHAPE_UNSQUEEZE);
XLink::AddParamToHeadInt(&b, dim); XLink::AddParamToHeadInt(&b, dim);
......
...@@ -377,8 +377,8 @@ get the top-k items ...@@ -377,8 +377,8 @@ get the top-k items
template<class T> __global__ template<class T> __global__
void KernelTopK3(T * input, int stride, int strideNum, int blockNum, int k, T minValue, T * output, int * index) void KernelTopK3(T * input, int stride, int strideNum, int blockNum, int k, T minValue, T * output, int * index)
{ {
__shared__ CudaHeapNode<T> heapData[(SHARED_MEMORY_SIZE - 1024 * sizeof(T)) / sizeof(CudaHeapNode<T>)]; __shared__ CudaHeapNode<T> heapData[(SHARED_MEMORY_SIZE - 512 * sizeof(T)) / sizeof(CudaHeapNode<T>)];
__shared__ T eachHeapMaxValue[1024]; __shared__ T eachHeapMaxValue[512];
/*optimization k size the parameter must more than half of k*/ /*optimization k size the parameter must more than half of k*/
int parameter = 0; int parameter = 0;
...@@ -429,7 +429,7 @@ void KernelTopK3(T * input, int stride, int strideNum, int blockNum, int k, T mi ...@@ -429,7 +429,7 @@ void KernelTopK3(T * input, int stride, int strideNum, int blockNum, int k, T mi
} }
__syncthreads(); __syncthreads();
/*to merge the heap use another way*/ /* to merge the heap use another way */
T minData = minValue; T minData = minValue;
int heapLimit = heap.count / 2; int heapLimit = heap.count / 2;
if (heapLimit % 2 == 0 && heapLimit != 0) heapLimit -= 1; if (heapLimit % 2 == 0 && heapLimit != 0) heapLimit -= 1;
...@@ -438,12 +438,13 @@ void KernelTopK3(T * input, int stride, int strideNum, int blockNum, int k, T mi ...@@ -438,12 +438,13 @@ void KernelTopK3(T * input, int stride, int strideNum, int blockNum, int k, T mi
minData = heap.items[counter].value; minData = heap.items[counter].value;
} }
eachHeapMaxValue[threadIdx.y * blockDim.x + threadIdx.x] = minData; eachHeapMaxValue[threadIdx.y * blockDim.x + threadIdx.x] = minData;
//need more optimation //need more optimation
if (i == 0) { if (i == 0) {
int threadLimit = (threadIdx.y + 1) * blockDim.x; int threadLimit = threadIdx.y * blockDim.x + min(blockDim.x,strideNum);
CudaXHeap<MIN_HEAP, T> chooseHeap(k, heapData + k * ((blockDim.x * blockDim.y) + threadIdx.y)); CudaXHeap<MIN_HEAP, T> chooseHeap(k, heapData + k * ((blockDim.x * blockDim.y) + threadIdx.y));
int counter = threadIdx.y * blockDim.x; int counter = threadIdx.y * blockDim.x;
for (; counter < threadIdx.y * blockDim.x + k; ++counter) { for (; counter < threadIdx.y * blockDim.x + min(k, blockDim.x); ++counter) {
chooseHeap.Push(counter, eachHeapMaxValue[counter]); chooseHeap.Push(counter, eachHeapMaxValue[counter]);
} }
for (; counter < threadLimit; ++counter) { for (; counter < threadLimit; ++counter) {
...@@ -451,15 +452,16 @@ void KernelTopK3(T * input, int stride, int strideNum, int blockNum, int k, T mi ...@@ -451,15 +452,16 @@ void KernelTopK3(T * input, int stride, int strideNum, int blockNum, int k, T mi
chooseHeap.ReplaceTop(counter, eachHeapMaxValue[counter]); chooseHeap.ReplaceTop(counter, eachHeapMaxValue[counter]);
} }
} }
int heapNum = chooseHeap.count;
CudaXHeap<MIN_HEAP, T> ansHeapData(k, k - parameter, heapData + k * chooseHeap.items[0].index); CudaXHeap<MIN_HEAP, T> ansHeapData(k, k - parameter, heapData + k * chooseHeap.items[0].index);
int miss = parameter; int miss = parameter;
for (counter = 1; counter < k; ++counter) { for (counter = 1; counter < heapNum; ++counter) {
chooseHeap.items[0] = chooseHeap.items[chooseHeap.count - 1]; chooseHeap.items[0] = chooseHeap.items[chooseHeap.count - 1];
chooseHeap.count--; chooseHeap.count--;
chooseHeap.Down(0); chooseHeap.Down(0);
CudaHeapNode<T> * cmpHeapData = heapData + k * (chooseHeap.items[0].index); CudaHeapNode<T> * cmpHeapData = heapData + k * (chooseHeap.items[0].index);
int cmpHeapLimit = 0; int cmpHeapLimit = 0;
if (counter + heapLimit <= k - parameter){ if (counter + heapLimit <= k - parameter && heapNum == k){
cmpHeapLimit = heapLimit; cmpHeapLimit = heapLimit;
} }
/* take the max data from the minHeap,so start search from the leaf node */ /* take the max data from the minHeap,so start search from the leaf node */
...@@ -840,7 +842,7 @@ void _CudaTopK(const XTensor * a, XTensor * b, XTensor * index, int dim, int k) ...@@ -840,7 +842,7 @@ void _CudaTopK(const XTensor * a, XTensor * b, XTensor * index, int dim, int k)
/* we run the kernel if the heaps can fit into the shared memory */ /* we run the kernel if the heaps can fit into the shared memory */
cudaGrids[1] *= cudaBlocks[1]; cudaGrids[1] *= cudaBlocks[1];
cudaBlocks[1] = 1; cudaBlocks[1] = 1;
if ((cudaBlocks[0] * cudaBlocks[1] + 1) * k * (a->unitSize + sizeof(int)) < SHARED_MEMORY_SIZE) { if ((cudaBlocks[0] * cudaBlocks[1] + 1) * k * (a->unitSize + sizeof(int)) + (512 * sizeof(int))< SHARED_MEMORY_SIZE) {
if (a->dataType == DEFAULT_DTYPE) { if (a->dataType == DEFAULT_DTYPE) {
KernelTopK3<DTYPE> <<<dim3(cudaGrids[0], cudaGrids[1]), dim3(cudaBlocks[0], cudaBlocks[1]) >>> KernelTopK3<DTYPE> <<<dim3(cudaGrids[0], cudaGrids[1]), dim3(cudaBlocks[0], cudaBlocks[1]) >>>
((DTYPE*)a->data, stride, strideNumA, blockNum, k, DTYPE_MIN, ((DTYPE*)a->data, stride, strideNumA, blockNum, k, DTYPE_MIN,
...@@ -869,7 +871,7 @@ void _CudaTopK(const XTensor * a, XTensor * b, XTensor * index, int dim, int k) ...@@ -869,7 +871,7 @@ void _CudaTopK(const XTensor * a, XTensor * b, XTensor * index, int dim, int k)
//delete indexA; //delete indexA;
int workerNum = WORKERSNUM; int workerNum = WORKERSNUM;
GDevs.GetCudaThread2D(a->mem->devID, GDevs.GetCudaThread2D(a->devID,
workerNum, stride * blockNum, MAX_INT, workerNum, stride * blockNum, MAX_INT,
cudaGrids, cudaBlocks); cudaGrids, cudaBlocks);
if (a->dataType == DEFAULT_DTYPE) { if (a->dataType == DEFAULT_DTYPE) {
......
...@@ -81,8 +81,10 @@ XTensor DropoutWithIndex(const XTensor &x, XTensor &maskIndex, DTYPE scale) ...@@ -81,8 +81,10 @@ XTensor DropoutWithIndex(const XTensor &x, XTensor &maskIndex, DTYPE scale)
_ScaleAndShiftMe(&c, scale); _ScaleAndShiftMe(&c, scale);
/* tensor connections */ /* tensor connections */
if (x.enableGrad) {
XLink::MakeLink(&x, &maskIndex, &c, MOVEMENT_DROPOUTWITHINDEX); XLink::MakeLink(&x, &maskIndex, &c, MOVEMENT_DROPOUTWITHINDEX);
XLink::AddParamToHead(&c, scale); XLink::AddParamToHead(&c, scale);
}
return c; return c;
} }
......
...@@ -78,7 +78,9 @@ XTensor HardTanH(const XTensor &x) ...@@ -78,7 +78,9 @@ XTensor HardTanH(const XTensor &x)
_HardTanH(&x, &y); _HardTanH(&x, &y);
/* tensor connection */ /* tensor connection */
if (x.enableGrad) {
XLink::MakeLink(&x, NULL, &y, FUNC_HARDTANH); XLink::MakeLink(&x, NULL, &y, FUNC_HARDTANH);
}
return y; return y;
} }
...@@ -92,7 +94,7 @@ void HardTanH(const XTensor &x, XTensor &y) ...@@ -92,7 +94,7 @@ void HardTanH(const XTensor &x, XTensor &y)
/* call _HardTanH function */ /* call _HardTanH function */
_HardTanH(&x, &y); _HardTanH(&x, &y);
if (y.enableGrad) { if (x.enableGrad) {
/* tensor connection */ /* tensor connection */
XLink::MakeLink(&x, NULL, &y, FUNC_HARDTANH); XLink::MakeLink(&x, NULL, &y, FUNC_HARDTANH);
} }
......
...@@ -54,7 +54,9 @@ XTensor Identity(const XTensor &x) ...@@ -54,7 +54,9 @@ XTensor Identity(const XTensor &x)
_Identity(&x, &y); _Identity(&x, &y);
/* tensor connection */ /* tensor connection */
if (x.enableGrad) {
XLink::MakeLink(&x, NULL, &y, FUNC_IDENTITY); XLink::MakeLink(&x, NULL, &y, FUNC_IDENTITY);
}
return y; return y;
} }
...@@ -68,7 +70,7 @@ void Identity(const XTensor &x, XTensor &y) ...@@ -68,7 +70,7 @@ void Identity(const XTensor &x, XTensor &y)
/* call _Identity function */ /* call _Identity function */
_Identity(&x, &y); _Identity(&x, &y);
if (y.enableGrad) { if (x.enableGrad) {
/* tensor connection */ /* tensor connection */
XLink::MakeLink(&x, NULL, &y, FUNC_IDENTITY); XLink::MakeLink(&x, NULL, &y, FUNC_IDENTITY);
} }
......
...@@ -188,8 +188,10 @@ XTensor LogSoftmax(const XTensor &x, int leadDim) ...@@ -188,8 +188,10 @@ XTensor LogSoftmax(const XTensor &x, int leadDim)
_LogSoftmax(&x, &y, ld); _LogSoftmax(&x, &y, ld);
/* tensor connection */ /* tensor connection */
if (x.enableGrad) {
XLink::MakeLink(&x, NULL, &y, FUNC_LOGSOFTMAX); XLink::MakeLink(&x, NULL, &y, FUNC_LOGSOFTMAX);
XLink::AddParamToHeadInt(&y, ld); XLink::AddParamToHeadInt(&y, ld);
}
return y; return y;
} }
...@@ -215,7 +217,7 @@ void LogSoftmax(const XTensor &x, XTensor &y, int leadDim) ...@@ -215,7 +217,7 @@ void LogSoftmax(const XTensor &x, XTensor &y, int leadDim)
/* call _LogSoftmax function */ /* call _LogSoftmax function */
_LogSoftmax(&x, &y, ld); _LogSoftmax(&x, &y, ld);
if (y.enableGrad) { if (x.enableGrad) {
/* tensor connection */ /* tensor connection */
XLink::MakeLink(&x, NULL, &y, FUNC_LOGSOFTMAX); XLink::MakeLink(&x, NULL, &y, FUNC_LOGSOFTMAX);
XLink::AddParamToHeadInt(&y, ld); XLink::AddParamToHeadInt(&y, ld);
......
...@@ -70,7 +70,9 @@ XTensor Rectify(const XTensor &x) ...@@ -70,7 +70,9 @@ XTensor Rectify(const XTensor &x)
_Rectify(&x, &y); _Rectify(&x, &y);
/* tensor connection */ /* tensor connection */
if (x.enableGrad) {
XLink::MakeLink(&x, NULL, &y, FUNC_RECTIFY); XLink::MakeLink(&x, NULL, &y, FUNC_RECTIFY);
}
return y; return y;
} }
...@@ -84,7 +86,7 @@ void Rectify(const XTensor &x, XTensor &y) ...@@ -84,7 +86,7 @@ void Rectify(const XTensor &x, XTensor &y)
/* call _Rectify function */ /* call _Rectify function */
_Rectify(&x, &y); _Rectify(&x, &y);
if (y.enableGrad) { if (x.enableGrad) {
/* tensor connection */ /* tensor connection */
XLink::MakeLink(&x, NULL, &y, FUNC_RECTIFY); XLink::MakeLink(&x, NULL, &y, FUNC_RECTIFY);
} }
......
...@@ -73,7 +73,9 @@ XTensor Sigmoid(const XTensor &x) ...@@ -73,7 +73,9 @@ XTensor Sigmoid(const XTensor &x)
_Sigmoid(&x, &y); _Sigmoid(&x, &y);
/* tensor connection */ /* tensor connection */
if (x.enableGrad) {
XLink::MakeLink(&x, NULL, &y, FUNC_SIGMOID); XLink::MakeLink(&x, NULL, &y, FUNC_SIGMOID);
}
return y; return y;
} }
...@@ -87,7 +89,7 @@ void Sigmoid(const XTensor &x, XTensor &y) ...@@ -87,7 +89,7 @@ void Sigmoid(const XTensor &x, XTensor &y)
/* call _Sigmoid function */ /* call _Sigmoid function */
_Sigmoid(&x, &y); _Sigmoid(&x, &y);
if (y.enableGrad) { if (x.enableGrad) {
/* tensor connection */ /* tensor connection */
XLink::MakeLink(&x, NULL, &y, FUNC_SIGMOID); XLink::MakeLink(&x, NULL, &y, FUNC_SIGMOID);
} }
......
...@@ -142,8 +142,10 @@ XTensor Softmax(const XTensor &x, int leadDim) ...@@ -142,8 +142,10 @@ XTensor Softmax(const XTensor &x, int leadDim)
_Softmax(&x, &y, ld); _Softmax(&x, &y, ld);
/* tensor connection */ /* tensor connection */
if (x.enableGrad) {
XLink::MakeLink(&x, NULL, &y, FUNC_SOFTMAX); XLink::MakeLink(&x, NULL, &y, FUNC_SOFTMAX);
XLink::AddParamToHeadInt(&y, ld); XLink::AddParamToHeadInt(&y, ld);
}
return y; return y;
} }
...@@ -161,7 +163,7 @@ void Softmax(const XTensor &x, XTensor &y, int leadDim) ...@@ -161,7 +163,7 @@ void Softmax(const XTensor &x, XTensor &y, int leadDim)
/* call _Softmax function */ /* call _Softmax function */
_Softmax(&x, &y, ld); _Softmax(&x, &y, ld);
if (y.enableGrad) { if (x.enableGrad) {
/* tensor connection */ /* tensor connection */
XLink::MakeLink(&x, NULL, &y, FUNC_SOFTMAX); XLink::MakeLink(&x, NULL, &y, FUNC_SOFTMAX);
XLink::AddParamToHeadInt(&y, ld); XLink::AddParamToHeadInt(&y, ld);
......
...@@ -277,8 +277,11 @@ XTensor CrossEntropy(const XTensor & output, const XTensor & gold, ...@@ -277,8 +277,11 @@ XTensor CrossEntropy(const XTensor & output, const XTensor & gold,
tails.Add((XTensor*)&gold); tails.Add((XTensor*)&gold);
tails.Add(weight); tails.Add(weight);
tails.Add(padding); tails.Add(padding);
if (output.enableGrad) {
XLink::MakeLink(&tails, &loss, LOSS_CROSSENTROPY); XLink::MakeLink(&tails, &loss, LOSS_CROSSENTROPY);
XLink::AddParamToHeadInt(&loss, dim); XLink::AddParamToHeadInt(&loss, dim);
}
return loss; return loss;
} }
...@@ -302,8 +305,11 @@ XTensor CrossEntropy(const XTensor & output, const XTensor & gold, ...@@ -302,8 +305,11 @@ XTensor CrossEntropy(const XTensor & output, const XTensor & gold,
tails.Add((XTensor*)&gold); tails.Add((XTensor*)&gold);
tails.Add(weight); tails.Add(weight);
tails.Add((XTensor*)&padding); tails.Add((XTensor*)&padding);
if (output.enableGrad) {
XLink::MakeLink(&tails, &loss, LOSS_CROSSENTROPY); XLink::MakeLink(&tails, &loss, LOSS_CROSSENTROPY);
XLink::AddParamToHeadInt(&loss, dim); XLink::AddParamToHeadInt(&loss, dim);
}
return loss; return loss;
} }
......
...@@ -406,6 +406,68 @@ bool TestSetData5() ...@@ -406,6 +406,68 @@ bool TestSetData5()
#endif // USE_CUDA #endif // USE_CUDA
} }
/*
case 6: test SetDataRange function.
generate data items with a range by start, end and the step
*/
bool TestSetData6()
{
/* a input tensor of size (5) */
int order = 1;
int * dimSize = new int[order];
dimSize[0] = 5;
int unitNum = 1;
for (int i = 0; i < order; i++)
unitNum *= dimSize[i];
DTYPE answer[5] = {5.2F, 3.2F, 1.2F, -0.8F, -2.8F};
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * s = NewTensor(order, dimSize);
/* initialize variables */
s->SetZeroAll();
/* call _SetDataRange function */
_SetDataRange(s, 5.2, -3.2, -2);
/* check results */
cpuTest = s->CheckData(answer, unitNum, 1e-4F);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensors */
XTensor * sGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
/* initialize variables */
sGPU->SetZeroAll();
/* call _SetDataRange function */
_SetDataRange(sGPU, 5.2, -3.2, -2);
gpuTest = sGPU->CheckData(answer, unitNum, 1e-4F);
/* destroy variables */
delete s;
delete sGPU;
delete[] dimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete s;
delete[] dimSize;
return cpuTest;
#endif // USE_CUDA
}
/* other cases */ /* other cases */
/* /*
TODO!! TODO!!
...@@ -462,6 +524,15 @@ bool TestSetData() ...@@ -462,6 +524,15 @@ bool TestSetData()
else else
XPRINT(0, stdout, ">> case 5 passed!\n"); XPRINT(0, stdout, ">> case 5 passed!\n");
/* case 6 test */
caseFlag = TestSetData6();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 6 failed!\n");
}
else
XPRINT(0, stdout, ">> case 6 passed!\n");
/* other cases test */ /* other cases test */
/* /*
TODO!! TODO!!
......
...@@ -105,10 +105,62 @@ bool TestTopK1() ...@@ -105,10 +105,62 @@ bool TestTopK1()
TopK(sUser, tUser2, indexUser2, dim, k); TopK(sUser, tUser2, indexUser2, dim, k);
/* check results */ /* check results */
cpuTest = t1->CheckData(tAnswer1, tUnitNum) && tUser1.CheckData(tAnswer1, tUnitNum)
&& t2->CheckData(tAnswer2, tUnitNum) && tUser2.CheckData(tAnswer2, tUnitNum) for (int i = 0; i < tDimSize[1]; ++i)
&& index1->CheckData(indexAnswer1, tUnitNum) && indexUser1.CheckData(indexAnswer1, tUnitNum) {
&& index2->CheckData(indexAnswer2, tUnitNum) && indexUser2.CheckData(indexAnswer2, tUnitNum); for (int j = 0; j < tDimSize[0]; ++j)
{
float tmpData = ((float *)t1->data)[i + tDimSize[1] * j];
int tmpIndex = ((int *)index1->data)[i + tDimSize[1] * j];
float tmpDataUser = ((float *)tUser1.data)[i + tDimSize[1] * j];
int tmpIndexUser = ((int *)indexUser1.data)[i + tDimSize[1] * j];
bool flag = false;
bool flagUser = false;
for (int k = 0; k < tDimSize[0]; ++k)
{
float* ans = tAnswer1[0];
int* ansIndex = indexAnswer1[0];
if (tmpData == ans[i + tDimSize[1] * k] && tmpIndex == ansIndex[i + tDimSize[1] * k])
{
flag = true;
}
if (tmpDataUser == ans[i + tDimSize[1] * k] && tmpIndexUser == ansIndex[i + tDimSize[1] * k])
{
flagUser = true;
}
}
cpuTest = cpuTest&&flag&&flagUser;
}
}
for (int i = 0; i < tDimSize[0]; ++i)
{
for (int j = 0; j < tDimSize[1]; ++j)
{
float tmpData = ((float *)t2->data)[i * tDimSize[1] + j];
int tmpIndex = ((int *)index2->data)[i * tDimSize[1] + j];
float tmpDataUser = ((float *)tUser2.data)[i * tDimSize[1] + j];
int tmpIndexUser = ((int *)indexUser2.data)[i * tDimSize[1] + j];
bool flag = false;
bool flagUser = false;
for (int k = 0; k < tDimSize[1]; ++k)
{
float* ans = tAnswer2[0];
int* ansIndex = indexAnswer2[0];
if (tmpData == ans[i * tDimSize[1] + k] && tmpIndex == ansIndex[i * tDimSize[1] + k])
{
flag = true;
}
if (tmpDataUser == ans[i * tDimSize[1] + k] && tmpIndexUser == ansIndex[i * tDimSize[1] + k])
{
flagUser = true;
}
}
cpuTest = cpuTest&&flag&&flagUser;
}
}
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
...@@ -152,10 +204,74 @@ bool TestTopK1() ...@@ -152,10 +204,74 @@ bool TestTopK1()
TopK(sUserGPU, tUserGPU2, indexUserGPU2, dim, k); TopK(sUserGPU, tUserGPU2, indexUserGPU2, dim, k);
/* check results */ /* check results */
gpuTest = tGPU1->CheckData(tAnswer1, tUnitNum) && tUserGPU1.CheckData(tAnswer1, tUnitNum) float* checkData = new float[tUnitNum];
&& tGPU2->CheckData(tAnswer2, tUnitNum) && tUserGPU2.CheckData(tAnswer2, tUnitNum) int* checkIndex = new int[tUnitNum];
&& indexGPU1->CheckData(indexAnswer1, tUnitNum) && indexUserGPU1.CheckData(indexAnswer1, tUnitNum) float* checkDataUser = new float[tUnitNum];
&& indexGPU2->CheckData(indexAnswer2, tUnitNum) && indexUserGPU2.CheckData(indexAnswer2, tUnitNum); int* checkIndexUser = new int[tUnitNum];
cudaMemcpy(checkData, tGPU1->data, sizeof(DTYPE)*tUnitNum,cudaMemcpyDeviceToHost);
cudaMemcpy(checkIndex, indexGPU1->data, sizeof(int)*tUnitNum, cudaMemcpyDeviceToHost);
cudaMemcpy(checkDataUser, tUserGPU1.data, sizeof(DTYPE)*tUnitNum, cudaMemcpyDeviceToHost);
cudaMemcpy(checkIndexUser, indexUserGPU1.data, sizeof(int)*tUnitNum, cudaMemcpyDeviceToHost);
for (int i = 0; i < tDimSize[1]; ++i)
{
for (int j = 0; j < tDimSize[0]; ++j)
{
float tmpData = ((float *)checkData)[i + tDimSize[1] * j];
int tmpIndex = ((int *)checkIndex)[i + tDimSize[1] * j];
float tmpDataUser = ((float *)checkDataUser)[i + tDimSize[1] * j];
int tmpIndexUser = ((int *)checkIndexUser)[i + tDimSize[1] * j];
bool flag = false;
bool flagUser = false;
for (int k = 0; k < tDimSize[0]; ++k)
{
float* ans = tAnswer1[0];
int* ansIndex = indexAnswer1[0];
if (tmpData == ans[i + tDimSize[1] * k] && tmpIndex == ansIndex[i + tDimSize[1] * k])
{
flag = true;
}
if (tmpDataUser == ans[i + tDimSize[1] * k] && tmpIndexUser == ansIndex[i + tDimSize[1] * k])
{
flagUser = true;
}
}
gpuTest = gpuTest&&flag&&flagUser;
}
}
cudaMemcpy(checkData, tGPU2->data, sizeof(DTYPE)*tUnitNum, cudaMemcpyDeviceToHost);
cudaMemcpy(checkIndex, indexGPU2->data, sizeof(int)*tUnitNum, cudaMemcpyDeviceToHost);
cudaMemcpy(checkDataUser, tUserGPU2.data, sizeof(DTYPE)*tUnitNum, cudaMemcpyDeviceToHost);
cudaMemcpy(checkIndexUser, indexUserGPU2.data, sizeof(int)*tUnitNum, cudaMemcpyDeviceToHost);
for (int i = 0; i < tDimSize[0]; ++i)
{
for (int j = 0; j < tDimSize[1]; ++j)
{
float tmpData = ((float *)checkData)[i * tDimSize[1] + j];
int tmpIndex = ((int *)checkIndex)[i * tDimSize[1] + j];
float tmpDataUser = ((float *)checkDataUser)[i * tDimSize[1] + j];
int tmpIndexUser = ((int *)checkIndexUser)[i * tDimSize[1] + j];
bool flag = false;
bool flagUser = false;
for (int k = 0; k < tDimSize[1]; ++k)
{
float* ans = tAnswer2[0];
int* ansIndex = indexAnswer2[0];
if (tmpData == ans[i * tDimSize[1] + k] && tmpIndex == ansIndex[i * tDimSize[1] + k])
{
flag = true;
}
if (tmpDataUser == ans[i * tDimSize[1] + k] && tmpIndexUser == ansIndex[i * tDimSize[1] + k])
{
flagUser = true;
}
}
gpuTest = gpuTest&&flag&&flagUser;
}
}
/* destroy variables */ /* destroy variables */
delete s; delete s;
...@@ -170,6 +286,10 @@ bool TestTopK1() ...@@ -170,6 +286,10 @@ bool TestTopK1()
delete indexGPU2; delete indexGPU2;
delete[] sDimSize; delete[] sDimSize;
delete[] tDimSize; delete[] tDimSize;
delete[] checkData;
delete[] checkIndex;
delete[] checkDataUser;
delete[] checkIndexUser;
return cpuTest && gpuTest; return cpuTest && gpuTest;
#else #else
...@@ -247,8 +367,33 @@ bool TestTopK2() ...@@ -247,8 +367,33 @@ bool TestTopK2()
TopK(sUser, tUser, indexUser, dim, k); TopK(sUser, tUser, indexUser, dim, k);
/* check results */ /* check results */
cpuTest = t->CheckData(tAnswer, tUnitNum) && tUser.CheckData(tAnswer, tUnitNum)
&& index->CheckData(indexAnswer, tUnitNum) && indexUser.CheckData(indexAnswer, tUnitNum); for (int i = 0; i < tDimSize[0]; ++i)
{
for (int j = 0; j < tDimSize[1]; ++j)
{
float tmpData = ((float *)t->data)[i * tDimSize[1] + j];
int tmpIndex = ((int *)index->data)[i * tDimSize[1] + j];
float tmpDataUser = ((float *)tUser.data)[i * tDimSize[1] + j];
int tmpIndexUser = ((int *)indexUser.data)[i * tDimSize[1] + j];
bool flag = false;
bool flagUser = false;
for (int k = 0; k < tDimSize[1]; ++k)
{
float* ans = tAnswer[0];
int* ansIndex = indexAnswer[0];
if (tmpData == ans[i * tDimSize[1] + k] && tmpIndex == ansIndex[i * tDimSize[1] + k])
{
flag = true;
}
if (tmpDataUser == ans[i * tDimSize[1] + k] && tmpIndexUser == ansIndex[i * tDimSize[1] + k])
{
flagUser = true;
}
}
cpuTest = cpuTest&&flag&&flagUser;
}
}
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
...@@ -279,8 +424,42 @@ bool TestTopK2() ...@@ -279,8 +424,42 @@ bool TestTopK2()
TopK(sUserGPU, tUserGPU, indexUserGPU, dim, k); TopK(sUserGPU, tUserGPU, indexUserGPU, dim, k);
/* check results */ /* check results */
gpuTest = tGPU->CheckData(tAnswer, tUnitNum) && tUserGPU.CheckData(tAnswer, tUnitNum) float* checkData = new float[tUnitNum];
&& indexGPU->CheckData(indexAnswer, tUnitNum) && indexUserGPU.CheckData(indexAnswer, tUnitNum); int* checkIndex = new int[tUnitNum];
float* checkDataUser = new float[tUnitNum];
int* checkIndexUser = new int[tUnitNum];
cudaMemcpy(checkData, tGPU->data, sizeof(DTYPE)*tUnitNum, cudaMemcpyDeviceToHost);
cudaMemcpy(checkIndex, indexGPU->data, sizeof(int)*tUnitNum, cudaMemcpyDeviceToHost);
cudaMemcpy(checkDataUser, tUserGPU.data, sizeof(DTYPE)*tUnitNum, cudaMemcpyDeviceToHost);
cudaMemcpy(checkIndexUser, indexUserGPU.data, sizeof(int)*tUnitNum, cudaMemcpyDeviceToHost);
for (int i = 0; i < tDimSize[0]; ++i)
{
for (int j = 0; j < tDimSize[1]; ++j)
{
float tmpData = ((float *)checkData)[i * tDimSize[1] + j];
int tmpIndex = ((int *)checkIndex)[i * tDimSize[1] + j];
float tmpDataUser = ((float *)checkDataUser)[i * tDimSize[1] + j];
int tmpIndexUser = ((int *)checkIndexUser)[i * tDimSize[1] + j];
bool flag = false;
bool flagUser = false;
for (int k = 0; k < tDimSize[1]; ++k)
{
float* ans = tAnswer[0];
int* ansIndex = indexAnswer[0];
if (tmpData == ans[i * tDimSize[1] + k] && tmpIndex == ansIndex[i * tDimSize[1] + k])
{
flag = true;
}
if (tmpDataUser == ans[i * tDimSize[1] + k] && tmpIndexUser == ansIndex[i * tDimSize[1] + k])
{
flagUser = true;
}
}
gpuTest = gpuTest&&flag&&flagUser;
}
}
/* destroy variables */ /* destroy variables */
delete s; delete s;
...@@ -291,6 +470,10 @@ bool TestTopK2() ...@@ -291,6 +470,10 @@ bool TestTopK2()
delete indexGPU; delete indexGPU;
delete[] sDimSize; delete[] sDimSize;
delete[] tDimSize; delete[] tDimSize;
delete[] checkData;
delete[] checkIndex;
delete[] checkDataUser;
delete[] checkIndexUser;
return cpuTest && gpuTest; return cpuTest && gpuTest;
#else #else
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论