NiuTrans / NiuTrans.Tensor · Commits

Commit bc49d32a, authored Oct 13, 2019 by xuchen

    Merge with liyinqiao branch and add the max/min function

parent cadda317
Showing 69 changed files with 1827 additions and 1007 deletions.
source/network/Main.cpp  +7  -3
source/tensor/XBLAS.cpp  +0  -175
source/tensor/XBLAS.h  +36  -77
source/tensor/XGlobal.h  +2  -0
source/tensor/XLink.cpp  +9  -0
source/tensor/XList.cpp  +80  -43
source/tensor/XList.h  +42  -25
source/tensor/XName.h  +3  -1
source/tensor/XTensor.cpp  +119  -189
source/tensor/XTensor.h  +43  -34
source/tensor/core/arithmetic/Div.cpp  +12  -8
source/tensor/core/arithmetic/DivDim.cpp  +7  -5
source/tensor/core/arithmetic/Mask.cpp  +5  -3
source/tensor/core/arithmetic/MatrixMul.cpp  +14  -10
source/tensor/core/arithmetic/MatrixMul2D.cpp  +12  -11
source/tensor/core/arithmetic/MatrixMulBatched.cpp  +13  -15
source/tensor/core/arithmetic/MulAndShift.cpp  +81  -5
source/tensor/core/arithmetic/MulAndShift.h  +3  -0
source/tensor/core/arithmetic/Multiply.cpp  +12  -8
source/tensor/core/arithmetic/MultiplyDim.cpp  +11  -7
source/tensor/core/arithmetic/Sub.cpp  +11  -7
source/tensor/core/arithmetic/SubDim.cpp  +6  -4
source/tensor/core/arithmetic/Sum.cpp  +63  -30
source/tensor/core/arithmetic/SumDim.cpp  +12  -8
source/tensor/core/getandset/ConvertDataType.cpp  +3  -2
source/tensor/core/getandset/OnehotAndIndex.cpp  +22  -46
source/tensor/core/getandset/OnehotAndIndex.cu  +2  -3
source/tensor/core/getandset/OnehotAndIndex.cuh  +3  -2
source/tensor/core/getandset/OnehotAndIndex.h  +4  -5
source/tensor/core/getandset/Select.cpp  +6  -4
source/tensor/core/getandset/SetData.cpp  +37  -0
source/tensor/core/getandset/SetData.h  +3  -0
source/tensor/core/math/Binary.cpp  +4  -2
source/tensor/core/math/Clip.cpp  +25  -23
source/tensor/core/math/Compare.cpp  +93  -0
source/tensor/core/math/Compare.cu  +47  -0
source/tensor/core/math/Compare.cuh  +6  -0
source/tensor/core/math/Compare.h  +31  -0
source/tensor/core/math/Normalize.cpp  +7  -5
source/tensor/core/math/ScaleAndShift.cpp  +6  -4
source/tensor/core/math/Unary.cpp  +4  -2
source/tensor/core/movement/CopyIndexed.cpp  +15  -11
source/tensor/core/movement/CopyValues.cpp  +3  -1
source/tensor/core/movement/Gather.cpp  +3  -1
source/tensor/core/reduce/ReduceMax.cpp  +78  -17
source/tensor/core/reduce/ReduceMean.cpp  +7  -5
source/tensor/core/reduce/ReduceSum.cpp  +176  -99
source/tensor/core/reduce/ReduceSumSquared.cpp  +6  -4
source/tensor/core/reduce/ReduceVariance.cpp  +7  -5
source/tensor/core/reduce/VectorBuffer.cpp  +172  -0
source/tensor/core/reduce/VectorBuffer.h  +54  -0
source/tensor/core/shape/Concatenate.cpp  +18  -10
source/tensor/core/shape/Merge.cpp  +14  -8
source/tensor/core/shape/Reshape.cpp  +5  -3
source/tensor/core/shape/Split.cpp  +14  -9
source/tensor/core/shape/Squeeze.cpp  +4  -2
source/tensor/core/shape/Transpose.cpp  +5  -3
source/tensor/core/shape/Unsqueeze.cpp  +6  -4
source/tensor/core/sort/TopK.cu  +27  -25
source/tensor/function/DropoutWithIndex.cpp  +4  -2
source/tensor/function/HardTanH.cpp  +4  -2
source/tensor/function/Identity.cpp  +4  -2
source/tensor/function/LogSoftmax.cpp  +5  -3
source/tensor/function/Rectify.cpp  +4  -2
source/tensor/function/Sigmoid.cpp  +4  -2
source/tensor/function/Softmax.cpp  +5  -3
source/tensor/loss/CrossEntropy.cpp  +11  -6
source/tensor/test/TSetData.cpp  +71  -0
source/tensor/test/TTopK.cpp  +195  -12
source/network/Main.cpp

```diff
@@ -71,6 +71,9 @@ void BackwardTest()
     XTensor a;
     XTensor b;
     XTensor c;
+    a.enableGrad = true;
+    b.enableGrad = false;
+    c.enableGrad = false;
     XTensor mean;
     XTensor origin;
     InitTensor2D(&a, 2, 3);
@@ -88,14 +91,15 @@ void BackwardTest()
     b.Set1D(2.0F, 0);
     b.Set1D(1.0F, 1);
-    c = DivDim(a, b, 0);
+    DivDim(a, b, c, 0);
     c.Dump(stderr, "c:");
+    auto loss = CrossEntropy(c, a);
     //XLink::ShowNetwork(stderr, &c);
-    net.Backward(c);
+    net.Backward(loss);
     net.Dump(stderr);
     a.grad->Dump(stderr);
 }
```
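The two hunks above are the crux of the new autograd control: backpropagation is driven from a CrossEntropy loss rather than from c directly, and the per-tensor enableGrad flag keeps b and c out of the computation graph. A minimal sketch of the same pattern, assuming the NiuTrans.Tensor public headers and the XNet class used elsewhere in Main.cpp (the header paths here are assumptions, not taken from this commit):

```cpp
#include "XTensor.h"   /* assumed header locations for XTensor and XNet; */
#include "XNet.h"      /* adjust to the actual source layout             */

using namespace nts;

void BackwardSketch()
{
    XTensor a, b, c;
    a.enableGrad = true;    /* a participates in the autograd graph         */
    b.enableGrad = false;   /* b and c act as constants: XLink::MakeLink    */
    c.enableGrad = false;   /* (see below) skips them during link creation  */

    InitTensor2D(&a, 2, 3);
    InitTensor1D(&b, 2);
    a.SetDataRand();
    b.Set1D(2.0F, 0);
    b.Set1D(1.0F, 1);

    DivDim(a, b, c, 0);               /* c_{i,j} = a_{i,j} / b_i            */
    auto loss = CrossEntropy(c, a);   /* backward starts from the loss node */

    XNet net;
    net.Backward(loss);               /* gradients are accumulated into a   */
    a.grad->Dump(stderr);
}
```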
source/tensor/XBLAS.cpp

```diff
@@ -26,183 +26,9 @@
 */
 
 #ifdef WIN32
 #include <wtypes.h>
 #endif
 #include <stdlib.h>
 #include <stdio.h>
 #include "XBLAS.h"
 #include "XGlobal.h"
 
 /* the nts (NiuTrans.Tensor) namespace */
 namespace nts {
 
-#ifdef WIN32
-HINSTANCE hBLASDll;
-#endif
-
-/* single-precision floating matrix-matrix multiplication */
-void (*XBLAS_SGEMM)(OPENBLAS_CONST enum CBLAS_ORDER, OPENBLAS_CONST enum CBLAS_TRANSPOSE, OPENBLAS_CONST enum CBLAS_TRANSPOSE,
-                    OPENBLAS_CONST BLASINT, OPENBLAS_CONST BLASINT, OPENBLAS_CONST BLASINT,
-                    OPENBLAS_CONST float, OPENBLAS_CONST float *, OPENBLAS_CONST BLASINT,
-                    OPENBLAS_CONST float *, OPENBLAS_CONST BLASINT,
-                    OPENBLAS_CONST float, float *, OPENBLAS_CONST BLASINT);
-
-/* double-precision floating matrix-matrix multiplication */
-void (*XBLAS_DGEMM)(OPENBLAS_CONST enum CBLAS_ORDER, OPENBLAS_CONST enum CBLAS_TRANSPOSE, OPENBLAS_CONST enum CBLAS_TRANSPOSE,
-                    OPENBLAS_CONST BLASINT, OPENBLAS_CONST BLASINT, OPENBLAS_CONST BLASINT,
-                    OPENBLAS_CONST double, OPENBLAS_CONST double *, OPENBLAS_CONST BLASINT,
-                    OPENBLAS_CONST double *, OPENBLAS_CONST BLASINT,
-                    OPENBLAS_CONST double, double *, OPENBLAS_CONST BLASINT);
-
-/* single-precision floating vector-vector multiplication (rank-1) */
-void (*XBLAS_SGER)(OPENBLAS_CONST enum CBLAS_ORDER, OPENBLAS_CONST BLASINT M, OPENBLAS_CONST BLASINT N,
-                   OPENBLAS_CONST float alpha, OPENBLAS_CONST float * Y, OPENBLAS_CONST BLASINT,
-                   OPENBLAS_CONST float *, OPENBLAS_CONST BLASINT, float *, OPENBLAS_CONST BLASINT);
-
-/* double-precision floating vector-vector multiplication (rank-1) */
-void (*XBLAS_DGER)(OPENBLAS_CONST enum CBLAS_ORDER, OPENBLAS_CONST BLASINT M, OPENBLAS_CONST BLASINT N,
-                   OPENBLAS_CONST double alpha, OPENBLAS_CONST double * Y, OPENBLAS_CONST BLASINT,
-                   OPENBLAS_CONST double *, OPENBLAS_CONST BLASINT, double *, OPENBLAS_CONST BLASINT);
-
-/* set the number of threads */
-void (*XBLAS_SET_THREAD_NUM)(int);
-
-/* get the number of threads */
-//int (*XBLAS_GET_THREAD_NUM)();
-
-/* get the number of physical processors (cores).*/
-int (*XBLAS_GET_CORE_NUM)();
-
-/* get the CPU corename */
-//char * (*XBLAS_GET_CORE_NAME)();
-
-/* get the parallelization type used by OpenBLAS */
-//int (*XBLAS_GET_PARALLEL_TYPE)(void);
-
-#if defined(USE_BLAS)
-/* load some stuff for BLAS */
-void LoadBLAS(const char * dllFileName)
-{
-#ifndef CUDA_BLAS
-#ifdef _WIN32
-#if defined(OPENBLAS)
-    /* non-ascii characters are not supported yet */
-    wchar_t * fn = new wchar_t[strlen(dllFileName) + 1];
-    memset(fn, 0, sizeof(wchar_t) * (strlen(dllFileName) + 1));
-    for (int i = 0; i < strlen(dllFileName); i++)
-        fn[i] = dllFileName[i];
-    hBLASDll = LoadLibrary((LPCWSTR)fn);
-    if (!hBLASDll) {
-        XPRINT1(0, stderr, "[LoadBLAS] Error! Cannot load dll %s!\n", dllFileName);
-        exit(1);
-    }
-
-    /* matrix-matrix multiplicatoin */
-    (FARPROC&)XBLAS_SGEMM = GetProcAddress(hBLASDll, "cblas_sgemm");
-    (FARPROC&)XBLAS_DGEMM = GetProcAddress(hBLASDll, "cblas_dgemm");
-
-    /* vector-vector multiplication */
-    (FARPROC&)XBLAS_SGER = GetProcAddress(hBLASDll, "cblas_sger");
-    (FARPROC&)XBLAS_DGER = GetProcAddress(hBLASDll, "cblas_dger");
-
-    /* multi-threading */
-    (FARPROC&)XBLAS_SET_THREAD_NUM = GetProcAddress(hBLASDll, "openblas_set_num_threads");
-    //(FARPROC&)XBLAS_SET_THREAD_NUM = GetProcAddress(hBLASDll, "goto_set_num_threads");
-    //(FARPROC&)XBLAS_GET_THREAD_NUM = GetProcAddress(hBLASDll, "openblas_get_num_threads");
-    (FARPROC&)XBLAS_GET_CORE_NUM = GetProcAddress(hBLASDll, "openblas_get_num_procs");
-    //(FARPROC&)XBLAS_GET_CORE_NAME = GetProcAddress(hBLASDll, "openblas_get_corename");
-    //(FARPROC&)XBLAS_GET_PARALLEL_TYPE = GetProcAddress(hBLASDll, "openblas_get_parallel");
-
-    delete[] fn;
-#endif // defined(OPENBLAS)
-#if defined(MKL)
-    /* non-ascii characters are not supported yet */
-    wchar_t * fn = new wchar_t[strlen(dllFileName) + 1];
-    memset(fn, 0, sizeof(wchar_t) * (strlen(dllFileName) + 1));
-    for (int i = 0; i < strlen(dllFileName); i++)
-        fn[i] = dllFileName[i];
-    hBLASDll = LoadLibrary((LPCWSTR)fn);
-    if (!hBLASDll) {
-        XPRINT1(0, stderr, "[LoadBLAS] Error! Cannot load dll %s!\n", dllFileName);
-        exit(1);
-    }
-
-    /* matrix-matrix multiplicatoin */
-    (FARPROC&)XBLAS_SGEMM = GetProcAddress(hBLASDll, "cblas_sgemm");
-    (FARPROC&)XBLAS_DGEMM = GetProcAddress(hBLASDll, "cblas_dgemm");
-
-    /* vector-vector multiplication */
-    (FARPROC&)XBLAS_SGER = GetProcAddress(hBLASDll, "cblas_sger");
-    (FARPROC&)XBLAS_DGER = GetProcAddress(hBLASDll, "cblas_dger");
-
-    /* multi-threading */
-    (FARPROC&)XBLAS_SET_THREAD_NUM = GetProcAddress(hBLASDll, "MKL_Set_Num_Threads");
-    (FARPROC&)XBLAS_GET_CORE_NUM = GetProcAddress(hBLASDll, "MKL_Get_Max_Threads");
-#endif // defined(MKL)
-#else // _WIN32
-    XBLAS_SGEMM = &cblas_sgemm;
-    XBLAS_DGEMM = &cblas_dgemm;
-    XBLAS_SGER = &cblas_sger;
-    XBLAS_DGER = &cblas_dger;
-#if defined(OPENBLAS)
-    XBLAS_SET_THREAD_NUM = &openblas_set_num_threads;
-    XBLAS_GET_CORE_NUM = &openblas_get_num_procs;
-#endif // defined(OPENBLAS)
-#if defined(MKL)
-    XBLAS_SET_THREAD_NUM = &mkl_set_num_threads;
-    XBLAS_GET_CORE_NUM = &mkl_get_max_num_threads;
-#endif // defined(MKL)
-#endif // _WIN32
-    XBLAS_SET_THREAD_NUM(1);
-#endif // ndef(CUDA_BLAS)
-}
-
-/* unload the libs */
-void UnloadBLAS()
-{
-#ifdef _WIN32
-    if (!FreeLibrary(hBLASDll)) {
-        XPRINT(0, stderr, "[UnloadBLAS] Error! Cannot free the BLAS dll!\n");
-        exit(1);
-    }
-#else
-#endif
-}
-
-#else // undefined(USE_BLAS) || undefined(OPENBLAS)
-void LoadBLAS(const char * dllFileName)
-{
-    XPRINT(0, stderr, "[LoadBLAS] Error! No Blas lib is available. Please use OPENBLAS or MKL!\n");
-    exit(1);
-}
-
-void UnloadBLAS()
-{
-    XPRINT(0, stderr, "[UnloadBLAS] Error! No Blas lib is available. Please use OPENBLAS or MKL!\n");
-    exit(1);
-}
-#endif // defined(USE_BLAS) && defined(OPENBLAS)
-
 } /* end of the nts (NiuTrans.Tensor) namespace */
\ No newline at end of file
```
source/tensor/XBLAS.h

```diff
@@ -34,7 +34,6 @@ namespace nts{
 /* some of the code below is from OpenBLAS (https://github.com/xianyi/OpenBLAS) */
 //#define OPENBLAS
 
 #define OPENBLAS_CONST const
 typedef int BLASINT;
@@ -46,7 +45,26 @@ typedef enum CBLAS_SIDE {CblasLeft=141, CblasRight=142} CBLAS_SIDE;
 #if defined(USE_BLAS)
+#ifdef OPENBLAS
+#define XBLAS_SGEMM cblas_sgemm
+#define XBLAS_DGEMM cblas_dgemm
+#define XBLAS_SGER cblas_sger
+#define XBLAS_DGER cblas_dger
+#define XBLAS_SAXPY cblas_saxpy
+#define XBLAS_DAXPY cblas_daxpy
+#define XBLAS_SET_THREAD_NUM openblas_set_num_threads
+#define XBLAS_GET_CORE_NUM openblas_get_num_procs
+#endif
+#ifdef MKL
+#define XBLAS_SGEMM cblas_sgemm
+#define XBLAS_DGEMM cblas_dgemm
+#define XBLAS_SGER cblas_sger
+#define XBLAS_DGER cblas_dger
+#define XBLAS_SAXPY cblas_saxpy
+#define XBLAS_DAXPY cblas_daxpy
+#define XBLAS_SET_THREAD_NUM MKL_Set_Num_Threads
+#define XBLAS_GET_CORE_NUM MKL_Get_Max_Threads
+#endif
 
 /*
 single/double-precision floating matrix-matrix multiplication (rank-3)
 - SGEMM (ORDER, TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
@@ -62,14 +80,14 @@ where A, B and C are matrices,
 LDB(=N) specifies the size of the first dimension of B as declared in the calling (sub) program,
 and LDC(=N) specifies the size of the first dimension of C as declared in the calling (sub) program.
 */
-extern "C" void (*XBLAS_SGEMM)(OPENBLAS_CONST enum CBLAS_ORDER, OPENBLAS_CONST enum CBLAS_TRANSPOSE, OPENBLAS_CONST enum CBLAS_TRANSPOSE,
+extern "C" void XBLAS_SGEMM(OPENBLAS_CONST enum CBLAS_ORDER, OPENBLAS_CONST enum CBLAS_TRANSPOSE, OPENBLAS_CONST enum CBLAS_TRANSPOSE,
                OPENBLAS_CONST BLASINT, OPENBLAS_CONST BLASINT, OPENBLAS_CONST BLASINT,
                OPENBLAS_CONST float, OPENBLAS_CONST float *, OPENBLAS_CONST BLASINT,
                OPENBLAS_CONST float *, OPENBLAS_CONST BLASINT,
                OPENBLAS_CONST float, float *, OPENBLAS_CONST BLASINT);
 
 /* double-precision floating matrix-matrix multiplication */
-extern "C" void (*XBLAS_DGEMM)(OPENBLAS_CONST enum CBLAS_ORDER, OPENBLAS_CONST enum CBLAS_TRANSPOSE, OPENBLAS_CONST enum CBLAS_TRANSPOSE,
+extern "C" void XBLAS_DGEMM(OPENBLAS_CONST enum CBLAS_ORDER, OPENBLAS_CONST enum CBLAS_TRANSPOSE, OPENBLAS_CONST enum CBLAS_TRANSPOSE,
                OPENBLAS_CONST BLASINT, OPENBLAS_CONST BLASINT, OPENBLAS_CONST BLASINT,
                OPENBLAS_CONST double, OPENBLAS_CONST double *, OPENBLAS_CONST BLASINT,
                OPENBLAS_CONST double *, OPENBLAS_CONST BLASINT,
                OPENBLAS_CONST double,
...
@@ -88,24 +106,33 @@ where X and Y are vectors with m and n elements respectively,
 E.g., if we are using CblasRowMajor, the leading dimension is the number of columns of A.
 */
-extern "C" void (*XBLAS_SGER)(OPENBLAS_CONST enum CBLAS_ORDER, OPENBLAS_CONST BLASINT M, OPENBLAS_CONST BLASINT N, OPENBLAS_CONST float alpha,
+extern "C" void XBLAS_SGER(OPENBLAS_CONST enum CBLAS_ORDER, OPENBLAS_CONST BLASINT M, OPENBLAS_CONST BLASINT N, OPENBLAS_CONST float alpha,
               OPENBLAS_CONST float * Y, OPENBLAS_CONST BLASINT,
               OPENBLAS_CONST float *, OPENBLAS_CONST BLASINT,
               float *, OPENBLAS_CONST BLASINT);
 
 /* double-precision floating vector-vector multiplication (rank-1) */
-extern "C" void (*XBLAS_DGER)(OPENBLAS_CONST enum CBLAS_ORDER, OPENBLAS_CONST BLASINT M, OPENBLAS_CONST BLASINT N, OPENBLAS_CONST double alpha,
+extern "C" void XBLAS_DGER(OPENBLAS_CONST enum CBLAS_ORDER, OPENBLAS_CONST BLASINT M, OPENBLAS_CONST BLASINT N, OPENBLAS_CONST double alpha,
               OPENBLAS_CONST double * Y, OPENBLAS_CONST BLASINT,
               OPENBLAS_CONST double *, OPENBLAS_CONST BLASINT,
               double *, OPENBLAS_CONST BLASINT);
 
+/*
+some description
+*/
+extern "C" void XBLAS_SAXPY(OPENBLAS_CONST BLASINT n, OPENBLAS_CONST float a, OPENBLAS_CONST float * x,
+              OPENBLAS_CONST BLASINT incx, OPENBLAS_CONST float * y, OPENBLAS_CONST BLASINT incy);
+
+/* double-precision floating sumMe function */
+extern "C" void XBLAS_DAXPY(OPENBLAS_CONST BLASINT n, OPENBLAS_CONST double a, OPENBLAS_CONST double * x,
+              OPENBLAS_CONST BLASINT incx, OPENBLAS_CONST double * y, OPENBLAS_CONST BLASINT incy);
 
 /* set the number of threads */
-extern "C" void (*XBLAS_SET_THREAD_NUM)(int);
+extern "C" void XBLAS_SET_THREAD_NUM(int);
 
 /* get the number of threads */
 //extern "C" int (*XBLAS_GET_THREAD_NUM)();
 
 /* get the number of physical processors (cores).*/
-extern "C" int (*XBLAS_GET_CORE_NUM)();
+extern "C" int XBLAS_GET_CORE_NUM();
 
 /* get the CPU corename */
 //extern "C" char * (*XBLAS_GET_CORE_NAME)();
@@ -113,58 +140,6 @@ extern "C" int (*XBLAS_GET_CORE_NUM)();
 /* get the parallelization type used by OpenBLAS */
 //extern "C" int (*XBLAS_GET_PARALLEL_TYPE)(void);
 
-/* linux systems */
-#ifndef _WIN32
-/* cblas functions that are imported from the lib. See cblas.h in OpenBlas for more information */
-extern "C" void cblas_sgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB,
-              OPENBLAS_CONST BLASINT M, OPENBLAS_CONST BLASINT N, OPENBLAS_CONST BLASINT K,
-              OPENBLAS_CONST float alpha, OPENBLAS_CONST float * A, OPENBLAS_CONST BLASINT lda,
-              OPENBLAS_CONST float * B, OPENBLAS_CONST BLASINT ldb,
-              OPENBLAS_CONST float beta, float * C, OPENBLAS_CONST BLASINT ldc);
-extern "C" void cblas_dgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB,
-              OPENBLAS_CONST BLASINT M, OPENBLAS_CONST BLASINT N, OPENBLAS_CONST BLASINT K,
-              OPENBLAS_CONST double alpha, OPENBLAS_CONST double * A, OPENBLAS_CONST BLASINT lda,
-              OPENBLAS_CONST double * B, OPENBLAS_CONST BLASINT ldb,
-              OPENBLAS_CONST double beta, double * C, OPENBLAS_CONST BLASINT ldc);
-extern "C" void cblas_sger(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST BLASINT M, OPENBLAS_CONST BLASINT N,
-              OPENBLAS_CONST float alpha, OPENBLAS_CONST float * X, OPENBLAS_CONST BLASINT incX,
-              OPENBLAS_CONST float * Y, OPENBLAS_CONST BLASINT incY,
-              float * A, OPENBLAS_CONST BLASINT lda);
-extern "C" void cblas_dger(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST BLASINT M, OPENBLAS_CONST BLASINT N,
-              OPENBLAS_CONST double alpha, OPENBLAS_CONST double * X, OPENBLAS_CONST BLASINT incX,
-              OPENBLAS_CONST double * Y, OPENBLAS_CONST BLASINT incY,
-              double * A, OPENBLAS_CONST BLASINT lda);
-#if defined(OPENBLAS)
-/* better control of multi-threading */
-extern "C" void openblas_set_num_threads(int num_threads);
-extern "C" void goto_set_num_threads(int num_threads);
-//extern "C" int openblas_get_num_threads(void);
-extern "C" int openblas_get_num_procs(void);
-//extern "C" char* openblas_get_config(void);
-//extern "C" char* openblas_get_corename(void);
-//extern "C" int openblas_get_parallel(void);
-#endif
-#endif
-
-#if defined(MKL)
-/* better control of multi-threading */
-//_Mkl_Api(void,MKL_Set_Num_Threads,(int nth))
-//_Mkl_Api(int,MKL_Get_Max_Threads,(void))
-extern "C" void MKL_Set_Num_Threads(int num_threads);
-extern "C" int MKL_Get_Max_Threads();
-#define mkl_set_num_threads MKL_Set_Num_Threads
-#define mkl_get_max_num_threads MKL_Get_Max_Threads
-//extern "C" void mkl_set_num_threads(int num_threads);
-//extern "C" void omp_set_num_threads(int num_threads);
-//extern "C" int mkl_get_max_num_threads();
-#endif
 
 #if defined(CUDA_BLAS)
...
@@ -186,24 +161,8 @@ extern void BLASMatrixMULD(int deviceID, double * a, double * b, double * c, int
 #endif
 #endif
 
-#ifdef _WIN32
-#include "windows.h"
-extern HINSTANCE hBLASDll;
-#else
-#endif
-
-/* load some stuff for BLAS */
-extern void LoadBLAS(const char * dllFileName);
-
-/* unload the libs */
-extern void UnloadBLAS();
 
 } /* end of the nts (NiuTrans.Tensor) namespace */
 
 #endif
```
source/tensor/XGlobal.h

```diff
@@ -160,8 +160,10 @@ extern bool useCUDA;
 /* BLAS interfaces */
 #ifdef DOUBELPRICSION
 #define GEMM XBLAS_DGEMM
+#define AXPY XBLAS_DAXPY
 #else
 #define GEMM XBLAS_SGEMM
+#define AXPY XBLAS_SAXPY
 #endif
 
 extern void InitGlobalAll();
```
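Alongside GEMM, the new AXPY macro gives the rest of the code a precision-agnostic name for the BLAS AXPY routine, which computes y = a*x + y. A plain reference implementation of that semantics (not NiuTrans code, just an illustration of what cblas_saxpy does):

```cpp
#include <cstdio>

// y[i*incy] += a * x[i*incx] over n elements, i.e. the SAXPY contract
static void saxpy_ref(int n, float a, const float* x, int incx, float* y, int incy)
{
    for (int i = 0; i < n; i++)
        y[i * incy] += a * x[i * incx];
}

int main()
{
    float x[3] = {1.0f, 2.0f, 3.0f};
    float y[3] = {10.0f, 10.0f, 10.0f};
    saxpy_ref(3, 2.0f, x, 1, y, 1);
    for (float v : y)
        printf("%g ", v);   /* prints: 12 14 16 */
    printf("\n");
    return 0;
}
```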
source/tensor/XLink.cpp

```diff
@@ -300,6 +300,9 @@ void XLink::MakeLink(const XTensor * t1, const XTensor * t2, XTensor * h, int id)
     if (h == NULL)
         return;
 
+    if (!t1->enableGrad)
+        return;
+
     TensorList list(2);
     list.Add((XTensor*)t1);
     list.Add((XTensor*)t2);
@@ -320,6 +323,9 @@ void XLink::MakeLink(const XTensor * t1, const XTensor * t2, const XTensor * t3, ...
     if (h == NULL)
         return;
 
+    if (!t1->enableGrad || !t2->enableGrad)
+        return;
+
     TensorList list(3);
     list.Add((XTensor*)t1);
     list.Add((XTensor*)t2);
@@ -370,6 +376,9 @@ create a hyper edge with a input tensors and a list of output tensors
 */
 void XLink::MakeLink(XTensor * t, TensorList * list, int id)
 {
+    if (!t->enableGrad)
+        return;
+
     /* forward */
     for (int i = 0; i < list->count; i++) {
         XTensor * h = (XTensor*)list->GetItem(i);
```
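All three MakeLink overloads now bail out before recording any edges when an input has enableGrad == false, so such tensors never acquire incoming links and backward traversal simply stops at them. The control flow in isolation, with simplified stand-in types for XTensor and TensorList:

```cpp
#include <vector>

struct Node {
    bool enableGrad = true;
    std::vector<Node*> income;   /* stand-in for the incoming-edge list */
};

void MakeLink(Node* t1, Node* t2, Node* head)
{
    if (head == nullptr)
        return;
    if (!t1->enableGrad || !t2->enableGrad)   /* the new early return */
        return;
    head->income.push_back(t1);   /* the edge is recorded only when    */
    head->income.push_back(t2);   /* every input tracks gradients      */
}
```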
source/tensor/XList.cpp

(Several hunks in this file are indentation-only; their bodies are shown once below.)

```diff
@@ -23,15 +23,11 @@
 *
 */
 
-#include "XList.h"
-#include "time.h"
 #include "XMem.h"
+#include "XList.h"
+#include "XGlobal.h"
+
+#include <ctime>
+#include <utility>
+#include <algorithm>
 
 /* the nts (NiuTrans.Tensor) namespace */
 namespace nts {
@@ -78,7 +74,8 @@ TensorListBase<T>::TensorListBase(int myMaxNum, XMem* myMem)
 template <typename T>
 TensorListBase<T>::~TensorListBase()
 {
-    delete[] items;
+    if (items && mem)
+        delete[] items;
 }
@@ -90,7 +87,7 @@
 void TensorListBase<T>::Add(T&& item)
 {
     if (count == maxNum) {
         T * newItems;
         if (mem == NULL)
             newItems = new T[maxNum * 2 + 1];
@@ -101,7 +98,13 @@ void TensorListBase<T>::Add(T&& item)
         maxNum = maxNum * 2 + 1;
     }
     items[count++] = item;
 }
 
+/* return number of elements */
+template <typename T>
+size_t TensorListBase<T>::Size()
+{
+    return count;
+}
@@ -111,18 +114,18 @@ add an item into the list
 template <typename T>
 void TensorListBase<T>::Add(const T& item)
 {
     if (count == maxNum) {
         T * newItems;
         if (mem == NULL)
             newItems = new T[maxNum * 2 + 1];
         else
             newItems = (T*)mem->Alloc(mem->devID, sizeof(T) * (maxNum * 2 + 1));
         memcpy(newItems, items, sizeof(T) * maxNum);
         items = newItems;
         maxNum = maxNum * 2 + 1;
     }
     items[count++] = item;
 }
@@ -131,7 +134,7 @@ add a number of items into the list
 >> inputItemCount - number of input items
 */
 template <typename T>
-void TensorListBase<T>::Add(T * inputItems, int inputItemCount)
+void TensorListBase<T>::Add(const T * inputItems, int inputItemCount)
 {
     if (count + inputItemCount >= maxNum) {
         int newMaxNum = (count + inputItemCount) * 2 + 1;
@@ -186,31 +189,31 @@ void TensorListBase<T>::Insert(int pos, const T& item)
 template <typename T>
 void TensorListBase<T>::Insert(int pos, T&& item)
 {
     if (count == maxNum) {
         T * newItems;
         if (mem == NULL)
             newItems = new T[maxNum * 2 + 1];
         else
             newItems = (T*)mem->Alloc(mem->devID, sizeof(T) * (maxNum * 2 + 1));
         memcpy(newItems, items, sizeof(T) * maxNum);
         items = newItems;
         maxNum = maxNum * 2 + 1;
     }
     for (int i = count - 1; i >= pos; i--)
         items[i + 1] = items[i];
     items[pos] = item;
     count++;
 }
 
 /* get the item at position i */
 template <typename T>
 T& TensorListBase<T>::GetItem(int i) const
 {
-    CheckNTErrors(i >= -1 && i < count, "Index of a list item is out of scope!");
+    CheckNTErrors(i >= -count && i < count, "Index of a list item is out of scope!");
     CheckNTErrors(count > 0, "Cannt index the item in an empty list!");
-    if (i == -1)
-        return items[count - 1];
+    if (i < 0)
+        return items[count + i];
     else
         return items[i];
 }
@@ -226,8 +229,8 @@ inline void TensorListBase<T>::SetItem(int i, const T& item)
 template <typename T>
 inline void TensorListBase<T>::SetItem(int i, T&& item)
 {
-    if (i >= 0 && i < count)
-        items[i] = std::move(item);
+    if (i >= 0 && i < count)
+        items[i] = item;
 }
@@ -246,11 +249,31 @@ inline int TensorListBase<T>::FindFirst(const T& item)
     return -1;
 }
 
+template <>
+inline int TensorListBase<Example>::FindFirst(const Example& item)
+{
+    for (int i = 0; i < count; i++) {
+        if (item.id == items[i].id)
+            return i;
+    }
+    return -1;
+}
+
+template <>
+inline int TensorListBase<Result>::FindFirst(const Result& item)
+{
+    for (int i = 0; i < count; i++) {
+        if (item.id == items[i].id)
+            return i;
+    }
+    return -1;
+}
 
 /* clear the data array */
 template <typename T>
 void TensorListBase<T>::Clear()
 {
     count = 0;
 }
@@ -295,6 +318,17 @@ void TensorListBase<T>::Remove(int i)
     count--;
 }
 
+template <typename T>
+void TensorListBase<T>::Reserve(int n)
+{
+    if (items) {
+        /* reserve failed */
+        return;
+    }
+
+    items = new T[n];
+}
 
 /*
 copy the list
 >> myMem - memory pool used for allocating the data in the new list
@@ -349,6 +383,8 @@ template struct TensorListBase<long>;
 template struct TensorListBase<float>;
 template struct TensorListBase<short>;
 template struct TensorListBase<XTensor*>;
+template struct TensorListBase<Result>;
+template struct TensorListBase<Example>;
 template struct TensorListBase<void*>;
 
 } /* end of the nts (NiuTrans.Tensor) namespace */
\ No newline at end of file
```
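Among the changes above, GetItem's relaxed bounds check is worth calling out: indices may now lie anywhere in [-count, count), with negative values counting from the back. A standalone illustration of the same indexing rule:

```cpp
#include <cassert>

// Mirrors the new GetItem contract: i in [-count, count), negative i
// addresses from the end (i = -1 is the last element).
int getItem(const int* items, int count, int i)
{
    assert(count > 0 && i >= -count && i < count);
    return (i < 0) ? items[count + i] : items[i];
}

int main()
{
    int a[4] = {7, 8, 9, 10};
    assert(getItem(a, 4, -1) == 10);  /* last element  */
    assert(getItem(a, 4, -4) == 7);   /* first element */
    assert(getItem(a, 4, 2) == 9);    /* ordinary case */
    return 0;
}
```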
source/tensor/XList.h

```diff
@@ -32,7 +32,7 @@
 /* the nts (NiuTrans.Tensor) namespace */
 namespace nts {
 
 /* the TensorListBase class */
 template <typename T>
 struct TensorListBase {
@@ -66,68 +66,85 @@ public:
     /* add an item into the list */
     void Add(T&& item);
 
+    /* return number of elements */
+    size_t Size();
+
     /* add an item into the list */
     void Add(const T& item);
 
-    /* add a number of items into the list */
-    void Add(T * inputItems, int inputItemCount);
+    /* add a number of items into the list */
+    void Add(const T * inputItems, int inputItemCount);
 
     /* append a list to the current list */
     void AddList(TensorListBase * l);
 
     /* insert an item to the given position of the list */
     void Insert(int pos, const T& item);
 
     /* insert an item to the given position of the list */
     void Insert(int pos, T&& item);
 
     /* get the item at position i */
     T& GetItem(int i) const;
 
     /* set the item at position i */
     void SetItem(int i, const T& item);
 
     /* set the item at position i */
     void SetItem(int i, T&& item);
 
     /* find the position of the first matched item */
     int FindFirst(const T& item);
 
     /* clear the data array */
     void Clear();
 
     /* sort the list */
     void Sort(int itemSize);
 
     /* reverse the list */
     void Reverse();
 
     /* remove the item at position i */
     void Remove(int i);
 
+    /* reserve space for data entry */
+    void Reserve(int n);
+
     /* copy the list */
     TensorListBase * Copy(XMem * myMem);
 
     /* shuffle the list */
     void Shuffle(int nround = 10, int beg = -1, int len = 0);
 
     /* short */
     T& operator[] (int i) { return GetItem(i); };
     T& Get(int i) { return GetItem(i); };
     void Set(int i, T item) { SetItem(i, item); };
 };
 
 struct XTensor;
 
-typedef TensorListBase<void*> XList;
 typedef TensorListBase<int> IntList;
 typedef TensorListBase<char> CharList;
 typedef TensorListBase<char*> StrList;
 typedef TensorListBase<long> LongList;
 typedef TensorListBase<float> FloatList;
 typedef TensorListBase<short> ShortList;
+typedef TensorListBase<void*> XList;
+
+struct Example {
+    int id;
+    IntList data;
+};
+
+struct Result {
+    int id;
+    IntList data;
+};
+
+typedef TensorListBase<Result> ResultList;
+typedef TensorListBase<Example> ExampleList;
 typedef TensorListBase<XTensor*> TensorList;
 
 } /* end of the nts (NiuTrans.Tensor) namespace */
```
source/tensor/XName.h

```diff
@@ -51,7 +51,9 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
 #define MATH_MASK MATH_DIVDIM + 1
 #define MATH_MATRIXMUL MATH_MASK + 1
 #define MATH_MATRIXMULBATCHED MATH_MATRIXMUL + 1
-#define MATH_MULTIPLY MATH_MATRIXMULBATCHED + 1
+#define MATH_MAX MATH_MATRIXMULBATCHED + 1
+#define MATH_MIN MATH_MAX + 1
+#define MATH_MULTIPLY MATH_MIN + 1
 #define MATH_MULTIPLYDIM MATH_MULTIPLY + 1
 #define MATH_MULTIPLYBROADCAST MATH_MULTIPLYDIM + 1
 #define MATH_NEGATE MATH_MULTIPLYBROADCAST + 1
```
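The operator IDs in XName.h form one consecutive integer sequence because each macro is defined as the previous one plus 1; inserting MATH_MAX and MATH_MIN therefore shifts every later ID (MATH_MULTIPLY onward) up by two while keeping all IDs unique. A toy version of the scheme, with a hypothetical anchor value:

```cpp
#include <cstdio>

#define OP_BASE        100                /* hypothetical anchor value  */
#define OP_MATRIXMUL   OP_BASE + 1
#define OP_MAX         OP_MATRIXMUL + 1   /* newly inserted             */
#define OP_MIN         OP_MAX + 1         /* newly inserted             */
#define OP_MULTIPLY    OP_MIN + 1         /* shifted, but still unique  */

int main()
{
    printf("%d %d %d %d\n", OP_MATRIXMUL, OP_MAX, OP_MIN, OP_MULTIPLY);
    /* prints: 101 102 103 104 */
    return 0;
}
```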
source/tensor/XTensor.cpp

```diff
@@ -280,7 +280,7 @@ void XTensor::Init()
     isTmp = false;
     isGrad = false;
     isVar = false;
-    enableGrad = false;
+    enableGrad = true;
     visitMark = 0;
     grad = NULL;
 }
@@ -397,7 +397,7 @@ XTensor& XTensor::operator= (const XTensor& tensor)
         CheckNTErrors(outgo.tailNum == 0, "The node has outgoing edge to other nodes!");
 
         /* create tensor links for the new tensor */
-        XLink::Replace(&tensor, this);
+        XLink::Copy(&tensor, this);
     }
 
     return *this;
@@ -445,7 +445,7 @@ XTensor& XTensor::operator= (const XTensor&& tensor)
     This is VERY tricky and there might be better solutions :) */
     *tensor.dataP = NULL;
 
-    XLink::Replace(&tensor, this);
+    XLink::Copy(&tensor, this);
 
     return *this;
 }
@@ -720,7 +720,7 @@ int XTensor::GetSize() const
 /* get the size of the memory space used */
-int XTensor::GetDataSizeInChar()
+int XTensor::GetDataSizeInChar() const
 {
     if (isSparse) {
         int num = int(unitNum * denseRatio + 1);
@@ -738,7 +738,7 @@
-int XTensor::GetUnitSize(TENSOR_DATA_TYPE myDataType)
+int XTensor::GetUnitSize(TENSOR_DATA_TYPE myDataType) const
@@ -758,7 +758,7 @@
-MTYPE XTensor::GetOffset2D(int row, int col)
+MTYPE XTensor::GetOffset2D(int row, int col) const
@@ -773,7 +773,7 @@
-MTYPE XTensor::GetOffset3D(int d0, int d1, int d2)
+MTYPE XTensor::GetOffset3D(int d0, int d1, int d2) const
@@ -856,6 +856,16 @@ void XTensor::Rand(int rNum, int cNum)
     _SetDataRand(this, rNum, cNum);
 }
 
+/* generate data items with a range by start, end and the step
+>> start - the begin of the array
+>> end - the end of the array (not included self)
+>> step - the step of two items
+*/
+void XTensor::Range(DTYPE lower, DTYPE upper, DTYPE step)
+{
+    _SetDataRange(this, lower, upper, step);
+}
+
 /*
 set the tensor items by a uniform distribution in range [lower, upper]
 >> lower - lower value of the range
@@ -929,9 +939,11 @@ set the tensor items by a normal distribution
 void XTensor::SetDataRandn(DTYPE mean, DTYPE standardDeviation)
 {
     // TODO: cuda code!!!!!!!
+    if (data == NULL)
+        return;
 
     // srand((unsigned)time(0));
     void * d = NULL;
     if (dataType == X_FLOAT) {
         d = new float[unitNum];
@@ -986,7 +998,7 @@
-bool XTensor::CheckData(const void * d, int num, int beg)
+bool XTensor::CheckData(const void * d, int num, int beg) const
@@ -1030,7 +1042,7 @@
-bool XTensor::CheckData(const void * d, int num, float tolerance, int beg)
+bool XTensor::CheckData(const void * d, int num, float tolerance, int beg) const
@@ -1106,7 +1118,7 @@
-DTYPE XTensor::Get(int index[], int size)
+DTYPE XTensor::Get(int index[], int size) const
@@ -1118,7 +1130,7 @@
-DTYPE XTensor::Get(int offset)
+DTYPE XTensor::Get(int offset) const
@@ -1170,7 +1182,7 @@
-DTYPE XTensor::Get1D(int i)
+DTYPE XTensor::Get1D(int i) const
@@ -1207,7 +1219,7 @@
-DTYPE XTensor::Get3D(int d0, int d1, int d2)
+DTYPE XTensor::Get3D(int d0, int d1, int d2) const
@@ -1225,7 +1237,7 @@
-int XTensor::GetInt(int offset)
+int XTensor::GetInt(int offset) const
@@ -1242,7 +1254,7 @@
-int XTensor::Get1DInt(int i)
+int XTensor::Get1DInt(int i) const
@@ -1260,7 +1272,7 @@
-int XTensor::Get2DInt(int ni, int mi)
+int XTensor::Get2DInt(int ni, int mi) const
@@ -1280,7 +1292,7 @@
-int XTensor::Get3DInt(int d0, int d1, int d2)
+int XTensor::Get3DInt(int d0, int d1, int d2) const
@@ -1299,7 +1311,7 @@
-DTYPE XTensor::GetInSparse(int i)
+DTYPE XTensor::GetInSparse(int i) const
@@ -1315,7 +1327,7 @@
-int XTensor::GetKeyInSparse(int i)
+int XTensor::GetKeyInSparse(int i) const
@@ -1528,7 +1540,7 @@
-int XTensor::GetNonzeroSize()
+int XTensor::GetNonzeroSize() const
@@ -1893,148 +1905,6 @@ void XTensor::Dump(FILE * file, const char * label, const int n, const int beg, ...
     }
 }
 
-void * RecursionData(XTensor * s, int dim, int * index, void * d, FILE * file)
-{
-    if (dim == s->order - 2) {
-        /* print index */
-        printf("Index: ");
-        for (int i = 0; i < s->order - 2; i++)
-            printf("[%d]", index[i]);
-        int dimSize1 = s->dimSize[dim];
-        int dimSize2 = s->dimSize[dim + 1];
-        printf(" %d * %d\n", dimSize1, dimSize2);
-
-        /* print 2D data */
-        if (s->dataType == X_FLOAT) {
-            float * data = (float*)d;
-            for (int i = 0; i < dimSize1; i++) {
-                printf("\t");
-                for (int j = 0; j < dimSize2; j++)
-                    fprintf(file, "%e ", *data++);
-                fprintf(file, "\n");
-            }
-            d = (float*)d + dimSize1 * dimSize2;
-        }
-        else if (s->dataType == X_INT) {
-            int * data = (int*)d;
-            for (int i = 0; i < dimSize1; i++) {
-                printf("\t");
-                for (int j = 0; j < dimSize2; j++)
-                    fprintf(file, "%d ", *data++);
-                fprintf(file, "\n");
-            }
-            d = (int*)d + dimSize1 * dimSize2;
-        }
-        else
-            ShowNTErrors("TODO!");
-
-        return d;
-    }
-
-    /* recursion for deeper dimsion */
-    int levelSize = s->dimSize[dim];
-    for (int k = 0; k < levelSize; k++) {
-        index[dim] = k;
-        d = RecursionData(s, dim + 1, index, d, file);
-    }
-
-    return d;
-}
-
-/*
-dump data to a file
->> file - where to domp the data
->> label - label of the tensor
->> n - number of items to dump
->> beg - the first item id
->> verbose - verbose level
-*/
-void XTensor::DumpFormat(FILE * file, const char * label, const int n, const int beg, const int verbose)
-{
-    if (verbose > verboseLevel)
-        return;
-
-    void * d = data;
-    bool isNewData = false;
-
-#ifdef USE_CUDA
-    if (devID >= 0) {
-        CudaGPUToCPUFlush(this);
-        d = dataHost;
-        isNewData = true;
-    }
-#endif
-
-    if (d == NULL) {
-        if (isSparse) {
-            int num = 0;
-            for (int i = 0; i < order; i++)
-                num *= dimSizeRDI[i];
-            num = int(num * denseRatio + 1);
-            int tupleSize = sizeof(int) + sizeof(DTYPE);
-            int size = sizeof(int) + tupleSize * (num);
-            d = new char[size];
-            memset(d, 0, size);
-        }
-        else {
-            d = new char[unitNum * unitSize];
-            memset(d, 0, unitNum * unitSize);
-        }
-        isNewData = true;
-    }
-
-    if (label != NULL)
-        fprintf(file, "%s ", label);
-
-    if (isInit) {
-        fprintf(file, "id=%d ", id);
-        fprintf(file, "order=%d dimsize=", order);
-        for (int i = 0; i < order; i++) {
-            fprintf(file, "%d", dimSize[i]);
-            if (i < order - 1)
-                fprintf(file, ",");
-        }
-    }
-    else {
-        fprintf(file, "order=-1 dimsize=-1");
-    }
-
-    fprintf(file, " dtype=%s dense=%f\n", GetDataTypeName(dataType), denseRatio);
-
-    if (!isInit) {
-        fprintf(file, "NULL");
-    }
-    if (order == 1) {
-        for (int i = 0; i < unitNum; i++) {
-            if (dataType == X_FLOAT)
-                fprintf(file, "%e ", ((float*)d)[i]);
-            else if (dataType == X_INT)
-                fprintf(file, "%d ", ((int*)d)[i]);
-            else
-                ShowNTErrors("TODO!");
-        }
-        printf("\n");
-    }
-    /* print multi-dimensional tensor */
-    else {
-        int * index = new int[order];
-        RecursionData(this, 0, index, d, file);
-        delete[] index;
-    }
-    fprintf(file, "\n");
-
-    if (isNewData) {
-        delete[](char*)d;
-#ifdef USE_CUDA
-        if (devID >= 0)
-            dataHost = NULL;
-#endif
-    }
-}
@@ -2052,6 +1922,26 @@ void XTensor::Dump(const XTensor * tensor, FILE * file, const char * label, cons...
 }
 
+/*
+dump data to a binary file
+>> file - where to dump the data
+*/
+void XTensor::BinaryDump(FILE * file)
+{
+    XTensor tmp;
+    InitTensorOnCPU(&tmp, this);
+    _CopyValues(this, &tmp);
+
+    switch (dataType) {
+        case X_INT: {
+            fwrite(tmp.data, sizeof(int), unitNum, file);
+        }
+        default: {
+            fwrite(tmp.data, sizeof(float), unitNum, file);
+        }
+    }
+}
+
 /*
 read data from a file
 >> file - where to load the data
 >> label - label of the tensor
@@ -2163,6 +2053,30 @@ void XTensor::Read(FILE * file, const char * label)
     delete[](char*)dataBuf;
 }
 
+/*
+read data from a binary file
+>>> file - the file stream pointer
+>>> offset - the distance from the start to this tensor
+*/
+void XTensor::BinaryRead(FILE * file, size_t offset)
+{
+    fseek(file, offset, 0);
+    switch (dataType) {
+        case X_INT: {
+            int * d = new int[unitNum];
+            fread(d, sizeof(int), unitNum, file);
+            SetData(d, unitNum);
+            delete[] d;
+        }
+        default: {
+            float * d = new float[unitNum];
+            fread(d, sizeof(float), unitNum, file);
+            SetData(d, unitNum);
+            delete[] d;
+        }
+    }
+}
+
 /*
 flush the data to the target device
 >> targetMem - memory pool on the target device
@@ -2327,7 +2241,7 @@ initialize a dense tensor V2
 void InitTensorV2(XTensor * tensor,
                   const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType,
-                  const int myDevID)
+                  const int myDevID, const bool isEnableGrad)
 {
     if (tensor->mem == NULL) {
         XMem * myMem = GMems.GetMem(myDevID);
@@ -2359,6 +2273,7 @@ void InitTensorV2(XTensor * tensor, ...
         if (allocated)
             XTensor::AllocateData(tensor);
     }
+    tensor->enableGrad = isEnableGrad;
 }
@@ -2392,12 +2307,12 @@ initialize a dense tensor V2
 void InitTensor1DV2(XTensor * tensor, const int num,
-                    const TENSOR_DATA_TYPE myDataType, const int myDevID)
+                    const TENSOR_DATA_TYPE myDataType, const int myDevID, const bool isEnableGrad)
 {
     int dims[1];
     dims[0] = num;
 
-    InitTensorV2(tensor, 1, dims, myDataType, myDevID);
+    InitTensorV2(tensor, 1, dims, myDataType, myDevID, isEnableGrad);
 }
@@ -2434,13 +2349,13 @@ initialize a dense matrix V2
 void InitTensor2DV2(XTensor * tensor, const int rowNum, const int colNum,
-                    const TENSOR_DATA_TYPE myDataType, const int myDevID)
+                    const TENSOR_DATA_TYPE myDataType, const int myDevID, const bool isEnableGrad)
 {
     int dims[2];
     dims[0] = rowNum;
     dims[1] = colNum;
 
-    InitTensorV2(tensor, 2, dims, myDataType, myDevID);
+    InitTensorV2(tensor, 2, dims, myDataType, myDevID, isEnableGrad);
 }
@@ -2480,14 +2395,14 @@ initialize a dense 3d tensor V2
 void InitTensor3DV2(XTensor * tensor, const int d0, const int d1, const int d2,
-                    const TENSOR_DATA_TYPE myDataType, const int myDevID)
+                    const TENSOR_DATA_TYPE myDataType, const int myDevID, const bool isEnableGrad)
 {
     int dims[3];
     dims[0] = d0;
     dims[1] = d1;
     dims[2] = d2;
 
-    InitTensorV2(tensor, 3, dims, myDataType, myDevID);
+    InitTensorV2(tensor, 3, dims, myDataType, myDevID, isEnableGrad);
 }
@@ -2530,7 +2445,7 @@ initialize a dense 4d tensor V2
 void InitTensor4DV2(XTensor * tensor, const int d0, const int d1, const int d2, const int d3,
-                    const TENSOR_DATA_TYPE myDataType, const int myDevID)
+                    const TENSOR_DATA_TYPE myDataType, const int myDevID, const bool isEnableGrad)
 {
     int dims[4];
     dims[0] = d0;
@@ -2538,7 +2453,7 @@ void InitTensor4DV2(XTensor * tensor, const int d0, const int d1, const int d2, ...
     dims[2] = d2;
     dims[3] = d3;
 
-    InitTensorV2(tensor, 4, dims, myDataType, myDevID);
+    InitTensorV2(tensor, 4, dims, myDataType, myDevID, isEnableGrad);
 }
@@ -2584,7 +2499,7 @@ initialize a dense 5d tensor V2
 void InitTensor5DV2(XTensor * tensor, const int d0, const int d1, const int d2, const int d3, const int d4,
-                    const TENSOR_DATA_TYPE myDataType, const int myDevID)
+                    const TENSOR_DATA_TYPE myDataType, const int myDevID, const bool isEnableGrad)
 {
     int dims[5];
     dims[0] = d0;
@@ -2593,7 +2508,7 @@ void InitTensor5DV2(XTensor * tensor, const int d0, const int d1, const int d2, ...
     dims[3] = d3;
     dims[4] = d4;
 
-    InitTensorV2(tensor, 5, dims, myDataType, myDevID);
+    InitTensorV2(tensor, 5, dims, myDataType, myDevID, isEnableGrad);
 }
@@ -2685,10 +2600,12 @@ generate a dense XTensor V2
 XTensor * NewTensorV2(const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType,
-                      const int myDevID)
+                      const int myDevID, const bool isEnableGrad)
 {
     XMem * myMem = GMems.GetMem(myDevID);
 
-    return new XTensor(myOrder, myDimSize, myDataType, 1.0F, myDevID, myMem);
+    XTensor * tensor = new XTensor(myOrder, myDimSize, myDataType, 1.0F, myDevID, myMem);
+    tensor->enableGrad = isEnableGrad;
+    return tensor;
 }
@@ -2714,6 +2631,9 @@ XTensor * NewTensorBuf(const int myOrder, const int * myDimSize, ...
     XTensor * tensor = NewTensor(myOrder, dims, myDataType, myDenseRatio, devID, myMem);
 
+    if (tensor->unitNum * tensor->unitSize == 176657664) {
+        tensor->Dump(stderr, "", 200);
+    }
     if (myMem != NULL)
         tensor->data = myMem->AllocBuf(myMem->devID, tensor->unitNum * tensor->unitSize);
     else
@@ -2732,14 +2652,14 @@ generate a dense XTensor which allocates data on the buffer V2
 XTensor * NewTensorBufV2(const int myOrder, const int * myDimSize,
-                         const TENSOR_DATA_TYPE myDataType, const int devID)
+                         const TENSOR_DATA_TYPE myDataType, const int devID, const bool isEnableGrad)
 {
     int dims[MAX_TENSOR_DIM_NUM];
     memcpy(dims, myDimSize, sizeof(int) * myOrder);
 
     dims[0] = -abs(dims[0]);
 
-    XTensor * tensor = NewTensorV2(myOrder, dims, myDataType, devID);
+    XTensor * tensor = NewTensorV2(myOrder, dims, myDataType, devID, isEnableGrad);
     if (tensor->unitNum * tensor->unitSize == 176657664) {
         tensor->Dump(stderr, "", 200);
@@ -2771,10 +2691,10 @@ generate a XTensor which allocates data on the buffer V2
 >> reference - reference tensor
 >> devID - device id
 */
-XTensor * NewTensorBufV2(const XTensor * reference, int devID)
+XTensor * NewTensorBufV2(const XTensor * reference, int devID, const bool isEnableGrad)
 {
     return NewTensorBufV2(reference->order, reference->dimSize,
-                          reference->dataType, devID);
+                          reference->dataType, devID, isEnableGrad);
 }
@@ -2806,12 +2726,12 @@ generate a dense vector V2
 XTensor * NewTensor1DV2(const int num,
-                        const TENSOR_DATA_TYPE myDataType, const int myDevID)
+                        const TENSOR_DATA_TYPE myDataType, const int myDevID, const bool isEnableGrad)
 {
     int dims[1];
     dims[0] = num;
 
-    return NewTensorV2(1, dims, myDataType, myDevID);
+    return NewTensorV2(1, dims, myDataType, myDevID, isEnableGrad);
 }
@@ -2846,13 +2766,13 @@ generate a dense matrix V2
 XTensor * NewTensor2DV2(const int rowNum, const int colNum,
-                        const TENSOR_DATA_TYPE myDataType, const int myDevID)
+                        const TENSOR_DATA_TYPE myDataType, const int myDevID, const bool isEnableGrad)
 {
     int dims[2];
     dims[0] = rowNum;
     dims[1] = colNum;
 
-    return NewTensorV2(2, dims, myDataType, myDevID);
+    return NewTensorV2(2, dims, myDataType, myDevID, isEnableGrad);
 }
@@ -2890,14 +2810,14 @@ generate a dense 3d tensor V2
 XTensor * NewTensor3DV2(const int d0, const int d1, const int d2,
-                        const TENSOR_DATA_TYPE myDataType, const int myDevID)
+                        const TENSOR_DATA_TYPE myDataType, const int myDevID, const bool isEnableGrad)
 {
     int dims[3];
     dims[0] = d0;
     dims[1] = d1;
     dims[2] = d2;
 
-    return NewTensorV2(3, dims, myDataType, myDevID);
+    return NewTensorV2(3, dims, myDataType, myDevID, isEnableGrad);
 }
@@ -2938,7 +2858,7 @@ generate a dense 4d tensor V2
 XTensor * NewTensor4DV2(const int d0, const int d1, const int d2, const int d3,
-                        const TENSOR_DATA_TYPE myDataType, const int myDevID)
+                        const TENSOR_DATA_TYPE myDataType, const int myDevID, const bool isEnableGrad)
 {
     int dims[4];
     dims[0] = d0;
@@ -2946,7 +2866,7 @@ XTensor * NewTensor4DV2(const int d0, const int d1, const int d2, const int d3, ...
     dims[2] = d2;
     dims[3] = d3;
 
-    return NewTensorV2(4, dims, myDataType, myDevID);
+    return NewTensorV2(4, dims, myDataType, myDevID, isEnableGrad);
 }
@@ -2990,7 +2910,7 @@ generate a dense 5d tensor V2
 XTensor * NewTensor5DV2(const int d0, const int d1, const int d2, const int d3, const int d4,
-                        const TENSOR_DATA_TYPE myDataType, const int myDevID)
+                        const TENSOR_DATA_TYPE myDataType, const int myDevID, const bool isEnableGrad)
 {
     int dims[5];
     dims[0] = d0;
@@ -2999,7 +2919,17 @@ XTensor * NewTensor5DV2(const int d0, const int d1, const int d2, const int d3, ...
     dims[3] = d3;
     dims[4] = d4;
 
-    return NewTensorV2(5, dims, myDataType, myDevID);
+    return NewTensorV2(5, dims, myDataType, myDevID, isEnableGrad);
 }
 
+XTensor * NewTensorRange(int lower, int upper, int step, const TENSOR_DATA_TYPE myDataType, const int myDevID, const bool isEnableGrad)
+{
+    int size = abs(upper - lower);
+    int unitNum = ceil(1.0 * size / abs(step));
+
+    XTensor * tensor = NewTensor1DV2(unitNum, myDataType, myDevID, isEnableGrad);
+    tensor->Range(lower, upper, step);
+
+    return tensor;
+}
```
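A hedged usage sketch of the helpers added above (NewTensorRange, BinaryDump, BinaryRead), assuming the NiuTrans.Tensor headers. Note that, as committed, the switch statements in BinaryDump and BinaryRead have no break, so the X_INT case falls through into the default float branch; the sketch sticks to X_FLOAT to stay in a single branch:

```cpp
#include "XTensor.h"   /* assumed header location */
#include <cstdio>

using namespace nts;

int main()
{
    /* values in [0, 10) with step 2 -> 5 items: 0 2 4 6 8 */
    XTensor* t = NewTensorRange(0, 10, 2, X_FLOAT);

    FILE* f = fopen("tensor.bin", "wb");
    t->BinaryDump(f);          /* copies to CPU, then a raw fwrite */
    fclose(f);

    XTensor* u = NewTensorRange(0, 10, 2, X_FLOAT);
    f = fopen("tensor.bin", "rb");
    u->BinaryRead(f, 0);       /* fseek to the offset, fread, SetData */
    fclose(f);

    delete t;
    delete u;
    return 0;
}
```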
source/tensor/XTensor.h

```diff
@@ -290,16 +290,16 @@ public:
     int GetSize() const;
 
     /* get size of the memory used */
-    int GetDataSizeInChar();
+    int GetDataSizeInChar() const;
 
     /* get unit size in terms of "dataType" */
-    int GetUnitSize(TENSOR_DATA_TYPE myDataType);
+    int GetUnitSize(TENSOR_DATA_TYPE myDataType) const;
 
     /* get offset (2D) */
-    MTYPE GetOffset2D(int row, int col);
+    MTYPE GetOffset2D(int row, int col) const;
 
     /* get offset (3D) */
-    MTYPE GetOffset3D(int d0, int d1, int d2);
+    MTYPE GetOffset3D(int d0, int d1, int d2) const;
 
     /* a tensor with all entries of 0 */
     void SetZeroAll(XStream * stream = NULL);
@@ -310,6 +310,9 @@ public:
     /* generate data items with a uniform distribution in [0, 1] */
     void Rand(int rNum, int cNum);
 
+    /* generate data items with a range by start, end and the step */
+    void Range(DTYPE lower, DTYPE upper, DTYPE step);
+
     /* set tensor items by a uniform distribution */
     void SetDataRand(DTYPE lower = 0.0F, DTYPE upper = 1.0F);
@@ -323,10 +326,10 @@ public:
     void SetDataBatchedWithValues(MTYPE * offsets, void * values, int num);
 
     /* check whether the data array is the same as the answer */
-    bool CheckData(const void * answer, int num, int beg = 0);
+    bool CheckData(const void * answer, int num, int beg = 0) const;
 
     /* check whether the data array is the same as the answer */
-    bool CheckData(const void * answer, int num, float tolerance, int beg = 0);
+    bool CheckData(const void * answer, int num, float tolerance, int beg = 0) const;
 
     /* set the pointer to "data" */
     void SetDataPointer();
@@ -335,40 +338,40 @@ public:
     void SetAscendingOrder(int dim);
 
     /* get the value of a cell with the index */
-    DTYPE Get(int index[], int size = -1);
+    DTYPE Get(int index[], int size = -1) const;
 
     /* get the value of a cell with the offset */
-    DTYPE Get(int offset);
+    DTYPE Get(int offset) const;
 
     /* get the pointer to a cell */
     void * GetCell(int index[], int size = -1) const;
 
     /* get the default type value of a cell in a 1d tensor */
-    DTYPE Get1D(int i);
+    DTYPE Get1D(int i) const;
 
     /* get the default type value of a cell in a 2d tensor */
     DTYPE Get2D(int ni, int mi) const;
 
     /* get the default type value of a cell in a 3d tensor */
-    DTYPE Get3D(int d0, int d1, int d2);
+    DTYPE Get3D(int d0, int d1, int d2) const;
 
     /* get the int value of a cell by its offset */
-    int GetInt(int offset);
+    int GetInt(int offset) const;
 
     /* get the int value of a cell in a 1d tensor */
-    int Get1DInt(int i);
+    int Get1DInt(int i) const;
 
     /* get the int value of a cell in a 2d tensor */
-    int Get2DInt(int ni, int mi);
+    int Get2DInt(int ni, int mi) const;
 
     /* get the int value of a cell in a 3d tensor */
-    int Get3DInt(int d0, int d1, int d2);
+    int Get3DInt(int d0, int d1, int d2) const;
 
     /* get the value of a cell in a sparse tensor */
-    DTYPE GetInSparse(int i);
+    DTYPE GetInSparse(int i) const;
 
     /* get the key value of a tuple in a sparse tensor */
-    int GetKeyInSparse(int i);
+    int GetKeyInSparse(int i) const;
 
     /* set the value of a cell */
     bool Set(DTYPE value, int index[], int size = -1);
@@ -404,7 +407,7 @@ public:
     bool Add2D(DTYPE value, int ni, int mi);
 
     /* get the number of non-zero elements (in a sparse tensor) */
-    int GetNonzeroSize();
+    int GetNonzeroSize() const;
 
     /* set the tensor as "temporary" */
     void SetTMPFlag(bool myIsTmp = true);
@@ -428,17 +431,20 @@ public:
     /* dump data to a file */
     void Dump(FILE * file, const char * label = NULL, const int n = -1, const int beg = 0, const int verbose = 0);
 
+    /* dump data to a file */
+    void DumpFormat(FILE * file, const char * label = NULL, const int n = -1, const int beg = 0, const int verbose = 0);
+
     /* dump data to a file */
     static
     void Dump(const XTensor * tensor, FILE * file, const char * label = NULL, const int n = -1, const int beg = 0, const int verbose = 0);
 
+    /* dump data to a binary file */
+    void BinaryDump(FILE * file);
+
     /* read data from a file */
     void Read(FILE * file, const char * label = NULL);
 
+    /* read data from a binary file */
+    void BinaryRead(FILE * file, size_t offset);
+
     /* flush the data to the target device */
     void FlushToMem(XMem * targetMem);
@@ -469,7 +475,7 @@ void InitTensor(XTensor * tensor, ...
 /* initialize a dense XTensor V2 */
 void InitTensorV2(XTensor * tensor,
                   const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType = X_FLOAT,
-                  const int myDevID = -1);
+                  const int myDevID = -1, const bool isEnableGrad = true);
@@ -477,7 +483,7 @@ void InitTensor1D(XTensor * tensor, const int num, ...
 /* initialize a dense vector V2 */
 void InitTensor1DV2(XTensor * tensor, const int num,
-                    const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1);
+                    const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = true);
@@ -485,7 +491,7 @@ void InitTensor2D(XTensor * tensor, const int rowNum, const int colNum, ...
 /* initialize a dense matrix V2 */
 void InitTensor2DV2(XTensor * tensor, const int rowNum, const int colNum,
-                    const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1);
+                    const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = true);
@@ -493,7 +499,7 @@ void InitTensor3D(XTensor * tensor, const int d0, const int d1, const int d2, ...
 /* initialize a dense 3d tensor V2 */
 void InitTensor3DV2(XTensor * tensor, const int d0, const int d1, const int d2,
-                    const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1);
+                    const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = true);
@@ -501,7 +507,7 @@ void InitTensor4D(XTensor * tensor, const int d0, const int d1, const int d2, co...
 /* initialize a dense 4d tensor V2 */
 void InitTensor4DV2(XTensor * tensor, const int d0, const int d1, const int d2, const int d3,
-                    const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1);
+                    const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = true);
@@ -509,7 +515,7 @@ void InitTensor5D(XTensor * tensor, const int d0, const int d1, const int d2, co...
 /* initialize a dense 5d tensor V2 */
 void InitTensor5DV2(XTensor * tensor, const int d0, const int d1, const int d2, const int d3, const int d4,
-                    const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1);
+                    const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = true);
 
 /* initialize a tensor with a reference tensor */
 void InitTensor(XTensor * tensor, const XTensor * reference);
@@ -529,7 +535,7 @@ XTensor * NewTensor(const int myOrder, const int * myDimSize, const TENSOR_DATA_...
 /* generate a dense XTensor V2 */
 XTensor * NewTensorV2(const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType = X_FLOAT,
-                      const int myDevID = -1);
+                      const int myDevID = -1, const bool isEnableGrad = true);
@@ -538,20 +544,20 @@ XTensor * NewTensorBuf(const int myOrder, const int * myDimSize, ...
 /* generate a dense XTensor which allocates data on the buffer V2 */
 XTensor * NewTensorBufV2(const int myOrder, const int * myDimSize,
-                         const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1);
+                         const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = true);
 
 /* generate a XTensor which allocates data on the buffer */
 XTensor * NewTensorBuf(const XTensor * reference, int devID, XMem * myMem);
 
 /* generate a XTensor which allocates data on the buffer V2 */
-XTensor * NewTensorBufV2(const XTensor * reference, int devID);
+XTensor * NewTensorBufV2(const XTensor * reference, int devID, const bool isEnableGrad = true);
 
 /* generate a dense vector */
 XTensor * NewTensor1D(const int num, const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, XMem * myMem = NULL);
 
 /* generate a dense vector V2 */
-XTensor * NewTensor1DV2(const int num, const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1);
+XTensor * NewTensor1DV2(const int num, const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = true);
@@ -561,7 +567,7 @@ XTensor * NewTensor2D(const int rowNum, const int colNum, ...
 /* generate a dense matrix V2 */
 XTensor * NewTensor2DV2(const int rowNum, const int colNum,
-                        const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1);
+                        const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = true);
@@ -571,7 +577,7 @@ XTensor * NewTensor3D(const int d0, const int d1, const int d2, ...
 /* generate a dense 3d tensor V2 */
 XTensor * NewTensor3DV2(const int d0, const int d1, const int d2,
-                        const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1);
+                        const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = true);
@@ -581,7 +587,7 @@ XTensor * NewTensor4D(const int d0, const int d1, const int d2, const int d3, ...
 /* generate a dense 4d tensor V2 */
 XTensor * NewTensor4DV2(const int d0, const int d1, const int d2, const int d3,
-                        const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1);
+                        const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = true);
@@ -591,7 +597,10 @@ XTensor * NewTensor5D(const int d0, const int d1, const int d2, const int d3, co...
 /* generate a dense 5d tensor V2 */
 XTensor * NewTensor5DV2(const int d0, const int d1, const int d2, const int d3, const int d4,
-                        const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1);
+                        const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = true);
 
+/* generate a dense vector by range */
+XTensor * NewTensorRange(int lower, int upper, int step, const TENSOR_DATA_TYPE myDataType = X_INT, const int myDevID = -1, const bool isEnableGrad = true);
+
 /* generate a copy of XTensor (with a reference to a given tensor) */
 XTensor * NewTensor(const XTensor * a, bool isFilledData = true);
```
source/tensor/core/arithmetic/Div.cpp
查看文件 @
bc49d32a
...
...
@@ -215,18 +215,22 @@ XTensor Div(const XTensor &a, const XTensor &b, DTYPE alpha, int leadingDim)
_Div
(
&
a
,
&
b
,
&
c
,
alpha
,
leadingDim
);
/* tensor connections */
XLink
::
MakeLink
(
&
a
,
&
b
,
&
c
,
MATH_DIV
);
XLink
::
AddParamToHead
(
&
c
,
alpha
);
XLink
::
AddParamToHeadInt
(
&
c
,
leadingDim
);
if
(
a
.
enableGrad
&&
b
.
enableGrad
)
{
XLink
::
MakeLink
(
&
a
,
&
b
,
&
c
,
MATH_DIV
);
XLink
::
AddParamToHead
(
&
c
,
alpha
);
XLink
::
AddParamToHeadInt
(
&
c
,
leadingDim
);
}
}
else
if
(
n
>=
0
&&
n
<
a
.
order
){
/* call _DivDim function */
_DivDim
(
&
a
,
&
b
,
&
c
,
n
,
alpha
);
/* tensor connections */
XLink
::
MakeLink
(
&
a
,
&
b
,
&
c
,
MATH_DIVDIM
);
XLink
::
AddParamToHeadInt
(
&
c
,
n
);
XLink
::
AddParamToHead
(
&
c
,
alpha
);
if
(
a
.
enableGrad
&&
b
.
enableGrad
)
{
XLink
::
MakeLink
(
&
a
,
&
b
,
&
c
,
MATH_DIVDIM
);
XLink
::
AddParamToHeadInt
(
&
c
,
n
);
XLink
::
AddParamToHead
(
&
c
,
alpha
);
}
}
else
{
ShowNTErrors
(
"Something is wrong!"
);
...
...
@@ -261,7 +265,7 @@ void Div(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int leadin
/* call _Div function */
_Div
(
&
a
,
&
b
,
&
c
,
0
,
leadingDim
);
if
(
c
.
enableGrad
)
{
if
(
a
.
enableGrad
&&
b
.
enableGrad
)
{
/* tensor connections */
XLink
::
MakeLink
(
&
a
,
&
b
,
&
c
,
MATH_DIV
);
XLink
::
AddParamToHead
(
&
c
,
alpha
);
...
...
@@ -272,7 +276,7 @@ void Div(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int leadin
/* call _DivDim function */
_DivDim
(
&
a
,
&
b
,
&
c
,
n
,
alpha
);
if
(
c
.
enableGrad
)
{
if
(
a
.
enableGrad
&&
b
.
enableGrad
)
{
/* tensor connections */
XLink
::
MakeLink
(
&
a
,
&
b
,
&
c
,
MATH_DIVDIM
);
XLink
::
AddParamToHeadInt
(
&
c
,
n
);
...
...
source/tensor/core/arithmetic/DivDim.cpp
查看文件 @
bc49d32a
...
...
@@ -164,10 +164,12 @@ XTensor DivDim(const XTensor &a, const XTensor &b, int n, DTYPE alpha)
_DivDim
(
&
a
,
&
b
,
&
c
,
n
,
alpha
);
/* tensor connections */
XLink
::
MakeLink
(
&
a
,
&
b
,
&
c
,
MATH_DIVDIM
);
XLink
::
AddParamToHeadInt
(
&
c
,
n
);
XLink
::
AddParamToHead
(
&
c
,
alpha
);
if
(
a
.
enableGrad
&&
b
.
enableGrad
)
{
XLink
::
MakeLink
(
&
a
,
&
b
,
&
c
,
MATH_DIVDIM
);
XLink
::
AddParamToHeadInt
(
&
c
,
n
);
XLink
::
AddParamToHead
(
&
c
,
alpha
);
}
return
c
;
}
...
...
@@ -193,7 +195,7 @@ void DivDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE alpha)
/* call _Div function */
_DivDim
(
&
a
,
&
b
,
&
c
,
n
,
alpha
);
if
(
c
.
enableGrad
==
true
)
{
if
(
a
.
enableGrad
&&
b
.
enableGrad
)
{
/* tensor connections */
XLink
::
MakeLink
(
&
a
,
&
b
,
&
c
,
MATH_DIVDIM
);
XLink
::
AddParamToHeadInt
(
&
c
,
n
);
...
...
source/tensor/core/arithmetic/Mask.cpp
查看文件 @
bc49d32a
...
...
@@ -155,8 +155,10 @@ XTensor Mask(const XTensor &a, const XTensor &mask, DTYPE alpha)
_Mask
(
&
a
,
&
mask
,
&
c
,
alpha
);
/* tensor connections */
XLink
::
MakeLink
(
&
a
,
&
mask
,
&
c
,
MATH_MASK
);
XLink
::
AddParamToHead
(
&
c
,
alpha
);
if
(
a
.
enableGrad
)
{
XLink
::
MakeLink
(
&
a
,
&
mask
,
&
c
,
MATH_MASK
);
XLink
::
AddParamToHead
(
&
c
,
alpha
);
}
return
c
;
}
...
...
@@ -176,7 +178,7 @@ void Mask(const XTensor &a, const XTensor &mask, XTensor &c, DTYPE alpha)
/* call _Mask function */
_Mask
(
&
a
,
&
mask
,
&
c
,
alpha
);
if
(
c
.
enableGrad
)
{
if
(
a
.
enableGrad
)
{
XLink
::
MakeLink
(
&
a
,
&
mask
,
&
c
,
MATH_MASK
);
XLink
::
AddParamToHead
(
&
c
,
alpha
);
}
...
...
source/tensor/core/arithmetic/MatrixMul.cpp
查看文件 @
bc49d32a
...
...
@@ -296,10 +296,12 @@ XTensor MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA,
_MatrixMul
(
&
a
,
transposedA
,
&
b
,
transposedB
,
&
c
,
alpha
,
0
,
parallelRunner
);
/* tensor connections */
XLink
::
MakeLink
(
&
a
,
&
b
,
&
c
,
MATH_MATRIXMUL
);
XLink
::
AddParamToHeadTrans
(
&
c
,
transposedA
);
XLink
::
AddParamToHeadTrans
(
&
c
,
transposedB
);
XLink
::
AddParamToHead
(
&
c
,
alpha
);
if
(
a
.
enableGrad
&&
b
.
enableGrad
)
{
XLink
::
MakeLink
(
&
a
,
&
b
,
&
c
,
MATH_MATRIXMUL
);
XLink
::
AddParamToHeadTrans
(
&
c
,
transposedA
);
XLink
::
AddParamToHeadTrans
(
&
c
,
transposedB
);
XLink
::
AddParamToHead
(
&
c
,
alpha
);
}
/* destroy variables */
delete
[]
dimSize
;
...
...
@@ -344,7 +346,7 @@ void MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA,
/* call _MatrixMul function */
_MatrixMul
(
&
a
,
transposedA
,
&
b
,
transposedB
,
&
c
,
alpha
,
beta
,
parallelRunner
);
if
(
c
.
enableGrad
)
{
if
(
a
.
enableGrad
&&
b
.
enableGrad
)
{
/* tensor connections */
XLink
::
MakeLink
(
&
a
,
&
b
,
&
c
,
MATH_MATRIXMUL
);
XLink
::
AddParamToHeadTrans
(
&
c
,
transposedA
);
...
...
@@ -393,10 +395,12 @@ XTensor MatrixMul(const XTensor &a, const XTensor &b,
_MatrixMul
(
&
a
,
X_NOTRANS
,
&
b
,
X_NOTRANS
,
&
c
,
alpha
,
0
,
parallelRunner
);
/* tensor connections */
XLink
::
MakeLink
(
&
a
,
&
b
,
&
c
,
MATH_MATRIXMUL
);
XLink
::
AddParamToHeadTrans
(
&
c
,
X_NOTRANS
);
XLink
::
AddParamToHeadTrans
(
&
c
,
X_NOTRANS
);
XLink
::
AddParamToHead
(
&
c
,
alpha
);
if
(
a
.
enableGrad
&&
b
.
enableGrad
)
{
XLink
::
MakeLink
(
&
a
,
&
b
,
&
c
,
MATH_MATRIXMUL
);
XLink
::
AddParamToHeadTrans
(
&
c
,
X_NOTRANS
);
XLink
::
AddParamToHeadTrans
(
&
c
,
X_NOTRANS
);
XLink
::
AddParamToHead
(
&
c
,
alpha
);
}
/* destroy variables */
delete
[]
dimSize
;
...
...
@@ -440,7 +444,7 @@ void MatrixMul(const XTensor &a, const XTensor &b, XTensor &c,
/* call _MatrixMul function */
_MatrixMul
(
&
a
,
X_NOTRANS
,
&
b
,
X_NOTRANS
,
&
c
,
alpha
,
0
,
parallelRunner
);
if
(
c
.
enableGrad
)
{
if
(
a
.
enableGrad
&&
b
.
enableGrad
)
{
/* tensor connections */
XLink
::
MakeLink
(
&
a
,
&
b
,
&
c
,
MATH_MATRIXMUL
);
XLink
::
AddParamToHeadTrans
(
&
c
,
X_NOTRANS
);
...
...
source/tensor/core/arithmetic/MatrixMul2D.cpp
查看文件 @
bc49d32a
...
...
@@ -54,15 +54,15 @@ void _MatrixMul2D(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
CheckNTErrors
((
a
->
order
==
2
&&
b
->
order
==
2
&&
c
->
order
==
2
),
"Input tensors must have a order = 2!"
);
int
an
=
a
->
dimSize
[
0
],
am
=
a
->
dimSize
[
1
];
int
bn
=
b
->
dimSize
[
0
],
bm
=
b
->
dimSize
[
1
];
int
cn
=
c
->
dimSize
[
0
],
cm
=
c
->
dimSize
[
1
];
int
am2
=
transposedA
==
X_TRANS
?
an
:
am
;
int
an2
=
transposedA
==
X_TRANS
?
am
:
an
;
int
bm2
=
transposedB
==
X_TRANS
?
bn
:
bm
;
int
bn2
=
transposedB
==
X_TRANS
?
bm
:
bn
;
int
cm2
=
cm
;
int
cn2
=
cn
;
int
an
=
a
->
dimSize
[
0
],
am
=
a
->
dimSize
[
1
];
int
bn
=
b
->
dimSize
[
0
],
bm
=
b
->
dimSize
[
1
];
int
cn
=
c
->
dimSize
[
0
],
cm
=
c
->
dimSize
[
1
];
int
am2
=
transposedA
==
X_TRANS
?
an
:
am
;
int
an2
=
transposedA
==
X_TRANS
?
am
:
an
;
int
bm2
=
transposedB
==
X_TRANS
?
bn
:
bm
;
int
bn2
=
transposedB
==
X_TRANS
?
bm
:
bn
;
int
cm2
=
cm
;
int
cn2
=
cn
;
CheckNTErrors
((
am2
==
bn2
&&
an2
==
cn2
&&
bm2
==
cm2
),
"Unmatched tensors in multiplication!"
);
...
...
@@ -82,10 +82,11 @@ void _MatrixMul2D(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
b
->
dataType
==
DEFAULT_DTYPE
&&
c
->
dataType
==
DEFAULT_DTYPE
)
{
if
(
use
BLAS
)
#if defined(USE_
BLAS)
_MatrixMULCPU
(
a
,
transposedA
,
b
,
transposedB
,
c
,
alpha
,
beta
);
else
#
else
_MatrixMul2DParallel
(
a
,
transposedA
,
b
,
transposedB
,
c
,
alpha
,
beta
,
parallelRunner
);
#endif
}
else
{
// TODO!!
...
...
source/tensor/core/arithmetic/MatrixMulBatched.cpp
查看文件 @
bc49d32a
...
...
@@ -199,10 +199,7 @@ void _MatrixMulBatchedCPU(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
bi
->
data
=
(
char
*
)
b
->
data
+
i
*
bRealBlockSize
;
ci
->
data
=
(
char
*
)
c
->
data
+
i
*
cRealBlockSize
;
#ifdef USE_BLAS
if
(
useBLAS
)
_MatrixMULCPU
(
ai
,
transposedA
,
bi
,
transposedB
,
ci
,
alpha
,
beta
);
else
_MatrixMul2D
(
ai
,
transposedA
,
bi
,
transposedB
,
ci
,
alpha
,
beta
);
_MatrixMULCPU
(
ai
,
transposedA
,
bi
,
transposedB
,
ci
,
alpha
,
beta
);
#else
_MatrixMul2D
(
ai
,
transposedA
,
bi
,
transposedB
,
ci
,
alpha
,
beta
);
#endif
...
...
@@ -262,10 +259,7 @@ void _MatrixMulBatchedCPU(const TensorList * a, MATRIX_TRANS_TYPE transposedA,
CheckNTErrors
((
bi
->
order
==
2
),
"2d tensor (i.e., matrix) is required!"
);
CheckNTErrors
((
ci
->
order
==
2
),
"2d tensor (i.e., matrix) is required!"
);
#ifdef USE_BLAS
if
(
useBLAS
)
_MatrixMULCPU
(
ai
,
transposedA
,
bi
,
transposedB
,
ci
,
alpha
,
beta
);
else
_MatrixMul2D
(
ai
,
transposedA
,
bi
,
transposedB
,
ci
,
alpha
,
beta
);
#else
_MatrixMul2D
(
ai
,
transposedA
,
bi
,
transposedB
,
ci
,
alpha
,
beta
);
#endif
...
...
@@ -320,10 +314,12 @@ XTensor MatrixMulBatched(const XTensor &a, MATRIX_TRANS_TYPE transposedA, const
_MatrixMulBatched
(
&
a
,
transposedA
,
&
b
,
transposedB
,
&
c
,
alpha
,
0
,
parallelRunner
);
/* tensor connections */
XLink
::
MakeLink
(
&
a
,
&
b
,
&
c
,
MATH_MATRIXMULBATCHED
);
XLink
::
AddParamToHeadTrans
(
&
c
,
transposedA
);
XLink
::
AddParamToHeadTrans
(
&
c
,
transposedB
);
XLink
::
AddParamToHead
(
&
c
,
alpha
);
if
(
a
.
enableGrad
&&
b
.
enableGrad
)
{
XLink
::
MakeLink
(
&
a
,
&
b
,
&
c
,
MATH_MATRIXMULBATCHED
);
XLink
::
AddParamToHeadTrans
(
&
c
,
transposedA
);
XLink
::
AddParamToHeadTrans
(
&
c
,
transposedB
);
XLink
::
AddParamToHead
(
&
c
,
alpha
);
}
/* destroy variables */
delete
[]
dimSize
;
...
...
@@ -376,10 +372,12 @@ XTensor MatrixMulBatched(const XTensor &a, const XTensor &b,
_MatrixMulBatched
(
&
a
,
X_NOTRANS
,
&
b
,
X_NOTRANS
,
&
c
,
alpha
,
0
,
parallelRunner
);
/* tensor connections */
XLink
::
MakeLink
(
&
a
,
&
b
,
&
c
,
MATH_MATRIXMULBATCHED
);
XLink
::
AddParamToHeadTrans
(
&
c
,
X_NOTRANS
);
XLink
::
AddParamToHeadTrans
(
&
c
,
X_NOTRANS
);
XLink
::
AddParamToHead
(
&
c
,
alpha
);
if
(
a
.
enableGrad
&&
b
.
enableGrad
)
{
XLink
::
MakeLink
(
&
a
,
&
b
,
&
c
,
MATH_MATRIXMULBATCHED
);
XLink
::
AddParamToHeadTrans
(
&
c
,
X_NOTRANS
);
XLink
::
AddParamToHeadTrans
(
&
c
,
X_NOTRANS
);
XLink
::
AddParamToHead
(
&
c
,
alpha
);
}
/* destroy variables */
delete
[]
dimSize
;
...
...
source/tensor/core/arithmetic/MulAndShift.cpp
查看文件 @
bc49d32a
...
...
@@ -118,11 +118,87 @@ XTensor MulAndShift(const XTensor &x, const XTensor &w, const XTensor &b,
}
/* tensor connections */
XLink
::
MakeLink
(
&
x
,
&
w
,
&
b
,
&
c
,
MATH_MULANDSHIFT
);
XLink
::
AddParamToHeadInt
(
&
c
,
n
);
XLink
::
AddParamToHeadTrans
(
&
c
,
X_NOTRANS
);
XLink
::
AddParamToHeadTrans
(
&
c
,
X_NOTRANS
);
//XLink::AddParamToHead(&c, beta);
if
(
w
.
enableGrad
&&
b
.
enableGrad
)
{
XLink
::
MakeLink
(
&
x
,
&
w
,
&
b
,
&
c
,
MATH_MULANDSHIFT
);
XLink
::
AddParamToHeadInt
(
&
c
,
n
);
XLink
::
AddParamToHeadTrans
(
&
c
,
X_NOTRANS
);
XLink
::
AddParamToHeadTrans
(
&
c
,
X_NOTRANS
);
}
/* destroy variables */
delete
[]
dimSize
;
DelTensorBuf
(
tmp
);
return
c
;
}
/*
operation c = x * w + b MulAndShift
>> x - tensor x
>> w - tensor w
>> b - tensor b
>> parallelRunner - parallel processing module
<< return - the result of matrix multiplication
*/
XTensor
MulAndShift
(
const
XTensor
&
x
,
MATRIX_TRANS_TYPE
transposedA
,
const
XTensor
&
w
,
MATRIX_TRANS_TYPE
transposedB
,
const
XTensor
&
b
,
DTYPE
alpha
,
XPRunner
*
parallelRunner
)
{
CheckNTErrors
(
x
.
dataType
==
w
.
dataType
,
"Input tensors should have the same data type!"
);
CheckNTErrors
(
x
.
order
>=
2
&&
w
.
order
>=
2
,
"Input tensors must have a order >= 2!"
);
int
xn
=
transposedA
==
X_TRANS
?
x
.
dimSizeRDI
[
0
]
:
x
.
dimSizeRDI
[
1
];
int
xm
=
transposedA
==
X_TRANS
?
x
.
dimSizeRDI
[
1
]
:
x
.
dimSizeRDI
[
0
];
int
wn
=
transposedB
==
X_TRANS
?
w
.
dimSizeRDI
[
0
]
:
w
.
dimSizeRDI
[
1
];
int
wm
=
transposedB
==
X_TRANS
?
w
.
dimSizeRDI
[
1
]
:
w
.
dimSizeRDI
[
0
];
int
order
=
x
.
order
+
w
.
order
-
2
;
int
sub
=
0
;
int
*
dimSize
=
new
int
[
order
];
for
(
int
i
=
2
;
i
<
x
.
order
;
i
++
)
dimSize
[
sub
++
]
=
x
.
dimSizeRDI
[
x
.
order
+
1
-
i
];
for
(
int
i
=
2
;
i
<
w
.
order
;
i
++
)
dimSize
[
sub
++
]
=
w
.
dimSizeRDI
[
w
.
order
+
1
-
i
];
dimSize
[
sub
++
]
=
xn
;
dimSize
[
sub
++
]
=
wm
;
float
dr
=
(
!
x
.
isSparse
||
!
w
.
isSparse
)
?
1.0
F
:
MAX
(
x
.
denseRatio
,
w
.
denseRatio
);
XTensor
*
tmp
=
NewTensorBuf
(
order
,
dimSize
,
x
.
dataType
,
dr
,
x
.
devID
,
x
.
mem
);
/* call _MatrixMul function */
_MatrixMul
(
&
x
,
transposedA
,
&
w
,
transposedB
,
tmp
,
alpha
,
0
,
parallelRunner
);
XTensor
c
(
tmp
);
c
.
SetTMPFlag
();
int
n
=
GetSumIndex
(
tmp
,
b
);
if
(
n
==
-
1
)
{
/* call _Sum function */
_Sum
(
tmp
,
&
b
,
&
c
);
// TODO!!
ShowNTErrors
(
"TODO!"
);
}
else
if
(
n
>=
0
&&
n
<
tmp
->
order
)
{
/* call _SumDim function */
_SumDim
(
tmp
,
&
b
,
&
c
,
n
);
}
else
{
ShowNTErrors
(
"Something is wrong!"
);
}
/* tensor connections */
if
(
w
.
enableGrad
&&
b
.
enableGrad
)
{
XLink
::
MakeLink
(
&
x
,
&
w
,
&
b
,
&
c
,
MATH_MULANDSHIFT
);
XLink
::
AddParamToHeadInt
(
&
c
,
n
);
XLink
::
AddParamToHeadTrans
(
&
c
,
transposedA
);
XLink
::
AddParamToHeadTrans
(
&
c
,
transposedB
);
}
/* destroy variables */
delete
[]
dimSize
;
...
...
source/tensor/core/arithmetic/MulAndShift.h
查看文件 @
bc49d32a
...
...
@@ -31,6 +31,9 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
XTensor
MulAndShift
(
const
XTensor
&
x
,
const
XTensor
&
w
,
const
XTensor
&
b
,
DTYPE
alpha
=
(
DTYPE
)
1
.
0
,
XPRunner
*
parallelRunner
=
NULL
);
XTensor
MulAndShift
(
const
XTensor
&
x
,
MATRIX_TRANS_TYPE
transposedA
,
const
XTensor
&
w
,
MATRIX_TRANS_TYPE
transposedB
,
const
XTensor
&
b
,
DTYPE
alpha
=
(
DTYPE
)
1
.
0
,
XPRunner
*
parallelRunner
=
NULL
);
}
// namespace nts(NiuTrans.Tensor)
...
...
source/tensor/core/arithmetic/Multiply.cpp
查看文件 @
bc49d32a
...
...
@@ -216,18 +216,22 @@ XTensor Multiply(const XTensor &a, const XTensor &b, DTYPE alpha, int leadingDim
_Multiply
(
&
a
,
&
b
,
&
c
,
0
,
leadingDim
);
/* tensor connections */
XLink
::
MakeLink
(
&
a
,
&
b
,
&
c
,
MATH_MULTIPLY
);
XLink
::
AddParamToHead
(
&
c
,
alpha
);
XLink
::
AddParamToHeadInt
(
&
c
,
leadingDim
);
if
(
a
.
enableGrad
&&
b
.
enableGrad
)
{
XLink
::
MakeLink
(
&
a
,
&
b
,
&
c
,
MATH_MULTIPLY
);
XLink
::
AddParamToHead
(
&
c
,
alpha
);
XLink
::
AddParamToHeadInt
(
&
c
,
leadingDim
);
}
}
else
if
(
n
>=
0
&&
n
<
a
.
order
){
/* call _MultiplyDim function */
_MultiplyDim
(
&
a
,
&
b
,
&
c
,
n
,
alpha
);
/* tensor connections */
XLink
::
MakeLink
(
&
a
,
&
b
,
&
c
,
MATH_MULTIPLYDIM
);
XLink
::
AddParamToHeadInt
(
&
c
,
n
);
XLink
::
AddParamToHead
(
&
c
,
alpha
);
if
(
a
.
enableGrad
&&
b
.
enableGrad
)
{
XLink
::
MakeLink
(
&
a
,
&
b
,
&
c
,
MATH_MULTIPLYDIM
);
XLink
::
AddParamToHeadInt
(
&
c
,
n
);
XLink
::
AddParamToHead
(
&
c
,
alpha
);
}
}
else
{
ShowNTErrors
(
"Something is wrong!"
);
...
...
@@ -262,7 +266,7 @@ void Multiply(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int l
/* call _Multiply function */
_Multiply
(
&
a
,
&
b
,
&
c
,
0
,
leadingDim
);
if
(
c
.
enableGrad
)
{
if
(
a
.
enableGrad
&&
b
.
enableGrad
)
{
/* tensor connections */
XLink
::
MakeLink
(
&
a
,
&
b
,
&
c
,
MATH_MULTIPLY
);
XLink
::
AddParamToHead
(
&
c
,
alpha
);
...
...
@@ -273,7 +277,7 @@ void Multiply(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int l
/* call _MultiplyDim function */
_MultiplyDim
(
&
a
,
&
b
,
&
c
,
n
,
alpha
);
if
(
c
.
enableGrad
)
{
if
(
a
.
enableGrad
&&
b
.
enableGrad
)
{
/* tensor connections */
XLink
::
MakeLink
(
&
a
,
&
b
,
&
c
,
MATH_MULTIPLYDIM
);
XLink
::
AddParamToHeadInt
(
&
c
,
n
);
...
...
source/tensor/core/arithmetic/MultiplyDim.cpp
查看文件 @
bc49d32a
...
...
@@ -180,9 +180,11 @@ XTensor MultiplyDim(const XTensor &a, const XTensor &b, int n)
_MultiplyDim
(
&
a
,
&
b
,
&
c
,
n
,
0
);
/* tensor connections */
XLink
::
MakeLink
(
&
a
,
&
b
,
&
c
,
MATH_MULTIPLYDIM
);
XLink
::
AddParamToHeadInt
(
&
c
,
n
);
XLink
::
AddParamToHead
(
&
c
,
0
);
if
(
a
.
enableGrad
&&
b
.
enableGrad
)
{
XLink
::
MakeLink
(
&
a
,
&
b
,
&
c
,
MATH_MULTIPLYDIM
);
XLink
::
AddParamToHeadInt
(
&
c
,
n
);
XLink
::
AddParamToHead
(
&
c
,
0
);
}
return
c
;
}
...
...
@@ -208,7 +210,7 @@ void MultiplyDim(const XTensor &a, const XTensor &b, XTensor &c, int n)
/* call _Multiply function */
_MultiplyDim
(
&
a
,
&
b
,
&
c
,
n
,
0
);
if
(
c
.
enableGrad
)
{
if
(
a
.
enableGrad
&&
b
.
enableGrad
)
{
/* tensor connections */
XLink
::
MakeLink
(
&
a
,
&
b
,
&
c
,
MATH_MULTIPLYDIM
);
XLink
::
AddParamToHeadInt
(
&
c
,
n
);
...
...
@@ -350,8 +352,10 @@ XTensor MultiplyBroadcast(const XTensor &a, const XTensor &b)
_MultiplyBroadcast
(
&
a
,
&
b
,
&
c
,
0
);
/* tensor connections */
XLink
::
MakeLink
(
&
a
,
&
b
,
&
c
,
MATH_MULTIPLYBROADCAST
);
XLink
::
AddParamToHead
(
&
c
,
0
);
if
(
a
.
enableGrad
&&
b
.
enableGrad
)
{
XLink
::
MakeLink
(
&
a
,
&
b
,
&
c
,
MATH_MULTIPLYBROADCAST
);
XLink
::
AddParamToHead
(
&
c
,
0
);
}
return
c
;
}
...
...
@@ -374,7 +378,7 @@ void MultiplyBroadcast(const XTensor &a, const XTensor &b, XTensor &c)
/* call _SumBroadcast function */
_MultiplyBroadcast
(
&
a
,
&
b
,
&
c
,
0
);
if
(
c
.
enableGrad
)
{
if
(
a
.
enableGrad
&&
b
.
enableGrad
)
{
/* tensor connections */
XLink
::
MakeLink
(
&
a
,
&
b
,
&
c
,
MATH_MULTIPLYBROADCAST
);
XLink
::
AddParamToHead
(
&
c
,
0
);
...
...
source/tensor/core/arithmetic/Sub.cpp
查看文件 @
bc49d32a
...
...
@@ -190,17 +190,21 @@ XTensor Sub(const XTensor &a, const XTensor &b, DTYPE beta)
_Sub
(
&
a
,
&
b
,
&
c
,
beta
);
/* tensor connections */
XLink
::
MakeLink
(
&
a
,
&
b
,
&
c
,
MATH_SUB
);
XLink
::
AddParamToHead
(
&
c
,
beta
);
if
(
a
.
enableGrad
&&
b
.
enableGrad
)
{
XLink
::
MakeLink
(
&
a
,
&
b
,
&
c
,
MATH_SUB
);
XLink
::
AddParamToHead
(
&
c
,
beta
);
}
}
else
if
(
n
>=
0
&&
n
<
a
.
order
){
/* call _SubDim function */
_SubDim
(
&
a
,
&
b
,
&
c
,
n
,
beta
);
/* tensor connections */
XLink
::
MakeLink
(
&
a
,
&
b
,
&
c
,
MATH_SUBDIM
);
XLink
::
AddParamToHeadInt
(
&
c
,
n
);
XLink
::
AddParamToHead
(
&
c
,
beta
);
if
(
a
.
enableGrad
&&
b
.
enableGrad
)
{
XLink
::
MakeLink
(
&
a
,
&
b
,
&
c
,
MATH_SUBDIM
);
XLink
::
AddParamToHeadInt
(
&
c
,
n
);
XLink
::
AddParamToHead
(
&
c
,
beta
);
}
}
else
{
ShowNTErrors
(
"Something is wrong!"
);
...
...
@@ -229,7 +233,7 @@ void Sub(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta)
/* call _Sub function */
_Sub
(
&
a
,
&
b
,
&
c
,
beta
);
if
(
c
.
enableGrad
)
{
if
(
a
.
enableGrad
&&
b
.
enableGrad
)
{
/* tensor connections */
XLink
::
MakeLink
(
&
a
,
&
b
,
&
c
,
MATH_SUB
);
XLink
::
AddParamToHead
(
&
c
,
beta
);
...
...
@@ -239,7 +243,7 @@ void Sub(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta)
/* call _SubDim function */
_SubDim
(
&
a
,
&
b
,
&
c
,
n
,
beta
);
if
(
c
.
enableGrad
)
{
if
(
a
.
enableGrad
&&
b
.
enableGrad
)
{
/* tensor connections */
XLink
::
MakeLink
(
&
a
,
&
b
,
&
c
,
MATH_SUBDIM
);
XLink
::
AddParamToHeadInt
(
&
c
,
n
);
...
...
source/tensor/core/arithmetic/SubDim.cpp
查看文件 @
bc49d32a
...
...
@@ -164,9 +164,11 @@ XTensor SubDim(const XTensor &a, const XTensor &b, int n, DTYPE beta)
_SubDim
(
&
a
,
&
b
,
&
c
,
n
,
beta
);
/* tensor connections */
XLink
::
MakeLink
(
&
a
,
&
b
,
&
c
,
MATH_SUBDIM
);
XLink
::
AddParamToHeadInt
(
&
c
,
n
);
XLink
::
AddParamToHead
(
&
c
,
beta
);
if
(
a
.
enableGrad
&&
b
.
enableGrad
)
{
XLink
::
MakeLink
(
&
a
,
&
b
,
&
c
,
MATH_SUBDIM
);
XLink
::
AddParamToHeadInt
(
&
c
,
n
);
XLink
::
AddParamToHead
(
&
c
,
beta
);
}
return
c
;
}
...
...
@@ -193,7 +195,7 @@ void SubDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE beta)
/* call _Sub function */
_SubDim
(
&
a
,
&
b
,
&
c
,
n
,
beta
);
if
(
c
.
enableGrad
)
{
if
(
a
.
enableGrad
&&
b
.
enableGrad
)
{
/* tensor connections */
XLink
::
MakeLink
(
&
a
,
&
b
,
&
c
,
MATH_SUBDIM
);
XLink
::
AddParamToHeadInt
(
&
c
,
n
);
...
...
source/tensor/core/arithmetic/Sum.cpp
查看文件 @
bc49d32a
...
...
@@ -22,6 +22,7 @@
#include "../../XTensor.h"
#include "../../XName.h"
#include "../../XUtility.h"
#include "../../XBLAS.h"
#include "../movement/CopyValues.h"
#include "Sum.h"
#include "Sum.cuh"
...
...
@@ -84,29 +85,57 @@ void _Sum(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta)
DTYPE
*
ap
=
(
DTYPE
*
)
a
->
data
;
DTYPE
*
bp
=
(
DTYPE
*
)
b
->
data
;
DTYPE
*
cp
=
(
DTYPE
*
)
c
->
data
;
/* unrolling */
int
num
=
a
->
unitNum
;
if
(
num
%
4
==
0
)
{
for
(
int
i
=
0
;
i
<
num
;
i
+=
4
)
{
cp
[
i
]
=
ap
[
i
]
+
bp
[
i
]
*
beta
;
cp
[
i
+
1
]
=
ap
[
i
+
1
]
+
bp
[
i
+
1
]
*
beta
;
cp
[
i
+
2
]
=
ap
[
i
+
2
]
+
bp
[
i
+
2
]
*
beta
;
cp
[
i
+
3
]
=
ap
[
i
+
3
]
+
bp
[
i
+
3
]
*
beta
;
}
/* when c != a, OpenBLAS needs to copy a to c first. This operation
slow down the speed, so just use OpenBLAS when c == a */
#if defined(USE_BLAS)
if
(
c
==
a
){
AXPY
(
a
->
unitNum
,
beta
,
bp
,
1
,
cp
,
1
);
}
else
{
int
num
=
a
->
unitNum
;
if
(
num
%
4
==
0
)
{
for
(
int
i
=
0
;
i
<
num
;
i
+=
4
)
{
cp
[
i
]
=
ap
[
i
]
+
bp
[
i
]
*
beta
;
cp
[
i
+
1
]
=
ap
[
i
+
1
]
+
bp
[
i
+
1
]
*
beta
;
cp
[
i
+
2
]
=
ap
[
i
+
2
]
+
bp
[
i
+
2
]
*
beta
;
cp
[
i
+
3
]
=
ap
[
i
+
3
]
+
bp
[
i
+
3
]
*
beta
;
}
}
else
if
(
num
%
2
==
0
)
{
for
(
int
i
=
0
;
i
<
num
;
i
+=
2
)
{
cp
[
i
]
=
ap
[
i
]
+
bp
[
i
]
*
beta
;
cp
[
i
+
1
]
=
ap
[
i
+
1
]
+
bp
[
i
+
1
]
*
beta
;
}
}
else
{
for
(
int
i
=
0
;
i
<
num
;
i
++
)
{
cp
[
i
]
=
ap
[
i
]
+
bp
[
i
]
*
beta
;
}
}
}
else
if
(
num
%
2
==
0
)
{
for
(
int
i
=
0
;
i
<
num
;
i
+=
2
)
{
cp
[
i
]
=
ap
[
i
]
+
bp
[
i
]
*
beta
;
cp
[
i
+
1
]
=
ap
[
i
+
1
]
+
bp
[
i
+
1
]
*
beta
;
#else
/* unrolling */
int
num
=
a
->
unitNum
;
if
(
num
%
4
==
0
)
{
for
(
int
i
=
0
;
i
<
num
;
i
+=
4
)
{
cp
[
i
]
=
ap
[
i
]
+
bp
[
i
]
*
beta
;
cp
[
i
+
1
]
=
ap
[
i
+
1
]
+
bp
[
i
+
1
]
*
beta
;
cp
[
i
+
2
]
=
ap
[
i
+
2
]
+
bp
[
i
+
2
]
*
beta
;
cp
[
i
+
3
]
=
ap
[
i
+
3
]
+
bp
[
i
+
3
]
*
beta
;
}
}
}
else
{
for
(
int
i
=
0
;
i
<
num
;
i
++
)
{
cp
[
i
]
=
ap
[
i
]
+
bp
[
i
]
*
beta
;
else
if
(
num
%
2
==
0
)
{
for
(
int
i
=
0
;
i
<
num
;
i
+=
2
)
{
cp
[
i
]
=
ap
[
i
]
+
bp
[
i
]
*
beta
;
cp
[
i
+
1
]
=
ap
[
i
+
1
]
+
bp
[
i
+
1
]
*
beta
;
}
}
else
{
for
(
int
i
=
0
;
i
<
num
;
i
++
)
{
cp
[
i
]
=
ap
[
i
]
+
bp
[
i
]
*
beta
;
}
}
#endif
}
}
else
{
// TODO!!
ShowNTErrors
(
"TODO!"
);
...
...
@@ -195,17 +224,21 @@ XTensor Sum(const XTensor &a, const XTensor &b, DTYPE beta)
_Sum
(
&
a
,
&
b
,
&
c
,
beta
);
/* tensor connections */
XLink
::
MakeLink
(
&
a
,
&
b
,
&
c
,
MATH_SUM
);
XLink
::
AddParamToHead
(
&
c
,
beta
);
if
(
a
.
enableGrad
&&
b
.
enableGrad
)
{
XLink
::
MakeLink
(
&
a
,
&
b
,
&
c
,
MATH_SUM
);
XLink
::
AddParamToHead
(
&
c
,
beta
);
}
}
else
if
(
n
>=
0
&&
n
<
a
.
order
){
/* call _SumDim function */
_SumDim
(
&
a
,
&
b
,
&
c
,
n
,
beta
);
/* tensor connections */
XLink
::
MakeLink
(
&
a
,
&
b
,
&
c
,
MATH_SUMDIM
);
XLink
::
AddParamToHeadInt
(
&
c
,
n
);
XLink
::
AddParamToHead
(
&
c
,
beta
);
if
(
a
.
enableGrad
&&
b
.
enableGrad
)
{
XLink
::
MakeLink
(
&
a
,
&
b
,
&
c
,
MATH_SUMDIM
);
XLink
::
AddParamToHeadInt
(
&
c
,
n
);
XLink
::
AddParamToHead
(
&
c
,
beta
);
}
}
else
{
ShowNTErrors
(
"Something is wrong!"
);
...
...
@@ -232,9 +265,9 @@ void Sum(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta)
if
(
n
==
-
1
)
{
/* call _Sum function */
_Sum
(
&
a
,
&
b
,
&
c
,
beta
);
if
(
c
.
enableGrad
)
{
/* tensor connections */
/* tensor connections */
if
(
a
.
enableGrad
&&
b
.
enableGrad
)
{
XLink
::
MakeLink
(
&
a
,
&
b
,
&
c
,
MATH_SUM
);
XLink
::
AddParamToHead
(
&
c
,
beta
);
}
...
...
@@ -242,9 +275,9 @@ void Sum(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta)
else
if
(
n
>=
0
&&
n
<
a
.
order
)
{
/* call _SumDim function */
_SumDim
(
&
a
,
&
b
,
&
c
,
n
,
beta
);
if
(
c
.
enableGrad
)
{
/* tensor connections */
/* tensor connections */
if
(
a
.
enableGrad
&&
b
.
enableGrad
)
{
XLink
::
MakeLink
(
&
a
,
&
b
,
&
c
,
MATH_SUMDIM
);
XLink
::
AddParamToHeadInt
(
&
c
,
n
);
XLink
::
AddParamToHead
(
&
c
,
beta
);
...
...
source/tensor/core/arithmetic/SumDim.cpp
查看文件 @
bc49d32a
...
...
@@ -181,9 +181,11 @@ XTensor SumDim(const XTensor &a, const XTensor &b, int n, DTYPE beta)
_SumDim
(
&
a
,
&
b
,
&
c
,
n
,
beta
);
/* tensor connections */
XLink
::
MakeLink
(
&
a
,
&
b
,
&
c
,
MATH_SUMDIM
);
XLink
::
AddParamToHeadInt
(
&
c
,
n
);
XLink
::
AddParamToHead
(
&
c
,
beta
);
if
(
a
.
enableGrad
&&
b
.
enableGrad
)
{
XLink
::
MakeLink
(
&
a
,
&
b
,
&
c
,
MATH_SUMDIM
);
XLink
::
AddParamToHeadInt
(
&
c
,
n
);
XLink
::
AddParamToHead
(
&
c
,
beta
);
}
return
c
;
}
...
...
@@ -210,7 +212,7 @@ void SumDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE beta)
/* call _SumDim function */
_SumDim
(
&
a
,
&
b
,
&
c
,
n
,
beta
);
if
(
c
.
enableGrad
)
{
if
(
a
.
enableGrad
&&
b
.
enableGrad
)
{
/* tensor connections */
XLink
::
MakeLink
(
&
a
,
&
b
,
&
c
,
MATH_SUMDIM
);
XLink
::
AddParamToHeadInt
(
&
c
,
n
);
...
...
@@ -353,9 +355,11 @@ XTensor SumBroadcast(const XTensor &a, const XTensor &b, DTYPE beta)
_SumBroadcast
(
&
a
,
&
b
,
&
c
,
beta
);
/* tensor connections */
XLink
::
MakeLink
(
&
a
,
&
b
,
&
c
,
MATH_SUMBROADCAST
);
XLink
::
AddParamToHead
(
&
c
,
beta
);
if
(
a
.
enableGrad
&&
b
.
enableGrad
)
{
XLink
::
MakeLink
(
&
a
,
&
b
,
&
c
,
MATH_SUMBROADCAST
);
XLink
::
AddParamToHead
(
&
c
,
beta
);
}
return
c
;
}
...
...
@@ -377,7 +381,7 @@ void SumBroadcast(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta)
/* call _SumBroadcast function */
_SumBroadcast
(
&
a
,
&
b
,
&
c
,
beta
);
if
(
c
.
enableGrad
)
{
if
(
a
.
enableGrad
&&
b
.
enableGrad
)
{
/* tensor connections */
XLink
::
MakeLink
(
&
a
,
&
b
,
&
c
,
MATH_SUMBROADCAST
);
XLink
::
AddParamToHead
(
&
c
,
beta
);
...
...
source/tensor/core/getandset/ConvertDataType.cpp
查看文件 @
bc49d32a
...
...
@@ -121,7 +121,8 @@ XTensor ConvertDataType(const XTensor & input, TENSOR_DATA_TYPE dataType)
_ConvertDataType
(
&
input
,
&
output
);
/* tensor connection */
XLink
::
MakeLink
(
&
input
,
NULL
,
&
output
,
GETANDSET_CONVERTDATATYPE
);
if
(
input
.
enableGrad
)
XLink
::
MakeLink
(
&
input
,
NULL
,
&
output
,
GETANDSET_CONVERTDATATYPE
);
return
output
;
}
...
...
@@ -136,7 +137,7 @@ void ConvertDataType(const XTensor & input, XTensor & output, TENSOR_DATA_TYPE d
_ConvertDataType
(
&
input
,
&
output
);
/* tensor connection */
if
(
out
put
.
enableGrad
)
if
(
in
put
.
enableGrad
)
XLink
::
MakeLink
(
&
input
,
NULL
,
&
output
,
GETANDSET_CONVERTDATATYPE
);
}
...
...
source/tensor/core/getandset/OnehotAndIndex.cpp
查看文件 @
bc49d32a
...
...
@@ -32,65 +32,43 @@ convert onehot tensor to index tensor
>> index - index tensor, which value is an integer num
>> size - the last dimension size of the onehot tensor
*/
void
_OnehotToIndex
(
XTensor
*
onehot
,
XTensor
*
index
,
int
dim
)
void
_OnehotToIndex
(
const
XTensor
*
onehot
,
XTensor
*
index
,
int
size
)
{
dim
=
(
dim
<
0
?
onehot
->
GetDim
(
-
1
)
:
dim
);
CheckNTErrors
(
onehot
->
GetDim
(
-
1
)
==
size
,
"Illegal tensor dimension!"
);
CheckNTErrors
(
onehot
->
order
==
index
->
order
+
1
,
"Illegal tensor order!"
);
CheckNTErrors
(
dim
<
onehot
->
order
,
"Illegal speficied dimension!"
)
CheckNTErrors
(
onehot
->
dataType
==
X_INT
,
"The onehot tensor must be in X_INT!"
)
CheckNTErrors
(
index
->
dataType
==
X_INT
,
"The index tensor must be in X_INT!"
)
for
(
int
i
=
0
;
i
<
index
->
order
;
i
++
)
{
if
(
i
<
dim
)
{
CheckNTErrors
(
index
->
GetDim
(
i
)
==
onehot
->
GetDim
(
i
),
"Illegal tensor order!"
);
}
else
{
CheckNTErrors
(
index
->
GetDim
(
i
)
==
onehot
->
GetDim
(
i
+
1
),
"Illegal tensor order!"
);
}
}
for
(
int
i
=
0
;
i
<
index
->
order
;
i
++
)
CheckNTErrors
(
index
->
GetDim
(
i
)
==
onehot
->
GetDim
(
i
),
"Illegal tensor order!"
);
#ifdef USE_CUDA
if
(
onehot
->
devID
>=
0
&&
index
->
devID
>=
0
)
{
_CudaOnehotToIndex
(
onehot
,
index
,
dim
);
_CudaOnehotToIndex
(
onehot
,
index
,
size
);
return
;
}
#endif
int
blockNum
=
1
;
int
blockSize
=
1
;
int
dimSize
=
1
;
int
stride
=
1
;
for
(
int
i
=
0
;
i
<
dim
;
i
++
)
blockNum
*=
onehot
->
GetDim
(
i
);
blockSize
=
onehot
->
unitNum
/
blockNum
;
dimSize
=
onehot
->
GetDim
(
dim
);
for
(
int
i
=
dim
+
1
;
i
<
onehot
->
order
;
i
++
)
stride
*=
onehot
->
GetDim
(
i
);
int
blockNum
=
index
->
unitNum
;
int
stride
=
size
;
int
*
onehotData
=
(
int
*
)
onehot
->
data
;
int
*
indexData
=
(
int
*
)
index
->
data
;
for
(
int
i
=
0
;
i
<
blockNum
;
i
++
)
{
int
*
od
=
onehotData
+
i
*
stride
;
int
record
=
-
1
;
for
(
int
j
=
0
;
j
<
stride
;
j
++
)
{
int
*
od
=
onehotData
+
i
*
blockSize
+
j
;
int
*
index
=
indexData
+
i
*
stride
+
j
;
int
record
=
-
1
;
for
(
int
j
=
0
;
j
<
dimSize
;
j
++
)
{
if
(
od
[
j
*
stride
]
!=
0
)
{
if
(
record
==
-
1
)
record
=
j
;
else
ShowNTErrors
(
"The value of onehot tensor is illegal!"
);
}
if
(
od
[
j
]
!=
0
)
{
if
(
record
==
-
1
)
record
=
j
;
else
ShowNTErrors
(
"The value of onehot tensor is illegal!"
);
}
*
index
=
record
;
}
indexData
[
i
]
=
record
;
}
}
/*
...
...
@@ -101,7 +79,7 @@ make a new tensor to keep the result and return it
>> size - the last dimension size of the onehot tensor
<< return - the index tensor
*/
XTensor
OnehotToIndex
(
XTensor
&
onehot
,
int
size
)
XTensor
OnehotToIndex
(
const
XTensor
&
onehot
,
int
size
)
{
CheckNTErrors
(
onehot
.
GetDim
(
-
1
)
==
size
,
"Illegal tensor dimension!"
);
CheckNTErrors
(
onehot
.
dataType
==
X_INT
,
"The onehot tensor must be in X_INT!"
)
...
...
@@ -123,10 +101,9 @@ convert index tensor to onehot tensor
>> size - the last dimension size of the onehot tensor
*/
void
_IndexToOnehot
(
const
XTensor
*
index
,
XTensor
*
onehot
,
float
labelSmoothingP
)
int
size
,
float
labelSmoothingP
)
{
int
size
=
onehot
->
GetDim
(
-
1
);
CheckNTErrors
(
onehot
->
GetDim
(
-
1
)
==
size
,
"Illegal tensor dimension!"
);
CheckNTErrors
(
onehot
->
order
==
index
->
order
+
1
,
"Illegal tensor order!"
);
//CheckNTErrors(onehot->dataType == X_INT, "The onehot tensor must be in X_INT!")
CheckNTErrors
(
index
->
dataType
==
X_INT
,
"The index tensor must be in X_INT!"
)
...
...
@@ -171,7 +148,7 @@ make a new tensor to keep the result and return it
>> confidence - labelsmoothing
<< return - the onehot tensor
*/
XTensor
IndexToOnehot
(
XTensor
&
index
,
int
size
,
float
labelSmoothingP
)
XTensor
IndexToOnehot
(
const
XTensor
&
index
,
int
size
,
float
labelSmoothingP
)
{
CheckNTErrors
(
index
.
dataType
==
X_INT
,
"The onehot tensor must be in X_INT!"
)
...
...
@@ -184,11 +161,11 @@ XTensor IndexToOnehot(XTensor & index, int size, float labelSmoothingP)
dim
[
order
]
=
size
;
InitTensor
(
&
onehot
,
index
.
order
+
1
,
dim
,
X_FLOAT
,
1.0
F
,
index
.
devID
,
index
.
mem
);
_IndexToOnehot
(
&
index
,
&
onehot
,
labelSmoothingP
);
_IndexToOnehot
(
&
index
,
&
onehot
,
size
,
labelSmoothingP
);
delete
[]
dim
;
return
onehot
;
}
}
//
namespace
nts
(
NiuTrans
.
Tensor
)
\ No newline at end of file
}
// namespace nts(NiuTrans.Tensor)
source/tensor/core/getandset/OnehotAndIndex.cu
查看文件 @
bc49d32a
...
...
@@ -61,7 +61,7 @@ convert onehot tensor to index tensor (cuda version)
>> index - index tensor, which value is an integer num
>> size - the last dimension size of the onehot tensor
*/
void _CudaOnehotToIndex(XTensor * onehot, XTensor * index, int size)
void _CudaOnehotToIndex(
const
XTensor * onehot, XTensor * index, int size)
{
int devID = onehot->devID;
...
...
@@ -153,4 +153,4 @@ void _CudaIndexToOnehot(const XTensor * index, XTensor * onehot,
#endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
} // namespace nts(NiuTrans.Tensor)
source/tensor/core/getandset/OnehotAndIndex.cuh
查看文件 @
bc49d32a
...
...
@@ -27,10 +27,11 @@
namespace nts{ // namespace nts(NiuTrans.Tensor)
/* convert onehot tensor to index tensor (cuda version) */
void _CudaOnehotToIndex(XTensor * onehot, XTensor * index, int size);
void _CudaOnehotToIndex(
const
XTensor * onehot, XTensor * index, int size);
/* convert index tensor to onehot tensor (cuda version) */
void _CudaIndexToOnehot(const XTensor * index, XTensor * onehot, int size, float confidence, float lowconfidence);
void _CudaIndexToOnehot(const XTensor * index, XTensor * onehot,
int size, float confidence, float lowconfidence);
} // namespace nts(NiuTrans.Tensor)
...
...
source/tensor/core/getandset/OnehotAndIndex.h
查看文件 @
bc49d32a
...
...
@@ -27,19 +27,18 @@
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* convert onehot tensor to index tensor */
void
_OnehotToIndex
(
XTensor
*
onehot
,
XTensor
*
index
,
int
dim
);
void
_OnehotToIndex
(
const
XTensor
*
onehot
,
XTensor
*
index
,
int
size
);
/* convert onehot tensor to index tensor (return an XTensor structure)
make a new tensor to keep the result and return it */
XTensor
OnehotToIndex
(
XTensor
&
onehot
,
int
size
);
XTensor
OnehotToIndex
(
const
XTensor
&
onehot
,
int
num
);
/* convert index tensor to onehot tensor */
void
_IndexToOnehot
(
const
XTensor
*
index
,
XTensor
*
onehot
,
float
labelSmoothingP
=
0
.
0
F
);
void
_IndexToOnehot
(
const
XTensor
*
index
,
XTensor
*
onehot
,
int
size
,
float
labelSmoothingP
);
/* convert index tensor to onehot tensor (return an XTensor structure)
make a new tensor to keep the result and return it */
XTensor
IndexToOnehot
(
XTensor
&
index
,
int
size
,
float
labelSmoothingP
=
0
.
0
F
);
XTensor
IndexToOnehot
(
const
XTensor
&
index
,
int
num
,
float
labelSmoothingP
);
}
// namespace nts(NiuTrans.Tensor)
...
...
source/tensor/core/getandset/Select.cpp
查看文件 @
bc49d32a
...
...
@@ -117,10 +117,12 @@ XTensor SelectRange(const XTensor &a, int dim, int low, int high)
_SelectRange
(
&
a
,
&
c
,
dim
,
low
,
high
);
/* tensor connection */
XLink
::
MakeLink
(
&
a
,
NULL
,
&
c
,
GETANDSET_SELECT
);
XLink
::
AddParamToHeadInt
(
&
c
,
dim
);
XLink
::
AddParamToHeadInt
(
&
c
,
low
);
XLink
::
AddParamToHeadInt
(
&
c
,
high
);
if
(
a
.
enableGrad
)
{
XLink
::
MakeLink
(
&
a
,
NULL
,
&
c
,
GETANDSET_SELECT
);
XLink
::
AddParamToHeadInt
(
&
c
,
dim
);
XLink
::
AddParamToHeadInt
(
&
c
,
low
);
XLink
::
AddParamToHeadInt
(
&
c
,
high
);
}
/* destroy variables */
delete
[]
dimSize
;
...
...
source/tensor/core/getandset/SetData.cpp
查看文件 @
bc49d32a
...
...
@@ -526,6 +526,43 @@ void _SetDataRand(XTensor * tensor, DTYPE lower, DTYPE upper)
}
}
/* generate data items with a range by start, end and the step
>> tensor - the tensor whose data array would be initialized
>> start - the begin of the array
>> end - the end of the array (not included self)
>> step - the step of two items
*/
void
_SetDataRange
(
XTensor
*
tensor
,
DTYPE
lower
,
DTYPE
upper
,
DTYPE
step
)
{
CheckNTErrors
((
tensor
->
order
==
1
),
"Tensor must be 1 dimension!"
);
/* compute the true length according to the (start, end, step) */
DTYPE
size
=
fabs
(
upper
-
lower
);
int
num
=
ceil
(
size
/
fabs
(
step
));
CheckNTErrors
((
tensor
->
unitNum
==
num
),
"Unit number of the tensor is not matched."
);
/* init a integer array to store the sequence */
void
*
data
=
NULL
;
if
(
tensor
->
dataType
==
X_INT
)
{
data
=
new
int
[
num
];
for
(
int
i
=
0
;
i
<
num
;
i
++
)
*
((
int
*
)
data
+
i
)
=
lower
+
i
*
step
;
}
else
if
(
tensor
->
dataType
==
X_FLOAT
)
{
data
=
new
float
[
num
];
for
(
int
i
=
0
;
i
<
num
;
i
++
)
*
((
float
*
)
data
+
i
)
=
lower
+
i
*
step
;
}
else
{
ShowNTErrors
(
"TODO!"
);
}
/* set the data from the array */
tensor
->
SetData
(
data
,
num
);
delete
[]
data
;
}
/*
generate data items with a uniform distribution in [lower, upper] and set
the item to a pre-defined value if the item >= p, set the item to 0 otherwise
...
...
source/tensor/core/getandset/SetData.h
查看文件 @
bc49d32a
...
...
@@ -69,6 +69,9 @@ void _SetDataRand(XTensor * tensor, int rNum, int cNum);
/* generate data items with a uniform distribution in [lower, upper] */
void
_SetDataRand
(
XTensor
*
tensor
,
DTYPE
lower
,
DTYPE
upper
);
/* generate data items with a range by start, end and the step */
void
_SetDataRange
(
XTensor
*
tensor
,
DTYPE
lower
,
DTYPE
upper
,
DTYPE
step
);
/* generate data items with a uniform distribution in [lower, upper] and set
the item to a pre-defined value if the item >= p, set the item to 0 otherwise */
void
_SetDataRandP
(
XTensor
*
tensor
,
DTYPE
lower
,
DTYPE
upper
,
DTYPE
p
,
DTYPE
value
);
...
...
source/tensor/core/math/Binary.cpp
查看文件 @
bc49d32a
...
...
@@ -167,7 +167,9 @@ XTensor funcName(const XTensor &a, T num)
XTensor b(&a); \
b.SetTMPFlag(); \
_funcName(&a, &b, num); \
XLink::MakeLink(&a, NULL, &b, operationId); \
if(a.enableGrad){ \
XLink::MakeLink(&a, NULL, &b, operationId); \
} \
XLink::AddParamToHead(&b, num); \
return b; \
} \
...
...
@@ -183,7 +185,7 @@ void funcName(const XTensor &a, XTensor &b, T num)
InitTensor(&b, &a); \
} \
_funcName(&a, &b, num); \
if (
b
.enableGrad) { \
if (
a
.enableGrad) { \
XLink::MakeLink(&a, NULL, &b, operationId); \
XLink::AddParamToHead(&b, num); \
} \
...
...
source/tensor/core/math/Clip.cpp
查看文件 @
bc49d32a
...
...
@@ -36,26 +36,26 @@ set every entry to its clip value
void
_Clip
(
const
XTensor
*
a
,
XTensor
*
b
,
DTYPE
lower
,
DTYPE
upper
)
{
#ifdef USE_CUDA
/* run it on GPUs */
if
(
a
->
devID
>=
0
)
{
_CudaClip
(
a
,
b
,
lower
,
upper
);
return
;
}
/* run it on GPUs */
if
(
a
->
devID
>=
0
)
{
_CudaClip
(
a
,
b
,
lower
,
upper
);
return
;
}
#endif
CheckNTErrors
((
XTensor
::
IsSameShaped
(
a
,
b
)),
"Input tensors should have the same type!"
);
CheckNTErrors
((
a
->
dataType
==
DEFAULT_DTYPE
),
"TODO!"
);
DTYPE
*
d
=
(
DTYPE
*
)
a
->
data
;
DTYPE
*
db
=
(
DTYPE
*
)
b
->
data
;
for
(
int
i
=
0
;
i
<
a
->
unitNum
;
i
++
)
{
if
(
d
[
i
]
>
upper
)
db
[
i
]
=
upper
;
else
if
(
d
[
i
]
<
lower
)
db
[
i
]
=
lower
;
else
db
[
i
]
=
d
[
i
];
}
CheckNTErrors
((
XTensor
::
IsSameShaped
(
a
,
b
)),
"Input tensors should have the same type!"
);
CheckNTErrors
((
a
->
dataType
==
DEFAULT_DTYPE
),
"TODO!"
);
DTYPE
*
d
=
(
DTYPE
*
)
a
->
data
;
DTYPE
*
db
=
(
DTYPE
*
)
b
->
data
;
for
(
int
i
=
0
;
i
<
a
->
unitNum
;
i
++
)
{
if
(
d
[
i
]
>
upper
)
db
[
i
]
=
upper
;
else
if
(
d
[
i
]
<
lower
)
db
[
i
]
=
lower
;
else
db
[
i
]
=
d
[
i
];
}
}
/*
...
...
@@ -99,9 +99,11 @@ XTensor Clip(const XTensor & a, DTYPE lower, DTYPE upper)
_Clip
(
&
a
,
&
b
,
lower
,
upper
);
/* tensor connections */
XLink
::
MakeLink
(
&
a
,
NULL
,
&
b
,
MATH_CLIP
);
XLink
::
AddParamToHead
(
&
b
,
lower
);
XLink
::
AddParamToHead
(
&
b
,
upper
);
if
(
a
.
enableGrad
)
{
XLink
::
MakeLink
(
&
a
,
NULL
,
&
b
,
MATH_CLIP
);
XLink
::
AddParamToHead
(
&
b
,
lower
);
XLink
::
AddParamToHead
(
&
b
,
upper
);
}
return
b
;
}
...
...
@@ -115,8 +117,8 @@ void Clip(const XTensor & a, XTensor & b, DTYPE lower, DTYPE upper)
/* call _Clip function */
_Clip
(
&
a
,
&
b
,
lower
,
upper
);
if
(
b
.
enableGrad
)
{
/* tensor connections */
/* tensor connections */
if
(
a
.
enableGrad
)
{
XLink
::
MakeLink
(
&
a
,
NULL
,
&
b
,
MATH_CLIP
);
XLink
::
AddParamToHead
(
&
b
,
lower
);
XLink
::
AddParamToHead
(
&
b
,
upper
);
...
...
source/tensor/core/math/Compare.cpp
查看文件 @
bc49d32a
...
...
@@ -20,6 +20,7 @@
*/
#include "../../XTensor.h"
#include "../../XDevice.h"
#include "../../XName.h"
#include "Compare.h"
#include "Compare.cuh"
...
...
@@ -123,4 +124,95 @@ SIMPLE_COMPARE_FUNCTION_ME(NotEqualMe, _NotEqual)
SIMPLE_COMPARE_FUNCTION
(
NotEqual
,
_NotEqual
,
MATH_NOTEQUAL
)
SIMPLE_COMPARE_FUNCTION_VOID
(
NotEqual
,
_NotEqual
,
MATH_NOTEQUAL
)
/* define three marco separately, specify the respective function names */
#ifdef USE_CUDA
#define _SIMPLE_MAX_MIN_FUNCTION(_funcName, _cudaFuncName, origFunc) \
void _funcName(const XTensor * a, const XTensor * b, XTensor * c) \
{ \
CheckNTErrors((XTensor::IsSameShaped(a, b, c)), \
"Input and output tensors should have the same type!"); \
CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!"); \
CheckDev(a->devID, b->devID); \
CheckDev(a->devID, c->devID); \
/* run it on GPUs */
\
if (a->devID >= 0) { \
_cudaFuncName(a, b, c); \
return; \
} \
DTYPE * da = (DTYPE*)a->data; \
DTYPE * db = (DTYPE*)b->data; \
DTYPE * dc = (DTYPE*)c->data; \
for (int i = 0; i < a->unitNum; i++) \
dc[i] = (DTYPE)origFunc(da[i], db[i]); \
}
#else
#define _SIMPLE_MAX_MIN_FUNCTION(_funcName, origFunc) \
void _funcName(const XTensor * a, const XTensor * b, XTensor *c) \
{ \
CheckNTErrors((XTensor::IsSameShaped(a, b, c)), \
"Input and output tensors should have the same type!"); \
CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!"); \
CheckDev(a, b); \
CheckDev(a, c); \
/* run it on GPUs */
\
if (a->devID >= 0) { \
ShowNTErrors("No GPU devices support!") \
} \
DTYPE * da = (DTYPE*)a->data; \
DTYPE * db = (DTYPE*)b->data; \
DTYPE * dc = (DTYPE*)c->data; \
for (int i = 0; i < a->unitNum; i++) \
dc[i] = (DTYPE)origFunc(da[i], db[i]); \
}
#endif
#define _SIMPLE_MAX_MIN_FUNCTION_ME(_funcNameMe, _funcName) \
void _funcNameMe(XTensor * a, const XTensor * b) \
{ \
_funcName(a, b, a); \
}
#define SIMPLE_MAX_MIN_FUNCTION_ME(funcNameMe, _funcName) \
void funcNameMe(XTensor & a, const XTensor & b) \
{ \
_funcName(&a, &b, &a); \
}
#define SIMPLE_MAX_MIN_FUNCTION(funcName, _funcName, operationId) \
XTensor funcName(const XTensor & a, const XTensor & b) \
{ \
XTensor c(&a); \
c.SetTMPFlag(); \
_funcName(&a, &b, &c); \
return c; \
}
#define SIMPLE_MAX_MIN_FUNCTION_VOID(funcName, _funcName, operationId) \
void funcName(const XTensor &a, const XTensor &b, XTensor c) \
{ \
if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) { \
InitTensor(&c, &a); \
} \
_funcName(&a, &b, &c); \
}
#ifdef USE_CUDA
_SIMPLE_MAX_MIN_FUNCTION
(
_Max
,
_CudaMax
,
max
)
_SIMPLE_MAX_MIN_FUNCTION
(
_Min
,
_CudaMin
,
min
)
#else
_SIMPLE_MAX_MIN_FUNCTION
(
_Max
,
max
)
_SIMPLE_MAX_MIN_FUNCTION
(
_Min
,
min
)
#endif
_SIMPLE_MAX_MIN_FUNCTION_ME
(
_MaxMe
,
_Max
)
SIMPLE_MAX_MIN_FUNCTION_ME
(
MaxMe
,
_Max
)
SIMPLE_MAX_MIN_FUNCTION
(
Max
,
_Max
,
MATH_MAX
)
SIMPLE_MAX_MIN_FUNCTION_VOID
(
Max
,
_Max
,
MATH_MAX
)
_SIMPLE_MAX_MIN_FUNCTION_ME
(
_MinMe
,
_Min
)
SIMPLE_MAX_MIN_FUNCTION_ME
(
MinMe
,
_Min
)
SIMPLE_MAX_MIN_FUNCTION
(
Min
,
_Min
,
MATH_MIN
)
SIMPLE_MAX_MIN_FUNCTION_VOID
(
Min
,
_Min
,
MATH_MIN
)
}
//
namespace
nts
(
NiuTrans
.
Tensor
)
\ No newline at end of file
source/tensor/core/math/Compare.cu
查看文件 @
bc49d32a
...
...
@@ -89,6 +89,53 @@ void _Cuda##funcName(const XTensor * a, XTensor * b, DTYPE number) \
SIMPLE_COMPARE_FUNCTION_GPU(Equal, cudaIsEqual)
SIMPLE_COMPARE_FUNCTION_GPU(NotEqual, cudaIsNotEqual)
#define SIMPLE_MAX_MIN_FUNCTION_GPU(funcName, origFunc) \
__global__ \
void Kernel##funcName(DTYPE * a, DTYPE * b, DTYPE * c, int size) \
{ \
int i = blockDim.x * blockIdx.x + threadIdx.x; \
\
if (i < size) \
c[i] = (DTYPE)origFunc(a[i], b[i]); \
} \
__global__ \
void Kernel##funcName(__half * a, __half * b, __half * c, int size) \
{ \
return; \
} \
void _Cuda##funcName(const XTensor * a, const XTensor * b, XTensor * c) \
{ \
\
int gridSize[3]; \
int blockSize[3]; \
\
GDevs.GetCudaThread(a->devID, a->unitNum, gridSize, blockSize); \
\
dim3 blocks(gridSize[0]); \
dim3 threads(blockSize[0]); \
\
int devIDBackup; \
ProtectCudaDev(a->devID, devIDBackup); \
\
if (a->dataType == DEFAULT_DTYPE) { \
Kernel##funcName<<<blocks, threads>>> \
((DTYPE*)a->data, (DTYPE*)b->data, \
(DTYPE*)c->data, a->unitNum); \
} \
else if (a->dataType == X_FLOAT16) { \
Kernel##funcName<<<blocks, threads>>> \
((__half*)a->data, (__half*)b->data, \
(__half*)c->data, a->unitNum); \
} \
else { \
ShowNTErrors("TODO!"); \
} \
\
BacktoCudaDev(a->devID, devIDBackup); \
}
SIMPLE_MAX_MIN_FUNCTION_GPU(Max, max)
SIMPLE_MAX_MIN_FUNCTION_GPU(Min, min)
#endif // USE_CUDA
...
...
source/tensor/core/math/Compare.cuh
查看文件 @
bc49d32a
...
...
@@ -34,6 +34,12 @@ void _CudaEqual(const XTensor * a, XTensor * b, DTYPE value);
/* check whether every entry is not equal to the given value (cuda version) */
void _CudaNotEqual(const XTensor * a, XTensor * b, DTYPE value);
/* return maximum of two tensor for each items (cuda version) */
void _CudaMax(const XTensor * a, const XTensor * b, XTensor *c);
/* return minimum of two tensor for each items (cuda version) */
void _CudaMin(const XTensor * a, const XTensor * b, XTensor *c);
#endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor)
...
...
source/tensor/core/math/Compare.h
查看文件 @
bc49d32a
...
...
@@ -56,6 +56,36 @@ XTensor NotEqual(const XTensor & a, DTYPE value);
/* check whether every entry is not equal to the given value */
void
NotEqual
(
const
XTensor
&
a
,
XTensor
&
b
,
DTYPE
value
);
/* return maximum of two tensor for each items */
void
_Max
(
const
XTensor
*
a
,
const
XTensor
*
b
,
XTensor
*
c
);
/* return maximum of two tensor for each items (do it on site) */
void
_MaxMe
(
XTensor
*
a
,
const
XTensor
*
b
);
/* return maximum of two tensor for each items (do it on site) */
void
MaxMe
(
XTensor
&
a
,
const
XTensor
&
b
);
/* return maximum of two tensor for each items (return an XTensor structure) */
XTensor
Max
(
const
XTensor
&
a
,
const
XTensor
&
b
);
/* return maximum of two tensor for each items */
void
Max
(
const
XTensor
&
a
,
const
XTensor
&
b
,
XTensor
&
c
);
/* return minimum of two tensor for each items */
void
_Min
(
const
XTensor
*
a
,
const
XTensor
*
b
,
XTensor
*
c
);
/* return minimum of two tensor for each items (do it on site) */
void
_MinMe
(
XTensor
*
a
,
const
XTensor
*
b
);
/* return minimum of two tensor for each items (do it on site) */
void
MinMe
(
XTensor
&
a
,
const
XTensor
&
b
);
/* return minimum of two tensor for each items (return an XTensor structure) */
XTensor
Min
(
const
XTensor
&
a
,
const
XTensor
&
b
);
/* return minimum of two tensor for each items */
void
Min
(
const
XTensor
&
a
,
const
XTensor
&
b
,
XTensor
&
c
);
}
// namespace nts(NiuTrans.Tensor)
#endif // end __COMPARE_H__
\ No newline at end of file
source/tensor/core/math/Normalize.cpp
查看文件 @
bc49d32a
...
...
@@ -46,7 +46,7 @@ void _Normalize(const XTensor * input, XTensor * output, int dim,
const
XTensor
*
mean
,
const
XTensor
*
var
,
const
XTensor
*
a
,
const
XTensor
*
b
,
DTYPE
epsilon
)
{
int
dimRDI
=
input
->
order
-
dim
-
1
;
int
dimRDI
=
input
->
order
-
dim
-
1
;
CheckNTErrors
((
XTensor
::
IsSameShaped
(
input
,
output
)),
"Unmatched input tensors!"
);
CheckNTErrors
((
XTensor
::
IsSameShaped
(
a
,
b
)),
"Unmatched input tensors"
);
CheckNTErrors
((
XTensor
::
IsSameShaped
(
mean
,
var
)),
"Unmatched input tensors"
);
...
...
@@ -173,9 +173,11 @@ XTensor Normalize(const XTensor &input, int dim,
list
.
Add
((
XTensor
*
)
&
var
);
list
.
Add
((
XTensor
*
)
&
a
);
list
.
Add
((
XTensor
*
)
&
b
);
XLink
::
MakeLink
(
&
list
,
&
output
,
MATH_NORMALIZE
);
XLink
::
AddParamToHeadInt
(
&
output
,
dim
);
XLink
::
AddParamToHead
(
&
output
,
epsilon
);
if
(
input
.
enableGrad
)
{
XLink
::
MakeLink
(
&
list
,
&
output
,
MATH_NORMALIZE
);
XLink
::
AddParamToHeadInt
(
&
output
,
dim
);
XLink
::
AddParamToHead
(
&
output
,
epsilon
);
}
return
output
;
}
...
...
@@ -208,7 +210,7 @@ void Normalize(const XTensor &input, XTensor &output, int dim,
/* call _Normalize function */
_Normalize
(
&
input
,
&
output
,
dim
,
&
mean
,
&
var
,
&
a
,
&
b
,
epsilon
);
if
(
out
put
.
enableGrad
==
true
)
{
if
(
in
put
.
enableGrad
==
true
)
{
/* tensor connections */
TensorList
list
(
5
);
list
.
Add
((
XTensor
*
)
&
input
);
...
...
source/tensor/core/math/ScaleAndShift.cpp
查看文件 @
bc49d32a
...
...
@@ -126,9 +126,11 @@ XTensor ScaleAndShift(const XTensor &a, DTYPE scale, DTYPE shift)
_ScaleAndShift
(
&
a
,
&
b
,
scale
,
shift
);
/* tensor connections */
XLink
::
MakeLink
(
&
a
,
NULL
,
&
b
,
MATH_SCALEANDSHIFT
);
XLink
::
AddParamToHead
(
&
b
,
scale
);
XLink
::
AddParamToHead
(
&
b
,
shift
);
if
(
a
.
enableGrad
)
{
XLink
::
MakeLink
(
&
a
,
NULL
,
&
b
,
MATH_SCALEANDSHIFT
);
XLink
::
AddParamToHead
(
&
b
,
scale
);
XLink
::
AddParamToHead
(
&
b
,
shift
);
}
return
b
;
}
...
...
@@ -152,7 +154,7 @@ void ScaleAndShift(const XTensor & a, XTensor & b, DTYPE scale, DTYPE shift)
/* call _ScaleAndShift function */
_ScaleAndShift
(
&
a
,
&
b
,
scale
,
shift
);
if
(
b
.
enableGrad
)
{
if
(
a
.
enableGrad
)
{
/* tensor connections */
XLink
::
MakeLink
(
&
a
,
NULL
,
&
b
,
MATH_SCALEANDSHIFT
);
XLink
::
AddParamToHead
(
&
b
,
scale
);
...
...
source/tensor/core/math/Unary.cpp
查看文件 @
bc49d32a
...
...
@@ -151,7 +151,9 @@ XTensor funcName(const XTensor & a)
XTensor b(&a); \
b.SetTMPFlag(); \
_funcName(&a, &b); \
XLink::MakeLink(&a, NULL, &b, operationId); \
if(a.enableGrad){ \
XLink::MakeLink(&a, NULL, &b, operationId); \
} \
return b; \
}
...
...
@@ -162,7 +164,7 @@ void funcName(const XTensor & a, XTensor & b)
InitTensor(&b, &a); \
} \
_funcName(&a, &b); \
if (
b
.enableGrad) { \
if (
a
.enableGrad) { \
XLink::MakeLink(&a, NULL, &b, operationId); \
} \
}
...
...
source/tensor/core/movement/CopyIndexed.cpp
查看文件 @
bc49d32a
...
...
@@ -258,10 +258,12 @@ XTensor CopyIndexed(const XTensor & s, int dim,
list
.
Add
((
XTensor
*
)
&
tgtIndex
);
/* tensor connection */
XLink
::
MakeLink
(
&
list
,
&
t
,
MOVEMENT_COPYINDEXED
);
XLink
::
AddParamToHeadInt
(
&
t
,
dim
);
XLink
::
AddParamToHeadInt
(
&
t
,
copyNum
);
if
(
s
.
enableGrad
)
{
XLink
::
MakeLink
(
&
list
,
&
t
,
MOVEMENT_COPYINDEXED
);
XLink
::
AddParamToHeadInt
(
&
t
,
dim
);
XLink
::
AddParamToHeadInt
(
&
t
,
copyNum
);
}
/* destroy variables */
delete
[]
dimSize
;
...
...
@@ -314,13 +316,15 @@ XTensor CopyIndexed(const XTensor &s, int dim, int * srcIndex, int indexSize, in
memcpy
(
saveTgtIndex
,
tgtIndex
,
indexSize
*
sizeof
(
int
));
/* tensor connection */
XLink
::
MakeLink
(
&
s
,
NULL
,
&
t
,
MOVEMENT_COPYINDEXED
);
XLink
::
AddParamToHeadInt
(
&
t
,
dim
);
XLink
::
AddParamToHeadPointer
(
&
t
,
saveSrcIndex
);
XLink
::
AddParamToHeadInt
(
&
t
,
indexSize
);
XLink
::
AddParamToHeadPointer
(
&
t
,
saveTgtIndex
);
XLink
::
AddParamToHeadInt
(
&
t
,
copyNum
);
if
(
s
.
enableGrad
)
{
XLink
::
MakeLink
(
&
s
,
NULL
,
&
t
,
MOVEMENT_COPYINDEXED
);
XLink
::
AddParamToHeadInt
(
&
t
,
dim
);
XLink
::
AddParamToHeadPointer
(
&
t
,
saveSrcIndex
);
XLink
::
AddParamToHeadInt
(
&
t
,
indexSize
);
XLink
::
AddParamToHeadPointer
(
&
t
,
saveTgtIndex
);
XLink
::
AddParamToHeadInt
(
&
t
,
copyNum
);
}
/* destroy variables */
delete
[]
dimSize
;
...
...
source/tensor/core/movement/CopyValues.cpp
查看文件 @
bc49d32a
...
...
@@ -134,7 +134,9 @@ XTensor CopyValues(const XTensor &s, XStream * stream)
_CopyValues
(
&
s
,
&
t
,
stream
);
/* tensor connection */
XLink
::
MakeLink
(
&
s
,
NULL
,
&
t
,
MOVEMENT_COPYVALUES
);
if
(
s
.
enableGrad
)
{
XLink
::
MakeLink
(
&
s
,
NULL
,
&
t
,
MOVEMENT_COPYVALUES
);
}
return
t
;
}
...
...
source/tensor/core/movement/Gather.cpp
查看文件 @
bc49d32a
...
...
@@ -93,7 +93,9 @@ XTensor Gather(XTensor &s, XTensor &index)
_Gather
(
&
s
,
&
t
,
&
index
);
/* tensor connection */
XLink
::
MakeLink
(
&
s
,
&
index
,
&
t
,
MOVEMENT_GATHER
);
if
(
s
.
enableGrad
)
{
XLink
::
MakeLink
(
&
s
,
&
index
,
&
t
,
MOVEMENT_GATHER
);
}
return
t
;
}
...
...
source/tensor/core/reduce/ReduceMax.cpp
查看文件 @
bc49d32a
...
...
@@ -21,6 +21,8 @@
#include "../../XTensor.h"
#include "../../XName.h"
#include "../../XBLAS.h"
#include "VectorBuffer.h"
#include "ReduceMax.h"
#include "ReduceMax.cuh"
...
...
@@ -41,8 +43,8 @@ void _ReduceMax(const XTensor * input, XTensor * output, int dim)
CheckNTErrors
((
input
->
order
==
output
->
order
+
1
),
"Incorrect tensor sizes!"
);
CheckNTErrors
((
input
->
order
>
dim
&&
dim
>=
0
),
"Illegal dimension to reduce!"
);
CheckNTErrors
((
input
->
dataType
==
output
->
dataType
),
"Unmatched data types!"
);
int
dimRDI
=
input
->
order
-
dim
-
1
;
int
dimRDI
=
input
->
order
-
dim
-
1
;
CheckNTErrors
(
dimRDI
>=
0
,
"Wrong dimension!"
);
for
(
int
i
=
0
;
i
<
input
->
order
;
i
++
){
...
...
@@ -76,18 +78,75 @@ void _ReduceMax(const XTensor * input, XTensor * output, int dim)
    }
    blockSize = stride * strideNum;

(The previous scalar reduction loop moves, essentially unchanged, into the else branch below; the only in-loop change is FLOAT_MIN becoming DTYPE_MIN.)

+   if (input->dimSizeRDI[0] % (4 * 32 / sizeof(DTYPE)) == 0 && input->dimSizeRDI[0] >= 32){
+       int vecBufLength = 32 / sizeof(DTYPE);
+       if (dimRDI == 0){
+           //data is contiguous in dim 0
+           for(int i = 0; i < blockNum; i++){
+               DTYPE * ip = (DTYPE*)input->data + blockSize * i;
+               DTYPE * op = (DTYPE*)output->data + i;
+               VectorBuffer vecBuf[4];
+               for(int j = 0; j < 4; j++){
+                   vecBuf[j] = VectorBuffer::loadu((DTYPE*)(ip) + j * vecBufLength);
+               }
+               for(int j = 1; j < strideNum / 32; j++){
+                   const DTYPE* ptr = (DTYPE*)(ip + j * vecBufLength);
+                   vecBuf[0] = vecBuf[0].maxData(VectorBuffer::loadu(ptr + 0 * vecBufLength));
+                   vecBuf[1] = vecBuf[1].maxData(VectorBuffer::loadu(ptr + 1 * vecBufLength));
+                   vecBuf[2] = vecBuf[2].maxData(VectorBuffer::loadu(ptr + 2 * vecBufLength));
+                   vecBuf[3] = vecBuf[3].maxData(VectorBuffer::loadu(ptr + 3 * vecBufLength));
+               }
+               vecBuf[0] = vecBuf[0].maxData(vecBuf[1]);
+               vecBuf[0] = vecBuf[0].maxData(vecBuf[2]);
+               vecBuf[0] = vecBuf[0].maxData(vecBuf[3]);
+               DTYPE maxN = DTYPE_MIN;
+               for(int k = 0; k < vecBufLength; k++){
+                   maxN = MAX(maxN, vecBuf[0][k]);
+               }
+               *op = maxN;
+           }
+       }
+       else{
+           //data is separated
+           for(int i = 0; i < blockNum; i++){
+               for(int j = 0; j < input->dimSizeRDI[0] / 32; j++){
+                   DTYPE * ip = (DTYPE*)input->data + blockSize * i;
+                   DTYPE * op = (DTYPE*)output->data + stride * i;
+                   VectorBuffer vecBuf[4];
+                   for(int k = 0; k < 4; k++){
+                       vecBuf[k] = VectorBuffer::loadu((DTYPE*)(ip) + (j * 4 + k) * 32 / sizeof(DTYPE));
+                   }
+                   for(int k = 1; k < strideNum; k++){
+                       DTYPE * ptr = ip + k * stride + (j * 4) * vecBufLength;
+                       vecBuf[0] = vecBuf[0].maxData(VectorBuffer::loadu(ptr + 0 * vecBufLength));
+                       vecBuf[1] = vecBuf[1].maxData(VectorBuffer::loadu(ptr + 1 * vecBufLength));
+                       vecBuf[2] = vecBuf[2].maxData(VectorBuffer::loadu(ptr + 2 * vecBufLength));
+                       vecBuf[3] = vecBuf[3].maxData(VectorBuffer::loadu(ptr + 3 * vecBufLength));
+                   }
+                   for(int k = 0; k < 4; k++){
+                       for(int l = 0; l < vecBufLength; l++)
+                           *(op + j * 32 + 8 * k + l) = vecBuf[k][l];
+                   }
+               }
+           }
+       }
+   }
+   //run vector buffer
+   else{
        for(int k = 0; k < blockNum; k++){
            DTYPE * ip = (DTYPE*)input->data + blockSize * k;
            DTYPE * op = (DTYPE*)output->data + stride * k;
            for(int i = 0; i < stride; i++){
-               DTYPE max = FLOAT_MIN;
+               DTYPE max = DTYPE_MIN;
                DTYPE * ipe = ip + blockSize;
                for(DTYPE * ipb = ip + i; ipb < ipe; ipb += stride){
                    DTYPE v = *ipb;
                    if(max < v)
                        max = v;
                }
                *(op + i) = max;
            }
        }
+   }
...
...
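For readers tracing the new code path above: when the innermost dimension is a multiple of 4 * 32 / sizeof(DTYPE) and at least 32, the reduction keeps four 32-byte accumulators and only folds them together at the end, removing the loop-carried dependency of the naive scan. A minimal self-contained sketch of the same 4-way unrolled max-reduction, using plain arrays instead of the library's VectorBuffer class (MaxReduceContiguous and BUF are illustrative names, not part of the commit):

    #include <algorithm>
    #include <cstddef>

    // Values per 32-byte buffer (8 for float), mirroring VectorBuffer::size().
    constexpr int BUF = 32 / sizeof(float);

    // 4-way unrolled max-reduction over a contiguous array.
    // Assumes n is a multiple of 4 * BUF and n >= 32, as the guard above requires.
    float MaxReduceContiguous(const float * data, std::size_t n)
    {
        float acc[4][BUF];
        for (int j = 0; j < 4; j++)                 // initialize the 4 accumulators
            for (int l = 0; l < BUF; l++)
                acc[j][l] = data[j * BUF + l];

        for (std::size_t i = 4 * BUF; i < n; i += 4 * BUF)  // 4 independent max chains
            for (int j = 0; j < 4; j++)
                for (int l = 0; l < BUF; l++)
                    acc[j][l] = std::max(acc[j][l], data[i + j * BUF + l]);

        float best = acc[0][0];                     // fold the accumulators once
        for (int j = 0; j < 4; j++)
            for (int l = 0; l < BUF; l++)
                best = std::max(best, acc[j][l]);
        return best;
    }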
@@ -104,7 +163,7 @@ make a new tensor to keep the result and return it
XTensor ReduceMax(const XTensor &input, int dim)
{
    CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!");

    int order = input.order - 1;
    int * dimSize = new int[order];
    for(int i = 0; i < order; i++){
...
...
@@ -122,8 +181,10 @@ XTensor ReduceMax(const XTensor &input, int dim)
    _ReduceMax(&input, &output, dim);

-   /* tensor connection */
-   XLink::MakeLink(&input, NULL, &output, REDUCE_REDUCEMAX);
-   XLink::AddParamToHeadInt(&output, dim);
+   if (input.enableGrad) {
+       XLink::MakeLink(&input, NULL, &output, REDUCE_REDUCEMAX);
+       XLink::AddParamToHeadInt(&output, dim);
+   }

    /* destroy variables */
    delete[] dimSize;
...
...
@@ -162,7 +223,7 @@ void ReduceMax(const XTensor &input, XTensor &output, int dim)
    /* call _ReduceMax function */
    _ReduceMax(&input, &output, dim);

-   if (output.enableGrad) {
+   if (input.enableGrad) {
        /* tensor connections */
        XLink::MakeLink(&input, NULL, &output, REDUCE_REDUCEMAX);
        XLink::AddParamToHeadInt(&output, dim);
...
...
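The change just above recurs throughout this commit: graph construction (XLink::MakeLink plus the head parameters) is now gated on the input tensor's enableGrad flag instead of running unconditionally, and the in-place variants test input.enableGrad rather than output.enableGrad. A hypothetical helper showing the shape of that pattern, assuming the library's XTensor/XLink headers (LinkReduceOp is an illustrative name, not part of the commit):

    // Sketch of the gating pattern applied across this commit:
    // build the backward graph only when the input participates in autodiff.
    void LinkReduceOp(const XTensor &input, XTensor &output, int dim, int opID)
    {
        if (input.enableGrad) {
            XLink::MakeLink(&input, NULL, &output, opID);
            XLink::AddParamToHeadInt(&output, dim);
        }
    }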
source/tensor/core/reduce/ReduceMean.cpp
...
...
@@ -39,7 +39,7 @@ void _ReduceMean(const XTensor * input, XTensor * output, int dim)
{
    CheckNTErrors((input->order > dim), "Illegal dimension specified!");

-   int dimRDI = input->order - dim - 1;
+   int dimRDI = input->order - dim - 1;
    int num = input->dimSizeRDI[dimRDI];

    _ReduceSum(input, output, dim);
...
...
@@ -59,7 +59,7 @@ For a 1-dimensional data array a, mean = (1/n) * sum_i input_i
XTensor ReduceMean(const XTensor &input, int dim)
{
    CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!");

    int order = input.order - 1;
    int * dimSize = new int[order];
    for(int i = 0; i < order; i++){
...
...
@@ -77,8 +77,10 @@ XTensor ReduceMean(const XTensor &input, int dim)
    _ReduceMean(&input, &output, dim);

-   /* tensor connection */
-   XLink::MakeLink(&input, NULL, &output, REDUCE_REDUCEMEAN);
-   XLink::AddParamToHeadInt(&output, dim);
+   if (input.enableGrad) {
+       XLink::MakeLink(&input, NULL, &output, REDUCE_REDUCEMEAN);
+       XLink::AddParamToHeadInt(&output, dim);
+   }

    /* destroy variables */
    delete[] dimSize;
...
...
@@ -119,7 +121,7 @@ void ReduceMean(const XTensor &input, XTensor &output, int dim)
    /* call _ReduceMean function */
    _ReduceMean(&input, &output, dim);

-   if (output.enableGrad) {
+   if (input.enableGrad) {
        /* tensor connections */
        XLink::MakeLink(&input, NULL, &output, REDUCE_REDUCEMEAN);
        XLink::AddParamToHeadInt(&output, dim);
...
...
source/tensor/core/reduce/ReduceSum.cpp
...
...
@@ -23,6 +23,9 @@
 #include "ReduceSum.h"
 #include "ReduceSum.cuh"
 #include "../../XName.h"
+#include "../../XBLAS.h"
+#include "VectorBuffer.h"
+#include <iostream>

namespace nts{ // namespace nts(NiuTrans.Tensor)
...
...
@@ -50,7 +53,7 @@ void _ReduceSum(const XTensor * input, XTensor * output, int dim, const XTensor
    CheckNTErrors((input->dataType == output->dataType), "Unmatched data types!");
    CheckNTErrors((shift == NULL || XTensor::IsSameShaped(output, shift)), "Incorrect shift tensor size!");

-   int dimRDI = input->order - dim - 1;
+   int dimRDI = input->order - dim - 1;
+   CheckNTErrors(dimRDI >= 0, "Wrong dimension!");
    for(int i = 0; i < input->order; i++){
...
...
@@ -82,118 +85,188 @@ void _ReduceSum(const XTensor * input, XTensor * output, int dim, const XTensor
    }
    blockSize = stride * strideNum;

(The previous scalar reduction loop is kept verbatim as the else branch below; the vectorized path is new.)

+   if (input->dimSizeRDI[0] % (4 * 32 / sizeof(DTYPE)) == 0 && input->dimSizeRDI[0] >= 32){
+       int vecBufLength = 32 / sizeof(DTYPE);
+       if (dimRDI == 0){
+           //data is contiguous in dim 0
+           for(int i = 0; i < blockNum; i++){
+               // stride = 1
+               DTYPE * ip = (DTYPE*)input->data + blockSize * i;
+               DTYPE * op = (DTYPE*)output->data + i;
+               DTYPE * sp = shift != NULL ? (DTYPE*)shift->data + i : NULL;
+               DTYPE bias[32 / sizeof(DTYPE)] = {0};
+               if(shift != NULL){
+                   for(int k = 0; k < 32 / sizeof(DTYPE); k++)
+                       bias[k] = *(sp);
+               }
+               VectorBuffer vecBuf[4];
+               for(int j = 0; j < 4; j++){
+                   vecBuf[j] = VectorBuffer::loadu((DTYPE*)(ip) + j * vecBufLength, isExp, power, bias);
+               }
+               for(int j = 1; j < strideNum / 32; j++){
+                   const DTYPE* ptr = (DTYPE*)(ip + j * vecBufLength);
+                   vecBuf[0] = vecBuf[0] + VectorBuffer::loadu(ptr + 0 * vecBufLength, isExp, power, bias);
+                   vecBuf[1] = vecBuf[1] + VectorBuffer::loadu(ptr + 1 * vecBufLength, isExp, power, bias);
+                   vecBuf[2] = vecBuf[2] + VectorBuffer::loadu(ptr + 2 * vecBufLength, isExp, power, bias);
+                   vecBuf[3] = vecBuf[3] + VectorBuffer::loadu(ptr + 3 * vecBufLength, isExp, power, bias);
+               }
+               vecBuf[0] = ((vecBuf[0] + vecBuf[1]) + (vecBuf[2] + vecBuf[3]));
+               DTYPE sum = (DTYPE)0.0;
+               for(int k = 0; k < vecBufLength; k++){
+                   sum = sum + vecBuf[0][k];
+               }
+               *op = sum;
+           }
+       }
+       else{
+           //data is separated
+           for(int i = 0; i < blockNum; i++){
+               for(int j = 0; j < input->dimSizeRDI[0] / 32; j++){
+                   DTYPE * ip = (DTYPE*)input->data + blockSize * i;
+                   DTYPE * op = (DTYPE*)output->data + stride * i;
+                   DTYPE * sp = shift != NULL ? (DTYPE*)shift->data + stride * i : NULL;
+                   DTYPE bias[4 * 32 / sizeof(DTYPE)] = {0};
+                   if(shift != NULL){
+                       for(int k = 0; k < 4 * 32 / sizeof(DTYPE); k++)
+                           bias[k] = *(sp + k);
+                   }
+                   VectorBuffer vecBuf[4];
+                   for(int k = 0; k < 4; k++){
+                       vecBuf[k] = VectorBuffer::loadu((DTYPE*)(ip) + (j * 4 + k) * 32 / sizeof(DTYPE), isExp, power, bias + j * 32 / sizeof(DTYPE));
+                   }
+                   for(int k = 1; k < strideNum; k++){
+                       DTYPE * ptr = ip + k * stride + (j * 4) * vecBufLength;
+                       vecBuf[0] = vecBuf[0] + VectorBuffer::loadu(ptr + 0 * vecBufLength, isExp, power, bias);
+                       vecBuf[1] = vecBuf[1] + VectorBuffer::loadu(ptr + 1 * vecBufLength, isExp, power, bias + 1 * vecBufLength);
+                       vecBuf[2] = vecBuf[2] + VectorBuffer::loadu(ptr + 2 * vecBufLength, isExp, power, bias + 2 * vecBufLength);
+                       vecBuf[3] = vecBuf[3] + VectorBuffer::loadu(ptr + 3 * vecBufLength, isExp, power, bias + 3 * vecBufLength);
+                   }
+                   for(int k = 0; k < 4; k++){
+                       for(int l = 0; l < vecBufLength; l++)
+                           *(op + j * 32 + 8 * k + l) = vecBuf[k][l];
+                   }
+               }
+           }
+       }
+   }
+   //run vector buffer
+   else{
        for(int k = 0; k < blockNum; k++){
            DTYPE * ip = (DTYPE*)input->data + blockSize * k;
            DTYPE * op = (DTYPE*)output->data + stride * k;
            DTYPE * sp = shift != NULL ? (DTYPE*)shift->data + stride * k : NULL;
            for(int i = 0; i < stride; i++){
                DTYPE sum = 0;
                DTYPE bias = shift != NULL ? *(sp + i) : 0;
                DTYPE * ipe = ip + blockSize;
                if(isExp){
                    if(bias == 0){
                        if(power == (DTYPE)1.0){
                            for(DTYPE * ipb = ip + i; ipb < ipe; ipb += stride)
                                sum += (DTYPE)exp(*ipb);
                        }
                        else if(power == (DTYPE)2.0){
                            for(DTYPE * ipb = ip + i; ipb < ipe; ipb += stride){
                                DTYPE value = (*ipb);
                                sum += (DTYPE)exp(value * value);
                            }
                        }
                        else if(power == (DTYPE)0.5){
                            for(DTYPE * ipb = ip + i; ipb < ipe; ipb += stride){
                                DTYPE value = (*ipb);
                                sum += (DTYPE)exp(sqrt(value));
                            }
                        }
                        else{
                            for(DTYPE * ipb = ip + i; ipb < ipe; ipb += stride){
                                DTYPE value = (*ipb);
                                sum += (DTYPE)exp(pow(value, power));
                            }
                        }
                    }
                    else{
                        if(power == (DTYPE)1.0){
                            for(DTYPE * ipb = ip + i; ipb < ipe; ipb += stride)
                                sum += (DTYPE)exp(*ipb - bias);
                        }
                        else if(power == (DTYPE)2.0){
                            for(DTYPE * ipb = ip + i; ipb < ipe; ipb += stride){
                                DTYPE value = (*ipb) - bias;
                                sum += (DTYPE)exp(value * value);
                            }
                        }
                        else if(power == (DTYPE)0.5){
                            for(DTYPE * ipb = ip + i; ipb < ipe; ipb += stride){
                                DTYPE value = (*ipb) - bias;
                                sum += (DTYPE)exp(sqrt(value));
                            }
                        }
                        else{
                            for(DTYPE * ipb = ip + i; ipb < ipe; ipb += stride){
                                DTYPE value = (*ipb) - bias;
                                sum += (DTYPE)exp(pow(value, power));
                            }
                        }
                    }
                }
                else{
                    if(bias == 0){
                        if(power == (DTYPE)1.0){
                            for(DTYPE * ipb = ip + i; ipb < ipe; ipb += stride)
                                sum += *ipb;
                        }
                        else if(power == (DTYPE)2.0){
                            for(DTYPE * ipb = ip + i; ipb < ipe; ipb += stride){
                                DTYPE value = (*ipb);
                                sum += value * value;
                            }
                        }
                        else if(power == (DTYPE)0.5){
                            for(DTYPE * ipb = ip + i; ipb < ipe; ipb += stride){
                                DTYPE value = (*ipb);
                                sum += (DTYPE)sqrt(value);
                            }
                        }
                        else{
                            for(DTYPE * ipb = ip + i; ipb < ipe; ipb += stride){
                                DTYPE value = (*ipb);
                                sum += (DTYPE)pow(value, power);
                            }
                        }
                    }
                    else{
                        if(power == (DTYPE)1.0){
                            for(DTYPE * ipb = ip + i; ipb < ipe; ipb += stride)
                                sum += *ipb;
                            sum -= strideNum * bias;
                        }
                        else if(power == (DTYPE)2.0){
                            for(DTYPE * ipb = ip + i; ipb < ipe; ipb += stride){
                                DTYPE value = (*ipb) - bias;
                                sum += value * value;
                            }
                        }
                        else if(power == (DTYPE)0.5){
                            for(DTYPE * ipb = ip + i; ipb < ipe; ipb += stride){
                                DTYPE value = (*ipb) - bias;
                                sum += (DTYPE)sqrt(value);
                            }
                        }
                        else{
                            for(DTYPE * ipb = ip + i; ipb < ipe; ipb += stride){
                                DTYPE value = (*ipb) - bias;
                                sum += (DTYPE)pow(value, power);
                            }
                        }
                    }
                }
                *(op + i) = sum;
            }
        }
+   }
...
...
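The branchy code above special-cases power values 1.0, 2.0 and 0.5 purely for speed; the semantics for one output element are compact. A scalar reference sketch of what _ReduceSum computes, independent of the vectorized layout (ReduceOne is an illustrative name, not library code):

    #include <cmath>

    // sum_i ((a_i - shift)^power), optionally exponentiated when isExp is true.
    float ReduceOne(const float * a, int n, float shift, float power, bool isExp)
    {
        float sum = 0;
        for (int i = 0; i < n; i++) {
            float v = a[i] - shift;
            float p = (power == 1.0f) ? v
                    : (power == 2.0f) ? v * v
                    : (power == 0.5f) ? std::sqrt(v)
                    : std::pow(v, power);
            sum += isExp ? std::exp(p) : p;
        }
        return sum;
    }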
@@ -215,7 +288,7 @@ sum = \sum_i exp((a_i - shift)^power) if isExp == true
XTensor ReduceSum(const XTensor &input, int dim, const XTensor &shift, DTYPE power, bool isExp)
{
    CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!");

    int order = input.order - 1;
    int * dimSize = new int[order];
    for(int i = 0; i < order; i++){
...
...
@@ -233,10 +306,12 @@ XTensor ReduceSum(const XTensor &input, int dim, const XTensor &shift, DTYPE pow
    _ReduceSum(&input, &output, dim, &shift, power, isExp);

-   /* tensor connection */
-   XLink::MakeLink(&input, &shift, &output, REDUCE_REDUCESUM);
-   XLink::AddParamToHeadInt(&output, dim);
-   XLink::AddParamToHead(&output, power);
-   XLink::AddParamToHeadBool(&output, isExp);
+   if (input.enableGrad) {
+       XLink::MakeLink(&input, &shift, &output, REDUCE_REDUCESUM);
+       XLink::AddParamToHeadInt(&output, dim);
+       XLink::AddParamToHead(&output, power);
+       XLink::AddParamToHeadBool(&output, isExp);
+   }

    /* destroy variables */
    delete[] dimSize;
...
...
@@ -268,7 +343,7 @@ void ReduceSum(const XTensor &input, XTensor &output, int dim, const XTensor &sh
    /* call _ReduceSum function */
    _ReduceSum(&input, &output, dim, &shift, power, isExp);

-   if (output.enableGrad) {
+   if (input.enableGrad) {
        /* tensor connections */
        XLink::MakeLink(&input, &shift, &output, REDUCE_REDUCESUM);
        XLink::AddParamToHeadInt(&output, dim);
...
...
@@ -294,7 +369,7 @@ sum = \sum_i exp((a_i)^power) if isExp == true
XTensor ReduceSum(const XTensor &input, int dim, DTYPE power, bool isExp)
{
    CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!");

    int order = input.order - 1;
    int * dimSize = new int[order];
    for(int i = 0; i < order; i++){
...
...
@@ -312,10 +387,12 @@ XTensor ReduceSum(const XTensor &input, int dim, DTYPE power, bool isExp)
    _ReduceSum(&input, &output, dim, NULL, power, isExp);

-   /* tensor connection */
-   XLink::MakeLink(&input, NULL, &output, REDUCE_REDUCESUM);
-   XLink::AddParamToHeadInt(&output, dim);
-   XLink::AddParamToHead(&output, power);
-   XLink::AddParamToHeadBool(&output, isExp);
+   if (input.enableGrad) {
+       XLink::MakeLink(&input, NULL, &output, REDUCE_REDUCESUM);
+       XLink::AddParamToHeadInt(&output, dim);
+       XLink::AddParamToHead(&output, power);
+       XLink::AddParamToHeadBool(&output, isExp);
+   }

    /* destroy variables */
    delete[] dimSize;
...
...
@@ -361,7 +438,7 @@ void ReduceSum(const XTensor &input, XTensor &output, int dim, DTYPE power, bool
    /* call _ReduceSum function */
    _ReduceSum(&input, &output, dim, NULL, power, isExp);

-   if (output.enableGrad) {
+   if (input.enableGrad) {
        /* tensor connections */
        XLink::MakeLink(&input, NULL, &output, REDUCE_REDUCESUM);
        XLink::AddParamToHeadInt(&output, dim);
...
...
source/tensor/core/reduce/ReduceSumSquared.cpp
...
...
@@ -55,7 +55,7 @@ For a 1-dimensional data array a, sum = \sum_i (a_i - shift)^2
XTensor ReduceSumSquared(const XTensor &input, int dim, const XTensor &shift)
{
    CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!");

    int order = input.order - 1;
    int * dimSize = new int[order];
    for(int i = 0; i < order; i++){
...
...
@@ -73,8 +73,10 @@ XTensor ReduceSumSquared(const XTensor &input, int dim, const XTensor &shift)
    _ReduceSumSquared(&input, &output, dim, &shift);

-   /* tensor connection */
-   XLink::MakeLink(&input, &shift, &output, REDUCE_REDUCESUMSQUARED);
-   XLink::AddParamToHeadInt(&output, dim);
+   if (input.enableGrad) {
+       XLink::MakeLink(&input, &shift, &output, REDUCE_REDUCESUMSQUARED);
+       XLink::AddParamToHeadInt(&output, dim);
+   }

    /* destroy variables */
    delete[] dimSize;
...
...
@@ -116,7 +118,7 @@ void ReduceSumSquared(const XTensor &input, XTensor &output, int dim, const XTen
    /* call _ReduceSumSquared function */
    _ReduceSumSquared(&input, &output, dim, &shift);

-   if (output.enableGrad) {
+   if (input.enableGrad) {
        /* tensor connections */
        XLink::MakeLink(&input, &shift, &output, REDUCE_REDUCESUMSQUARED);
        XLink::AddParamToHeadInt(&output, dim);
...
...
source/tensor/core/reduce/ReduceVariance.cpp
...
...
@@ -38,7 +38,7 @@ For a 1-dimensional data array a, variance = 1/n * \sum_i (a_i - mean)^2
*/
void _ReduceVariance(const XTensor * input, XTensor * output, int dim, const XTensor * mean)
{
-   int dimRDI = input->order - dim - 1;
+   int dimRDI = input->order - dim - 1;
    int num = input->dimSizeRDI[dimRDI];
    _ReduceSum(input, output, dim, mean, 2.0F);
    _ScaleAndShiftMe(output, (DTYPE)1 / num, 0);
...
...
@@ -58,7 +58,7 @@ For a 1-dimensional data array a, variance = 1/n * \sum_i (a_i - mean)^2
XTensor ReduceVariance(const XTensor &input, int dim, const XTensor &mean)
{
    CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!");

    int order = input.order - 1;
    int * dimSize = new int[order];
    for(int i = 0; i < order; i++){
...
...
@@ -76,8 +76,10 @@ XTensor ReduceVariance(const XTensor &input, int dim, const XTensor &mean)
    _ReduceVariance(&input, &output, dim, &mean);

-   /* tensor connection */
-   XLink::MakeLink(&input, &mean, &output, REDUCE_REDUCEVARIANCE);
-   XLink::AddParamToHeadInt(&output, dim);
+   if (input.enableGrad) {
+       XLink::MakeLink(&input, &mean, &output, REDUCE_REDUCEVARIANCE);
+       XLink::AddParamToHeadInt(&output, dim);
+   }

    /* destroy variables */
    delete[] dimSize;
...
...
@@ -119,7 +121,7 @@ void ReduceVariance(const XTensor &input, XTensor &output, int dim, const XTenso
    /* call _ReduceVariance function */
    _ReduceVariance(&input, &output, dim, &mean);

-   if (output.enableGrad) {
+   if (input.enableGrad) {
        /* tensor connection */
        XLink::MakeLink(&input, &mean, &output, REDUCE_REDUCEVARIANCE);
        XLink::AddParamToHeadInt(&output, dim);
...
...
source/tensor/core/reduce/VectorBuffer.cpp
0 → 100644
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: ZHANG Yuhao (email: zhangyuhao@stu.neu.edu.cn) 2019-07-23
*/
#include "VectorBuffer.h"
namespace nts {

/* data size for each buffer */
int VectorBuffer::size()
{
    return 32 / sizeof(DTYPE);
}

/* constructor */
VectorBuffer::VectorBuffer()
{
}

/*
constructor
initialize the values with val
*/
VectorBuffer::VectorBuffer(DTYPE val)
{
    for (int i = 0; i != size(); i++) {
        values[i] = val;
    }
}

/* load data */
VectorBuffer VectorBuffer::loadu(const DTYPE* ptr, bool isExp, DTYPE power, DTYPE* bias)
{
    int count = 32 / sizeof(DTYPE);
    VectorBuffer vec;
    if (isExp) {
        if (bias == NULL) {
            if (power == (DTYPE)1.0) {
                for (int i = 0; i != count; i++) {
                    vec.values[i] = (DTYPE)exp(*(ptr + i));
                }
            }
            else if (power == (DTYPE)2.0) {
                for (int i = 0; i != count; i++) {
                    vec.values[i] = (DTYPE)exp((*(ptr + i)) * (*(ptr + i)));
                }
            }
            else if (power == (DTYPE)0.5) {
                for (int i = 0; i != count; i++) {
                    vec.values[i] = (DTYPE)exp(sqrt(*(ptr + i)));
                }
            }
            else {
                for (int i = 0; i != count; i++) {
                    vec.values[i] = (DTYPE)exp(pow(*(ptr + i), power));
                }
            }
        }
        else { /* bias != NULL */
            if (power == (DTYPE)1.0) {
                for (int i = 0; i != count; i++) {
                    vec.values[i] = (DTYPE)exp(*(ptr + i) - bias[i]);
                }
            }
            else if (power == (DTYPE)2.0) {
                for (int i = 0; i != count; i++) {
                    DTYPE value = *(ptr + i) - bias[i];
                    vec.values[i] = (DTYPE)exp(value * value);
                }
            }
            else if (power == (DTYPE)0.5) {
                for (int i = 0; i != count; i++) {
                    vec.values[i] = (DTYPE)exp(sqrt(*(ptr + i) - bias[i]));
                }
            }
            else {
                for (int i = 0; i != count; i++) {
                    vec.values[i] = (DTYPE)exp(pow(*(ptr + i) - bias[i], power));
                }
            }
        }
    }
    else { /* !isExp */
        if (bias == NULL) {
            if (power == (DTYPE)1.0) {
                memcpy(vec.values, ptr, count * sizeof(DTYPE));
            }
            else if (power == (DTYPE)2.0) {
                for (int i = 0; i != count; i++) {
                    vec.values[i] = (*(ptr + i)) * (*(ptr + i));
                }
            }
            else if (power == (DTYPE)0.5) {
                for (int i = 0; i != count; i++) {
                    vec.values[i] = (DTYPE)sqrt(*(ptr + i));
                }
            }
            else {
                for (int i = 0; i != count; i++) {
                    vec.values[i] = (DTYPE)pow(*(ptr + i), power);
                }
            }
        }
        else { /* bias != NULL */
            if (power == (DTYPE)1.0) {
                for (int i = 0; i != count; i++) {
                    vec.values[i] = *(ptr + i) - bias[i];
                }
            }
            else if (power == (DTYPE)2.0) {
                for (int i = 0; i != count; i++) {
                    DTYPE value = *(ptr + i) - bias[i];
                    vec.values[i] = value * value;
                }
            }
            else if (power == (DTYPE)0.5) {
                for (int i = 0; i != count; i++) {
                    vec.values[i] = (DTYPE)sqrt(*(ptr + i) - bias[i]);
                }
            }
            else {
                for (int i = 0; i != count; i++) {
                    vec.values[i] = (DTYPE)pow(*(ptr + i) - bias[i], power);
                }
            }
        }
    }
    return vec;
}

/* overloading [] */
const DTYPE& VectorBuffer::operator[](int idx) const
{
    return values[idx];
}

/* overloading + */
VectorBuffer VectorBuffer::operator+(const VectorBuffer &a)
{
    for (int i = 0; i != a.size(); i++) {
        this->values[i] = a[i] + this->values[i];
    }
    return *this;
}

/* compute the element-wise max of two buffers */
VectorBuffer VectorBuffer::maxData(const VectorBuffer &a)
{
    for (int i = 0; i != a.size(); i++) {
        this->values[i] = MAX(a[i], this->values[i]);
    }
    return *this;
}

} /* end of the nts (NiuTrans.Tensor) namespace */
\ No newline at end of file
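For orientation, a minimal usage sketch of the class added above, assuming DTYPE is float (so each buffer holds 32 / sizeof(float) = 8 values); SumSixteen is an illustrative name, not library code:

    #include "VectorBuffer.h"
    using namespace nts;

    // Sum 16 contiguous values with two buffers: one vector add, then a final fold.
    DTYPE SumSixteen(const DTYPE * data)
    {
        VectorBuffer a = VectorBuffer::loadu(data);                         // values 0..7
        VectorBuffer b = VectorBuffer::loadu(data + VectorBuffer::size());  // values 8..15
        a = a + b;                                                          // element-wise add
        DTYPE sum = 0;
        for (int i = 0; i != VectorBuffer::size(); i++)
            sum += a[i];
        return sum;
    }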
source/tensor/core/reduce/VectorBuffer.h
0 → 100644
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: ZHANG Yuhao (email: zhangyuhao@stu.neu.edu.cn) 2019-07-23
*/
//#include <cstring>
#include <math.h>
#include "../../XGlobal.h"
namespace nts {

class VectorBuffer {
private:
    /* buffer for the vector values */
    DTYPE values[32 / sizeof(DTYPE)] = {0};

public:
    /* data size for each buffer */
    static int size();

    /* constructor */
    VectorBuffer();

    /* constructor */
    VectorBuffer(DTYPE val);

    /* load data */
    static VectorBuffer loadu(const DTYPE* ptr, bool isExp = false, DTYPE power = (DTYPE)1.0F, DTYPE* bias = NULL);

    /* overloading [] */
    const DTYPE& operator[](int idx) const;

    /* overloading + */
    VectorBuffer operator+(const VectorBuffer &a);

    /* compute the element-wise max of two buffers */
    VectorBuffer maxData(const VectorBuffer &a);
};

}
\ No newline at end of file
source/tensor/core/shape/Concatenate.cpp
...
...
@@ -99,9 +99,11 @@ XTensor Concatenate(const TensorList &smalls, int dim)
        _Merge(&smalls, &big, dim);

-       /* tensor connection */
-       XLink::MakeLink(&smalls, &big, SHAPE_MERGE);
-       XLink::AddParamToHeadInt(&big, dim);
+       if (tensor->enableGrad) {
+           XLink::MakeLink(&smalls, &big, SHAPE_MERGE);
+           XLink::AddParamToHeadInt(&big, dim);
+       }

        /* destroy variables */
        delete[] dimSize;
...
...
@@ -127,8 +129,10 @@ XTensor Concatenate(const TensorList &smalls, int dim)
        _ConcatenateSolely(&smalls, &big, dim);

-       /* tensor connection */
-       XLink::MakeLink(&smalls, &big, SHAPE_CONCATENATE);
-       XLink::AddParamToHeadInt(&big, dim);
+       if (tensor->enableGrad) {
+           XLink::MakeLink(&smalls, &big, SHAPE_CONCATENATE);
+           XLink::AddParamToHeadInt(&big, dim);
+       }

        /* destroy variables */
        delete[] dimSize;
...
...
@@ -309,9 +313,11 @@ XTensor Concatenate(const XTensor &smallA, const XTensor &smallB, int dim)
        _Merge(&smalls, &big, dim);

-       /* tensor connection */
-       XLink::MakeLink(&smalls, &big, SHAPE_MERGE);
-       XLink::AddParamToHeadInt(&big, dim);
+       if (tensor->enableGrad) {
+           XLink::MakeLink(&smalls, &big, SHAPE_MERGE);
+           XLink::AddParamToHeadInt(&big, dim);
+       }

        /* destroy variables */
        delete[] dimSize;
...
...
@@ -337,8 +343,10 @@ XTensor Concatenate(const XTensor &smallA, const XTensor &smallB, int dim)
        _ConcatenateSolely(&smalls, &big, dim);

-       /* tensor connection */
-       XLink::MakeLink(&smalls, &big, SHAPE_CONCATENATE);
-       XLink::AddParamToHeadInt(&big, dim);
+       if (tensor->enableGrad) {
+           XLink::MakeLink(&smalls, &big, SHAPE_CONCATENATE);
+           XLink::AddParamToHeadInt(&big, dim);
+       }

        /* destroy variables */
        delete[] dimSize;
...
...
source/tensor/core/shape/Merge.cpp
...
...
@@ -222,9 +222,11 @@ XTensor Merge(const XTensor &s, int whereToMerge, int leadingDim)
    _Merge(&s, &t, whereToMerge, leadingDim);

-   /* tensor connections */
-   XLink::MakeLink(&s, NULL, &t, SHAPE_MERGE);
-   XLink::AddParamToHeadInt(&t, whereToMerge);
-   XLink::AddParamToHeadInt(&t, leadingDim);
+   if (s.enableGrad) {
+       XLink::MakeLink(&s, NULL, &t, SHAPE_MERGE);
+       XLink::AddParamToHeadInt(&t, whereToMerge);
+       XLink::AddParamToHeadInt(&t, leadingDim);
+   }

    /* destroy variables */
    delete[] dimSize;
...
...
@@ -261,7 +263,7 @@ void Merge(const XTensor &s, XTensor &t, int whereToMerge, int leadingDim)
    /* call _Merge function */
    _Merge(&s, &t, whereToMerge, leadingDim);

-   if (t.enableGrad) {
+   if (s.enableGrad) {
        /* tensor connections */
        XLink::MakeLink(&s, NULL, &t, SHAPE_MERGE);
        XLink::AddParamToHeadInt(&t, whereToMerge);
...
...
@@ -412,8 +414,10 @@ XTensor Merge(const TensorList &smalls, int whereToMerge)
    _Merge(&smalls, &big, whereToMerge);

-   /* tensor connections */
-   XLink::MakeLink(&smalls, &big, SHAPE_MERGE_LIST);
-   XLink::AddParamToHeadInt(&big, whereToMerge);
+   if (tensor->enableGrad) {
+       XLink::MakeLink(&smalls, &big, SHAPE_MERGE_LIST);
+       XLink::AddParamToHeadInt(&big, whereToMerge);
+   }

    /* destroy variables */
    delete[] dimSize;
...
...
@@ -453,8 +457,10 @@ XTensor Merge(const XTensor &smallA, const XTensor &smallB, int whereToMerge)
    _Merge(&smalls, &big, whereToMerge);

-   /* tensor connections */
-   XLink::MakeLink(&smalls, &big, SHAPE_MERGE_LIST);
-   XLink::AddParamToHeadInt(&big, whereToMerge);
+   if (smallA.enableGrad) {
+       XLink::MakeLink(&smalls, &big, SHAPE_MERGE_LIST);
+       XLink::AddParamToHeadInt(&big, whereToMerge);
+   }

    /* destroy variables */
    delete[] dimSize;
...
...
source/tensor/core/shape/Reshape.cpp
...
...
@@ -43,9 +43,11 @@ XTensor Reshape(XTensor &s, int order, int * dimSize)
    t.Reshape(order, dimSize);

-   /* tensor connections */
-   XLink::MakeLink(&s, NULL, &t, SHAPE_RESHAPE);
+   if (s.enableGrad) {
+       XLink::MakeLink(&s, NULL, &t, SHAPE_RESHAPE);
+   }

-   return t;
+   return t;
}

void Reshape(XTensor &s, XTensor &t, int order, int * dimSize)
...
...
@@ -57,7 +59,7 @@ void Reshape(XTensor &s, XTensor &t, int order, int * dimSize)
    /* call Reshape function */
    t.Reshape(order, dimSize);

-   if (t.enableGrad) {
+   if (s.enableGrad) {
        /* tensor connections */
        XLink::MakeLink(&s, NULL, &t, SHAPE_RESHAPE);
    }
...
...
source/tensor/core/shape/Split.cpp
...
...
@@ -217,9 +217,11 @@ XTensor Split(const XTensor &s, int whereToSplit, int splitNum)
    _Split(&s, &t, whereToSplit, splitNum);

-   /* tensor connections */
-   XLink::MakeLink(&s, NULL, &t, SHAPE_SPLIT);
-   XLink::AddParamToHeadInt(&t, whereToSplit);
-   XLink::AddParamToHeadInt(&t, splitNum);
+   if (s.enableGrad) {
+       XLink::MakeLink(&s, NULL, &t, SHAPE_SPLIT);
+       XLink::AddParamToHeadInt(&t, whereToSplit);
+       XLink::AddParamToHeadInt(&t, splitNum);
+   }

    /* destroy variables */
    delete[] dimSize;
...
...
@@ -251,7 +253,7 @@ void Split(const XTensor &s, XTensor &t, int whereToSplit, int splitNum)
    /* call _Split function */
    _Split(&s, &t, whereToSplit, splitNum);

-   if (t.enableGrad) {
+   if (s.enableGrad) {
        /* tensor connections */
        XLink::MakeLink(&s, NULL, &t, SHAPE_SPLIT);
        XLink::AddParamToHeadInt(&t, whereToSplit);
...
...
@@ -409,12 +411,15 @@ void Split(const XTensor &big, TensorList &smalls, int whereToSplit, int splitNu
    /* tensor connections */
    for(int i = 0; i < smalls.count; i++){
        XTensor * s = (XTensor*)smalls.Get(i);

-       XLink::MakeLink(&big, NULL, s, SHAPE_SPLIT_LIST);
-       XLink::AddParamToHeadInt(s, whereToSplit);
-       /* it is tricky here that we keep the id of each
-          block, rather than the total number of the splits */
-       XLink::AddParamToHeadInt(s, i);
+       if (s->enableGrad) {
+           XLink::MakeLink(&big, NULL, s, SHAPE_SPLIT_LIST);
+           XLink::AddParamToHeadInt(s, whereToSplit);
+           /* it is tricky here that we keep the id of each
+              block, rather than the total number of the splits */
+           XLink::AddParamToHeadInt(s, i);
+       }
    }
}
...
...
source/tensor/core/shape/Squeeze.cpp
...
...
@@ -121,7 +121,9 @@ XTensor Squeeze(XTensor & source, int leadingDim)
    _Squeeze(&source, &target, leadingDim);

-   /* tensor connections */
-   XLink::MakeLink(&source, NULL, &target, SHAPE_SQUEEZE);
+   if (source.enableGrad) {
+       XLink::MakeLink(&source, NULL, &target, SHAPE_SQUEEZE);
+   }

    return target;
}
...
...
@@ -135,7 +137,7 @@ void Squeeze(XTensor & source, XTensor & target, int leadingDim)
    /* call _Squeeze function */
    _Squeeze(&source, &target, leadingDim);

-   if (target.enableGrad) {
+   if (source.enableGrad) {
        /* tensor connections */
        XLink::MakeLink(&source, NULL, &target, SHAPE_SQUEEZE);
    }
...
...
source/tensor/core/shape/Transpose.cpp
...
...
@@ -144,9 +144,11 @@ XTensor Transpose(const XTensor &a, const int i, const int j)
    _Transpose(&a, &b, i, j);

-   /* tensor connection */
-   XLink::MakeLink(&a, NULL, &b, SHAPE_TRANSPOSE);
-   XLink::AddParamToHeadInt(&b, i);
-   XLink::AddParamToHeadInt(&b, j);
+   if (a.enableGrad) {
+       XLink::MakeLink(&a, NULL, &b, SHAPE_TRANSPOSE);
+       XLink::AddParamToHeadInt(&b, i);
+       XLink::AddParamToHeadInt(&b, j);
+   }

    /* destroy variables */
    delete[] dimSize;
...
...
source/tensor/core/shape/Unsqueeze.cpp
...
...
@@ -156,9 +156,11 @@ XTensor Unsqueeze(const XTensor &a, int dim, int dSize)
    _Unsqueeze(&a, &b, dim, dSize);

-   /* tensor connections */
-   XLink::MakeLink(&a, NULL, &b, SHAPE_UNSQUEEZE);
-   XLink::AddParamToHeadInt(&b, dim);
-   XLink::AddParamToHeadInt(&b, dSize);
+   if (a.enableGrad) {
+       XLink::MakeLink(&a, NULL, &b, SHAPE_UNSQUEEZE);
+       XLink::AddParamToHeadInt(&b, dim);
+       XLink::AddParamToHeadInt(&b, dSize);
+   }

    /* destroy variables */
    delete[] dimSize;
...
...
@@ -191,7 +193,7 @@ void Unsqueeze(const XTensor &a, XTensor &b, int dim, int dSize)
    /* call _Unsqueeze function */
    _Unsqueeze(&a, &b, dim, dSize);

-   if (b.enableGrad) {
+   if (a.enableGrad) {
        /* tensor connections */
        XLink::MakeLink(&a, NULL, &b, SHAPE_UNSQUEEZE);
        XLink::AddParamToHeadInt(&b, dim);
...
...
source/tensor/core/sort/TopK.cu
...
...
@@ -377,8 +377,8 @@ get the top-k items
template<class T> __global__
void KernelTopK3(T * input, int stride, int strideNum, int blockNum, int k, T minValue, T * output, int * index)
{
-   __shared__ CudaHeapNode<T> heapData[(SHARED_MEMORY_SIZE - 1024 * sizeof(T)) / sizeof(CudaHeapNode<T>)];
-   __shared__ T eachHeapMaxValue[1024];
+   __shared__ CudaHeapNode<T> heapData[(SHARED_MEMORY_SIZE - 512 * sizeof(T)) / sizeof(CudaHeapNode<T>)];
+   __shared__ T eachHeapMaxValue[512];
    /* optimization for k size: the parameter must be more than half of k */
    int parameter = 0;
...
...
@@ -429,7 +429,7 @@ void KernelTopK3(T * input, int stride, int strideNum, int blockNum, int k, T mi
    }
    __syncthreads();

    /* to merge the heaps, use another way */
    T minData = minValue;
    int heapLimit = heap.count / 2;
    if (heapLimit % 2 == 0 && heapLimit != 0) heapLimit -= 1;
...
...
@@ -438,12 +438,13 @@ void KernelTopK3(T * input, int stride, int strideNum, int blockNum, int k, T mi
        minData = heap.items[counter].value;
    }
    eachHeapMaxValue[threadIdx.y * blockDim.x + threadIdx.x] = minData;
    //needs more optimization
    if (i == 0) {
-       int threadLimit = (threadIdx.y + 1) * blockDim.x;
+       int threadLimit = threadIdx.y * blockDim.x + min(blockDim.x, strideNum);
        CudaXHeap<MIN_HEAP, T> chooseHeap(k, heapData + k * ((blockDim.x * blockDim.y) + threadIdx.y));
        int counter = threadIdx.y * blockDim.x;
-       for (; counter < threadIdx.y * blockDim.x + k; ++counter) {
+       for (; counter < threadIdx.y * blockDim.x + min(k, blockDim.x); ++counter) {
            chooseHeap.Push(counter, eachHeapMaxValue[counter]);
        }
        for (; counter < threadLimit; ++counter) {
...
...
@@ -451,15 +452,16 @@ void KernelTopK3(T * input, int stride, int strideNum, int blockNum, int k, T mi
                chooseHeap.ReplaceTop(counter, eachHeapMaxValue[counter]);
            }
        }
+       int heapNum = chooseHeap.count;
        CudaXHeap<MIN_HEAP, T> ansHeapData(k, k - parameter, heapData + k * chooseHeap.items[0].index);
        int miss = parameter;
-       for (counter = 1; counter < k; ++counter) {
+       for (counter = 1; counter < heapNum; ++counter) {
            chooseHeap.items[0] = chooseHeap.items[chooseHeap.count - 1];
            chooseHeap.count--;
            chooseHeap.Down(0);
            CudaHeapNode<T> * cmpHeapData = heapData + k * (chooseHeap.items[0].index);
            int cmpHeapLimit = 0;
-           if (counter + heapLimit <= k - parameter){
+           if (counter + heapLimit <= k - parameter && heapNum == k){
                cmpHeapLimit = heapLimit;
            }
            /* take the max data from the min-heap, so start the search from the leaf nodes */
...
@@ -770,22 +772,22 @@ void KernelTopKRadixSelect(unsigned int * input, int stride, int strideNum,
/*
if (idx == 0)
{
unsigned int* uintOutput = new unsigned int;
int* tmpIndex = new int;
//*******************something worng***************************
cudaMalloc((void **)&uintOutput, sizeof(unsigned int)* k);
cudaMalloc((void **)&tmpIndex, sizeof(unsigned int)*k);
//*************************************************************
collectNumberOld(input, limit, k, desire, uintOutput, tmpIndex, stride, strideNum);
int blockIndex = idy / stride;
int offsetInBlock = idy% stride;
for (int i = stride * k * blockIndex + offsetInBlock, j = 0; j < k; j++, i += stride)
{
//for(int i = )
output[i] = deconvert(uintOutput[j]);
index[i] = tmpIndex[j];
}
unsigned int* uintOutput = new unsigned int;
int* tmpIndex = new int;
//*******************something worng***************************
cudaMalloc((void **)&uintOutput, sizeof(unsigned int)* k);
cudaMalloc((void **)&tmpIndex, sizeof(unsigned int)*k);
//*************************************************************
collectNumberOld(input, limit, k, desire, uintOutput, tmpIndex, stride, strideNum);
int blockIndex = idy / stride;
int offsetInBlock = idy% stride;
for (int i = stride * k * blockIndex + offsetInBlock, j = 0; j < k; j++, i += stride)
{
//for(int i = )
output[i] = deconvert(uintOutput[j]);
index[i] = tmpIndex[j];
}
}
__syncthreads();
*/
...
...
@@ -840,7 +842,7 @@ void _CudaTopK(const XTensor * a, XTensor * b, XTensor * index, int dim, int k)
    /* we run the kernel if the heaps can fit into the shared memory */
    cudaGrids[1] *= cudaBlocks[1];
    cudaBlocks[1] = 1;
-   if ((cudaBlocks[0] * cudaBlocks[1] + 1) * k * (a->unitSize + sizeof(int)) < SHARED_MEMORY_SIZE) {
+   if ((cudaBlocks[0] * cudaBlocks[1] + 1) * k * (a->unitSize + sizeof(int)) + (512 * sizeof(int)) < SHARED_MEMORY_SIZE) {
        if (a->dataType == DEFAULT_DTYPE) {
            KernelTopK3<DTYPE> <<<dim3(cudaGrids[0], cudaGrids[1]), dim3(cudaBlocks[0], cudaBlocks[1]) >>>
                                 ((DTYPE*)a->data, stride, strideNumA, blockNum, k, DTYPE_MIN,
...
...
@@ -869,7 +871,7 @@ void _CudaTopK(const XTensor * a, XTensor * b, XTensor * index, int dim, int k)
    //delete indexA;
    int workerNum = WORKERSNUM;

-   GDevs.GetCudaThread2D(a->mem->devID,
+   GDevs.GetCudaThread2D(a->devID,
                          workerNum, stride * blockNum, MAX_INT,
                          cudaGrids, cudaBlocks);
    if (a->dataType == DEFAULT_DTYPE) {
...
...
source/tensor/function/DropoutWithIndex.cpp
...
...
@@ -81,8 +81,10 @@ XTensor DropoutWithIndex(const XTensor &x, XTensor &maskIndex, DTYPE scale)
    _ScaleAndShiftMe(&c, scale);

-   /* tensor connections */
-   XLink::MakeLink(&x, &maskIndex, &c, MOVEMENT_DROPOUTWITHINDEX);
-   XLink::AddParamToHead(&c, scale);
+   if (x.enableGrad) {
+       XLink::MakeLink(&x, &maskIndex, &c, MOVEMENT_DROPOUTWITHINDEX);
+       XLink::AddParamToHead(&c, scale);
+   }

    return c;
}
...
...
source/tensor/function/HardTanH.cpp
...
...
@@ -78,7 +78,9 @@ XTensor HardTanH(const XTensor &x)
    _HardTanH(&x, &y);

-   /* tensor connection */
-   XLink::MakeLink(&x, NULL, &y, FUNC_HARDTANH);
+   if (x.enableGrad) {
+       XLink::MakeLink(&x, NULL, &y, FUNC_HARDTANH);
+   }

    return y;
}
...
...
@@ -92,7 +94,7 @@ void HardTanH(const XTensor &x, XTensor &y)
    /* call _HardTanH function */
    _HardTanH(&x, &y);

-   if (y.enableGrad) {
+   if (x.enableGrad) {
        /* tensor connection */
        XLink::MakeLink(&x, NULL, &y, FUNC_HARDTANH);
    }
...
...
source/tensor/function/Identity.cpp
...
...
@@ -54,7 +54,9 @@ XTensor Identity(const XTensor &x)
    _Identity(&x, &y);

-   /* tensor connection */
-   XLink::MakeLink(&x, NULL, &y, FUNC_IDENTITY);
+   if (x.enableGrad) {
+       XLink::MakeLink(&x, NULL, &y, FUNC_IDENTITY);
+   }

    return y;
}
...
...
@@ -68,7 +70,7 @@ void Identity(const XTensor &x, XTensor &y)
    /* call _Identity function */
    _Identity(&x, &y);

-   if (y.enableGrad) {
+   if (x.enableGrad) {
        /* tensor connection */
        XLink::MakeLink(&x, NULL, &y, FUNC_IDENTITY);
    }
...
...
source/tensor/function/LogSoftmax.cpp
...
...
@@ -188,8 +188,10 @@ XTensor LogSoftmax(const XTensor &x, int leadDim)
    _LogSoftmax(&x, &y, ld);

-   /* tensor connection */
-   XLink::MakeLink(&x, NULL, &y, FUNC_LOGSOFTMAX);
-   XLink::AddParamToHeadInt(&y, ld);
+   if (x.enableGrad) {
+       XLink::MakeLink(&x, NULL, &y, FUNC_LOGSOFTMAX);
+       XLink::AddParamToHeadInt(&y, ld);
+   }

    return y;
}
...
...
@@ -215,7 +217,7 @@ void LogSoftmax(const XTensor &x, XTensor &y, int leadDim)
    /* call _LogSoftmax function */
    _LogSoftmax(&x, &y, ld);

-   if (y.enableGrad) {
+   if (x.enableGrad) {
        /* tensor connection */
        XLink::MakeLink(&x, NULL, &y, FUNC_LOGSOFTMAX);
        XLink::AddParamToHeadInt(&y, ld);
...
...
source/tensor/function/Rectify.cpp
...
...
@@ -70,7 +70,9 @@ XTensor Rectify(const XTensor &x)
    _Rectify(&x, &y);

-   /* tensor connection */
-   XLink::MakeLink(&x, NULL, &y, FUNC_RECTIFY);
+   if (x.enableGrad) {
+       XLink::MakeLink(&x, NULL, &y, FUNC_RECTIFY);
+   }

    return y;
}
...
...
@@ -84,7 +86,7 @@ void Rectify(const XTensor &x, XTensor &y)
    /* call _Rectify function */
    _Rectify(&x, &y);

-   if (y.enableGrad) {
+   if (x.enableGrad) {
        /* tensor connection */
        XLink::MakeLink(&x, NULL, &y, FUNC_RECTIFY);
    }
...
...
source/tensor/function/Sigmoid.cpp
...
...
@@ -73,7 +73,9 @@ XTensor Sigmoid(const XTensor &x)
    _Sigmoid(&x, &y);

-   /* tensor connection */
-   XLink::MakeLink(&x, NULL, &y, FUNC_SIGMOID);
+   if (x.enableGrad) {
+       XLink::MakeLink(&x, NULL, &y, FUNC_SIGMOID);
+   }

    return y;
}
...
...
@@ -87,7 +89,7 @@ void Sigmoid(const XTensor &x, XTensor &y)
    /* call _Sigmoid function */
    _Sigmoid(&x, &y);

-   if (y.enableGrad) {
+   if (x.enableGrad) {
        /* tensor connection */
        XLink::MakeLink(&x, NULL, &y, FUNC_SIGMOID);
    }
...
...
source/tensor/function/Softmax.cpp
...
...
@@ -142,8 +142,10 @@ XTensor Softmax(const XTensor &x, int leadDim)
    _Softmax(&x, &y, ld);

-   /* tensor connection */
-   XLink::MakeLink(&x, NULL, &y, FUNC_SOFTMAX);
-   XLink::AddParamToHeadInt(&y, ld);
+   if (x.enableGrad) {
+       XLink::MakeLink(&x, NULL, &y, FUNC_SOFTMAX);
+       XLink::AddParamToHeadInt(&y, ld);
+   }

    return y;
}
...
...
@@ -161,7 +163,7 @@ void Softmax(const XTensor &x, XTensor &y, int leadDim)
    /* call _Softmax function */
    _Softmax(&x, &y, ld);

-   if (y.enableGrad) {
+   if (x.enableGrad) {
        /* tensor connection */
        XLink::MakeLink(&x, NULL, &y, FUNC_SOFTMAX);
        XLink::AddParamToHeadInt(&y, ld);
...
...
source/tensor/loss/CrossEntropy.cpp
...
...
@@ -277,8 +277,11 @@ XTensor CrossEntropy(const XTensor & output, const XTensor & gold,
    tails.Add((XTensor*)&gold);
    tails.Add(weight);
    tails.Add(padding);

-   XLink::MakeLink(&tails, &loss, LOSS_CROSSENTROPY);
-   XLink::AddParamToHeadInt(&loss, dim);
+   if (output.enableGrad) {
+       XLink::MakeLink(&tails, &loss, LOSS_CROSSENTROPY);
+       XLink::AddParamToHeadInt(&loss, dim);
+   }

    return loss;
}
...
...
@@ -302,8 +305,11 @@ XTensor CrossEntropy(const XTensor & output, const XTensor & gold,
    tails.Add((XTensor*)&gold);
    tails.Add(weight);
    tails.Add((XTensor*)&padding);

-   XLink::MakeLink(&tails, &loss, LOSS_CROSSENTROPY);
-   XLink::AddParamToHeadInt(&loss, dim);
+   if (output.enableGrad) {
+       XLink::MakeLink(&tails, &loss, LOSS_CROSSENTROPY);
+       XLink::AddParamToHeadInt(&loss, dim);
+   }

    return loss;
}
...
...
@@ -677,4 +683,4 @@ void _CrossEntropyBackward(XTensor * dedy, const XTensor * output,
        }
    }
}
-}// namespace nts(NiuTrans.Tensor)
\ No newline at end of file
+}
+// namespace nts(NiuTrans.Tensor)
source/tensor/test/TSetData.cpp
...
...
@@ -406,6 +406,68 @@ bool TestSetData5()
#endif // USE_CUDA
}

+/*
+case 6: test the SetDataRange function.
+generate data items in a range specified by start, end and step
+*/
+bool TestSetData6()
+{
+    /* an input tensor of size (5) */
+    int order = 1;
+    int * dimSize = new int[order];
+    dimSize[0] = 5;
+
+    int unitNum = 1;
+    for (int i = 0; i < order; i++)
+        unitNum *= dimSize[i];
+
+    DTYPE answer[5] = {5.2F, 3.2F, 1.2F, -0.8F, -2.8F};
+
+    /* CPU test */
+    bool cpuTest = true;
+
+    /* create tensors */
+    XTensor * s = NewTensor(order, dimSize);
+
+    /* initialize variables */
+    s->SetZeroAll();
+
+    /* call _SetDataRange function */
+    _SetDataRange(s, 5.2, -3.2, -2);
+
+    /* check results */
+    cpuTest = s->CheckData(answer, unitNum, 1e-4F);
+
+#ifdef USE_CUDA
+    /* GPU test */
+    bool gpuTest = true;
+
+    /* create tensors */
+    XTensor * sGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
+
+    /* initialize variables */
+    sGPU->SetZeroAll();
+
+    /* call _SetDataRange function */
+    _SetDataRange(sGPU, 5.2, -3.2, -2);
+
+    gpuTest = sGPU->CheckData(answer, unitNum, 1e-4F);
+
+    /* destroy variables */
+    delete s;
+    delete sGPU;
+    delete[] dimSize;
+
+    return cpuTest && gpuTest;
+#else
+    /* destroy variables */
+    delete s;
+    delete[] dimSize;
+
+    return cpuTest;
+#endif // USE_CUDA
+}

/* other cases */
/*
TODO!!
...
...
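What the new test exercises: _SetDataRange(s, 5.2, -3.2, -2) fills the tensor like an arange with a negative step, yielding 5.2, 3.2, 1.2, -0.8, -2.8 (the test compares with a 1e-4F tolerance, which absorbs float rounding). An equivalent host-side sketch under that reading (Range is an illustrative name, not the library routine):

    #include <vector>

    // Values from start toward end, stepping by step and excluding end.
    std::vector<float> Range(float start, float end, float step)
    {
        std::vector<float> out;
        for (float v = start; (step > 0) ? (v < end) : (v > end); v += step)
            out.push_back(v);   // 5.2, 3.2, 1.2, -0.8, -2.8 for (5.2, -3.2, -2)
        return out;
    }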
@@ -462,6 +524,15 @@ bool TestSetData()
    else
        XPRINT(0, stdout, ">> case 5 passed!\n");

+   /* case 6 test */
+   caseFlag = TestSetData6();
+   if (!caseFlag) {
+       returnFlag = false;
+       XPRINT(0, stdout, ">> case 6 failed!\n");
+   }
+   else
+       XPRINT(0, stdout, ">> case 6 passed!\n");

    /* other cases test */
    /*
    TODO!!
...
...
source/tensor/test/TTopK.cpp
...
...
@@ -105,10 +105,62 @@ bool TestTopK1()
    TopK(sUser, tUser2, indexUser2, dim, k);

    /* check results */
    cpuTest = t1->CheckData(tAnswer1, tUnitNum) && tUser1.CheckData(tAnswer1, tUnitNum)
           && t2->CheckData(tAnswer2, tUnitNum) && tUser2.CheckData(tAnswer2, tUnitNum)
           && index1->CheckData(indexAnswer1, tUnitNum) && indexUser1.CheckData(indexAnswer1, tUnitNum)
           && index2->CheckData(indexAnswer2, tUnitNum) && indexUser2.CheckData(indexAnswer2, tUnitNum);

+   for (int i = 0; i < tDimSize[1]; ++i)
+   {
+       for (int j = 0; j < tDimSize[0]; ++j)
+       {
+           float tmpData = ((float *)t1->data)[i + tDimSize[1] * j];
+           int tmpIndex = ((int *)index1->data)[i + tDimSize[1] * j];
+           float tmpDataUser = ((float *)tUser1.data)[i + tDimSize[1] * j];
+           int tmpIndexUser = ((int *)indexUser1.data)[i + tDimSize[1] * j];
+           bool flag = false;
+           bool flagUser = false;
+           for (int k = 0; k < tDimSize[0]; ++k)
+           {
+               float * ans = tAnswer1[0];
+               int * ansIndex = indexAnswer1[0];
+               if (tmpData == ans[i + tDimSize[1] * k] && tmpIndex == ansIndex[i + tDimSize[1] * k]) {
+                   flag = true;
+               }
+               if (tmpDataUser == ans[i + tDimSize[1] * k] && tmpIndexUser == ansIndex[i + tDimSize[1] * k]) {
+                   flagUser = true;
+               }
+           }
+           cpuTest = cpuTest && flag && flagUser;
+       }
+   }
+
+   for (int i = 0; i < tDimSize[0]; ++i)
+   {
+       for (int j = 0; j < tDimSize[1]; ++j)
+       {
+           float tmpData = ((float *)t2->data)[i * tDimSize[1] + j];
+           int tmpIndex = ((int *)index2->data)[i * tDimSize[1] + j];
+           float tmpDataUser = ((float *)tUser2.data)[i * tDimSize[1] + j];
+           int tmpIndexUser = ((int *)indexUser2.data)[i * tDimSize[1] + j];
+           bool flag = false;
+           bool flagUser = false;
+           for (int k = 0; k < tDimSize[1]; ++k)
+           {
+               float * ans = tAnswer2[0];
+               int * ansIndex = indexAnswer2[0];
+               if (tmpData == ans[i * tDimSize[1] + k] && tmpIndex == ansIndex[i * tDimSize[1] + k]) {
+                   flag = true;
+               }
+               if (tmpDataUser == ans[i * tDimSize[1] + k] && tmpIndexUser == ansIndex[i * tDimSize[1] + k]) {
+                   flagUser = true;
+               }
+           }
+           cpuTest = cpuTest && flag && flagUser;
+       }
+   }
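Why the checks were extended: top-k ties can legitimately come back in a different order, so the new loops verify that each produced (value, index) pair appears somewhere in the answer row rather than at an exact position. A condensed sketch of that membership test (RowContains is an illustrative name, not test code):

    // Order-insensitive check used by the rewritten tests, in miniature.
    bool RowContains(const float * ansVal, const int * ansIdx, int n,
                     float val, int idx)
    {
        for (int k = 0; k < n; ++k)
            if (val == ansVal[k] && idx == ansIdx[k])
                return true;
        return false;
    }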
#ifdef USE_CUDA
/* GPU test */
...
...
@@ -152,10 +204,74 @@ bool TestTopK1()
    TopK(sUserGPU, tUserGPU2, indexUserGPU2, dim, k);

    /* check results */
    gpuTest = tGPU1->CheckData(tAnswer1, tUnitNum) && tUserGPU1.CheckData(tAnswer1, tUnitNum)
           && tGPU2->CheckData(tAnswer2, tUnitNum) && tUserGPU2.CheckData(tAnswer2, tUnitNum)
           && indexGPU1->CheckData(indexAnswer1, tUnitNum) && indexUserGPU1.CheckData(indexAnswer1, tUnitNum)
           && indexGPU2->CheckData(indexAnswer2, tUnitNum) && indexUserGPU2.CheckData(indexAnswer2, tUnitNum);

+   float * checkData = new float[tUnitNum];
+   int * checkIndex = new int[tUnitNum];
+   float * checkDataUser = new float[tUnitNum];
+   int * checkIndexUser = new int[tUnitNum];
+
+   cudaMemcpy(checkData, tGPU1->data, sizeof(DTYPE) * tUnitNum, cudaMemcpyDeviceToHost);
+   cudaMemcpy(checkIndex, indexGPU1->data, sizeof(int) * tUnitNum, cudaMemcpyDeviceToHost);
+   cudaMemcpy(checkDataUser, tUserGPU1.data, sizeof(DTYPE) * tUnitNum, cudaMemcpyDeviceToHost);
+   cudaMemcpy(checkIndexUser, indexUserGPU1.data, sizeof(int) * tUnitNum, cudaMemcpyDeviceToHost);
+
+   for (int i = 0; i < tDimSize[1]; ++i)
+   {
+       for (int j = 0; j < tDimSize[0]; ++j)
+       {
+           float tmpData = ((float *)checkData)[i + tDimSize[1] * j];
+           int tmpIndex = ((int *)checkIndex)[i + tDimSize[1] * j];
+           float tmpDataUser = ((float *)checkDataUser)[i + tDimSize[1] * j];
+           int tmpIndexUser = ((int *)checkIndexUser)[i + tDimSize[1] * j];
+           bool flag = false;
+           bool flagUser = false;
+           for (int k = 0; k < tDimSize[0]; ++k)
+           {
+               float * ans = tAnswer1[0];
+               int * ansIndex = indexAnswer1[0];
+               if (tmpData == ans[i + tDimSize[1] * k] && tmpIndex == ansIndex[i + tDimSize[1] * k]) {
+                   flag = true;
+               }
+               if (tmpDataUser == ans[i + tDimSize[1] * k] && tmpIndexUser == ansIndex[i + tDimSize[1] * k]) {
+                   flagUser = true;
+               }
+           }
+           gpuTest = gpuTest && flag && flagUser;
+       }
+   }
+
+   cudaMemcpy(checkData, tGPU2->data, sizeof(DTYPE) * tUnitNum, cudaMemcpyDeviceToHost);
+   cudaMemcpy(checkIndex, indexGPU2->data, sizeof(int) * tUnitNum, cudaMemcpyDeviceToHost);
+   cudaMemcpy(checkDataUser, tUserGPU2.data, sizeof(DTYPE) * tUnitNum, cudaMemcpyDeviceToHost);
+   cudaMemcpy(checkIndexUser, indexUserGPU2.data, sizeof(int) * tUnitNum, cudaMemcpyDeviceToHost);
+
+   for (int i = 0; i < tDimSize[0]; ++i)
+   {
+       for (int j = 0; j < tDimSize[1]; ++j)
+       {
+           float tmpData = ((float *)checkData)[i * tDimSize[1] + j];
+           int tmpIndex = ((int *)checkIndex)[i * tDimSize[1] + j];
+           float tmpDataUser = ((float *)checkDataUser)[i * tDimSize[1] + j];
+           int tmpIndexUser = ((int *)checkIndexUser)[i * tDimSize[1] + j];
+           bool flag = false;
+           bool flagUser = false;
+           for (int k = 0; k < tDimSize[1]; ++k)
+           {
+               float * ans = tAnswer2[0];
+               int * ansIndex = indexAnswer2[0];
+               if (tmpData == ans[i * tDimSize[1] + k] && tmpIndex == ansIndex[i * tDimSize[1] + k]) {
+                   flag = true;
+               }
+               if (tmpDataUser == ans[i * tDimSize[1] + k] && tmpIndexUser == ansIndex[i * tDimSize[1] + k]) {
+                   flagUser = true;
+               }
+           }
+           gpuTest = gpuTest && flag && flagUser;
+       }
+   }

    /* destroy variables */
    delete s;
...
...
@@ -170,6 +286,10 @@ bool TestTopK1()
    delete indexGPU2;
    delete[] sDimSize;
    delete[] tDimSize;
+   delete[] checkData;
+   delete[] checkIndex;
+   delete[] checkDataUser;
+   delete[] checkIndexUser;

    return cpuTest && gpuTest;
#else
...
...
@@ -247,8 +367,33 @@ bool TestTopK2()
    TopK(sUser, tUser, indexUser, dim, k);

    /* check results */
    cpuTest = t->CheckData(tAnswer, tUnitNum) && tUser.CheckData(tAnswer, tUnitNum)
           && index->CheckData(indexAnswer, tUnitNum) && indexUser.CheckData(indexAnswer, tUnitNum);

+   for (int i = 0; i < tDimSize[0]; ++i)
+   {
+       for (int j = 0; j < tDimSize[1]; ++j)
+       {
+           float tmpData = ((float *)t->data)[i * tDimSize[1] + j];
+           int tmpIndex = ((int *)index->data)[i * tDimSize[1] + j];
+           float tmpDataUser = ((float *)tUser.data)[i * tDimSize[1] + j];
+           int tmpIndexUser = ((int *)indexUser.data)[i * tDimSize[1] + j];
+           bool flag = false;
+           bool flagUser = false;
+           for (int k = 0; k < tDimSize[1]; ++k)
+           {
+               float * ans = tAnswer[0];
+               int * ansIndex = indexAnswer[0];
+               if (tmpData == ans[i * tDimSize[1] + k] && tmpIndex == ansIndex[i * tDimSize[1] + k]) {
+                   flag = true;
+               }
+               if (tmpDataUser == ans[i * tDimSize[1] + k] && tmpIndexUser == ansIndex[i * tDimSize[1] + k]) {
+                   flagUser = true;
+               }
+           }
+           cpuTest = cpuTest && flag && flagUser;
+       }
+   }
#ifdef USE_CUDA
/* GPU test */
...
...
@@ -279,8 +424,42 @@ bool TestTopK2()
    TopK(sUserGPU, tUserGPU, indexUserGPU, dim, k);

    /* check results */
    gpuTest = tGPU->CheckData(tAnswer, tUnitNum) && tUserGPU.CheckData(tAnswer, tUnitNum)
           && indexGPU->CheckData(indexAnswer, tUnitNum) && indexUserGPU.CheckData(indexAnswer, tUnitNum);

+   float * checkData = new float[tUnitNum];
+   int * checkIndex = new int[tUnitNum];
+   float * checkDataUser = new float[tUnitNum];
+   int * checkIndexUser = new int[tUnitNum];
+
+   cudaMemcpy(checkData, tGPU->data, sizeof(DTYPE) * tUnitNum, cudaMemcpyDeviceToHost);
+   cudaMemcpy(checkIndex, indexGPU->data, sizeof(int) * tUnitNum, cudaMemcpyDeviceToHost);
+   cudaMemcpy(checkDataUser, tUserGPU.data, sizeof(DTYPE) * tUnitNum, cudaMemcpyDeviceToHost);
+   cudaMemcpy(checkIndexUser, indexUserGPU.data, sizeof(int) * tUnitNum, cudaMemcpyDeviceToHost);
+
+   for (int i = 0; i < tDimSize[0]; ++i)
+   {
+       for (int j = 0; j < tDimSize[1]; ++j)
+       {
+           float tmpData = ((float *)checkData)[i * tDimSize[1] + j];
+           int tmpIndex = ((int *)checkIndex)[i * tDimSize[1] + j];
+           float tmpDataUser = ((float *)checkDataUser)[i * tDimSize[1] + j];
+           int tmpIndexUser = ((int *)checkIndexUser)[i * tDimSize[1] + j];
+           bool flag = false;
+           bool flagUser = false;
+           for (int k = 0; k < tDimSize[1]; ++k)
+           {
+               float * ans = tAnswer[0];
+               int * ansIndex = indexAnswer[0];
+               if (tmpData == ans[i * tDimSize[1] + k] && tmpIndex == ansIndex[i * tDimSize[1] + k]) {
+                   flag = true;
+               }
+               if (tmpDataUser == ans[i * tDimSize[1] + k] && tmpIndexUser == ansIndex[i * tDimSize[1] + k]) {
+                   flagUser = true;
+               }
+           }
+           gpuTest = gpuTest && flag && flagUser;
+       }
+   }

    /* destroy variables */
    delete s;
...
...
@@ -291,6 +470,10 @@ bool TestTopK2()
    delete indexGPU;
    delete[] sDimSize;
    delete[] tDimSize;
+   delete[] checkData;
+   delete[] checkIndex;
+   delete[] checkDataUser;
+   delete[] checkIndexUser;

    return cpuTest && gpuTest;
#else
...
...