Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
N
NiuTrans.Tensor
概览
Overview
Details
Activity
Cycle Analytics
版本库
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
问题
8
Issues
8
列表
Board
标记
里程碑
合并请求
0
Merge Requests
0
CI / CD
CI / CD
流水线
作业
日程表
图表
维基
Wiki
代码片段
Snippets
成员
Collapse sidebar
Close sidebar
活动
图像
聊天
创建新问题
作业
提交
Issue Boards
Open sidebar
NiuTrans
NiuTrans.Tensor
Commits
c9ef15f8
Commit
c9ef15f8
authored
Oct 15, 2019
by
张裕浩
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Remove XTensor RDI representation.
parent
1f4eecdd
隐藏空白字符变更
内嵌
并排
正在显示
42 个修改的文件
包含
374 行增加
和
421 行删除
+374
-421
source/tensor/XTensor.cpp
+14
-26
source/tensor/XTensor.h
+0
-2
source/tensor/core/arithmetic/Div.cpp
+7
-8
source/tensor/core/arithmetic/Div.cu
+8
-9
source/tensor/core/arithmetic/MatrixMul.cpp
+37
-36
source/tensor/core/arithmetic/MatrixMulBatched.cpp
+40
-42
source/tensor/core/arithmetic/Multiply.cpp
+8
-9
source/tensor/core/arithmetic/Multiply.cu
+8
-9
source/tensor/core/arithmetic/Sum.cpp
+1
-1
source/tensor/core/arithmetic/SumByColumnTV.cpp
+3
-3
source/tensor/core/arithmetic/SumByColumnTV.cu
+3
-3
source/tensor/core/arithmetic/SumByColumnVT.cpp
+3
-3
source/tensor/core/arithmetic/SumByColumnVT.cu
+4
-4
source/tensor/core/arithmetic/XTensorBLAS.cpp
+1
-1
source/tensor/core/math/Normalize.cpp
+8
-9
source/tensor/core/math/Normalize.cu
+5
-6
source/tensor/core/movement/CopyInGrid.cpp
+2
-3
source/tensor/core/movement/CopyIndexed.cpp
+13
-11
source/tensor/core/movement/Gather.cu
+6
-8
source/tensor/core/reduce/ReduceMax.cpp
+22
-22
source/tensor/core/reduce/ReduceMax.cu
+9
-10
source/tensor/core/reduce/ReduceMean.cpp
+1
-2
source/tensor/core/reduce/ReduceSum.cpp
+13
-14
source/tensor/core/reduce/ReduceSum.cu
+9
-10
source/tensor/core/reduce/ReduceVariance.cpp
+1
-2
source/tensor/core/reduce/VectorBuffer.cpp
+1
-1
source/tensor/core/shape/ConcatenateSolely.cpp
+12
-13
source/tensor/core/shape/Merge.cpp
+19
-21
source/tensor/core/shape/Split.cpp
+15
-17
source/tensor/core/shape/Unsqueeze.cpp
+7
-8
source/tensor/core/shape/Unsqueeze.cu
+3
-4
source/tensor/core/sort/Sort.cpp
+6
-7
source/tensor/core/sort/Sort.cu
+8
-9
source/tensor/core/sort/TopK.cpp
+12
-13
source/tensor/core/sort/TopK.cu
+6
-7
source/tensor/core/utilities/SetAscendingOrder.cu
+6
-7
source/tensor/function/LogSoftmax.cpp
+15
-17
source/tensor/function/LogSoftmax.cu
+3
-4
source/tensor/function/Loss.cpp
+15
-17
source/tensor/function/Loss.cu
+11
-11
source/tensor/function/Softmax.cpp
+6
-8
source/tensor/function/Softmax.cu
+3
-4
没有找到文件。
source/tensor/XTensor.cpp
查看文件 @
c9ef15f8
...
...
@@ -233,7 +233,6 @@ void XTensor::Init()
devID
=
-
1
;
order
=
-
1
;
memset
(
dimSize
,
0
,
sizeof
(
int
)
*
MAX_TENSOR_DIM_NUM
);
memset
(
dimSizeRDI
,
0
,
sizeof
(
int
)
*
MAX_TENSOR_DIM_NUM
);
dataType
=
DEFAULT_DTYPE
;
unitSize
=
sizeof
(
float
);
unitNum
=
0
;
...
...
@@ -278,7 +277,6 @@ void XTensor::ShallowCopy(const XTensor &tensor)
{
order
=
tensor
.
order
;
memcpy
(
dimSize
,
tensor
.
dimSize
,
sizeof
(
int
)
*
MAX_TENSOR_DIM_NUM
);
memcpy
(
dimSizeRDI
,
tensor
.
dimSizeRDI
,
sizeof
(
int
)
*
MAX_TENSOR_DIM_NUM
);
dataType
=
tensor
.
dataType
;
unitSize
=
tensor
.
unitSize
;
unitNum
=
tensor
.
unitNum
;
...
...
@@ -442,7 +440,7 @@ bool XTensor::IsSameShaped(const XTensor * a, const XTensor * b)
return
false
;
for
(
int
i
=
0
;
i
<
a
->
order
;
i
++
){
if
(
a
->
dimSize
RDI
[
i
]
!=
b
->
dimSizeRDI
[
i
])
if
(
a
->
dimSize
[
i
]
!=
b
->
dimSize
[
i
])
return
false
;
}
...
...
@@ -478,7 +476,6 @@ void XTensor::SetDim(int * myDimSize)
{
for
(
int
i
=
0
;
i
<
order
;
i
++
)
{
dimSize
[
i
]
=
myDimSize
[
i
];
dimSizeRDI
[
order
-
i
-
1
]
=
myDimSize
[
i
];
}
}
...
...
@@ -505,20 +502,17 @@ reshape the tensor
void
XTensor
::
Reshape
(
const
int
myOrder
,
const
int
*
myDimSize
)
{
int
dims
[
MAX_TENSOR_DIM_NUM
];
int
dimsRDI
[
MAX_TENSOR_DIM_NUM
];
int
num
=
1
;
for
(
int
i
=
0
;
i
<
myOrder
;
i
++
){
num
*=
myDimSize
[
i
];
dims
[
i
]
=
abs
(
myDimSize
[
i
]);
dimsRDI
[
myOrder
-
i
-
1
]
=
dims
[
i
];
}
CheckNTErrors
(
abs
(
num
)
==
unitNum
,
"Wrong size found when we reshape the tensor!"
);
order
=
myOrder
;
memcpy
(
dimSize
,
dims
,
sizeof
(
int
)
*
order
);
memcpy
(
dimSizeRDI
,
dimsRDI
,
sizeof
(
int
)
*
order
);
}
/*
...
...
@@ -888,7 +882,6 @@ void XTensor::SetAscendingOrder(int dim)
CheckNTErrors
((
dim
>=
0
&&
dim
<
order
),
"Wrong dimension specified!"
);
CheckNTErrors
((
dataType
==
X_INT
),
"TODO!"
);
int
dimRDI
=
order
-
dim
-
1
;
if
(
devID
>=
0
){
#ifdef USE_CUDA
CudaSetAscendingOrder
(
this
,
dim
);
...
...
@@ -898,13 +891,13 @@ void XTensor::SetAscendingOrder(int dim)
}
else
{
int
stride
=
1
;
int
strideNum
=
dimSizeRDI
[
dimRDI
];
for
(
int
i
=
0
;
i
<
dimRDI
;
i
++
)
stride
*=
dimSizeRDI
[
i
];
int
blockNum
=
1
;
for
(
int
i
=
dimRDI
+
1
;
i
<
order
;
i
++
)
blockNum
*=
dimSizeRDI
[
i
];
int
strideNum
=
dimSize
[
dim
];
for
(
int
i
=
0
;
i
<
dim
;
i
++
)
blockNum
*=
dimSize
[
i
];
for
(
int
i
=
dim
+
1
;
i
<
order
;
i
++
)
stride
*=
dimSize
[
i
];
for
(
int
k
=
0
;
k
<
blockNum
;
k
++
){
for
(
int
j
=
0
;
j
<
strideNum
;
j
++
){
...
...
@@ -939,17 +932,13 @@ void * XTensor::GetCell(int index[], int size) const
{
CheckNTErrors
((
size
==
order
),
"Illegal index!"
);
int
*
indexRDI
=
new
int
[
size
];
for
(
int
i
=
0
;
i
<
size
;
i
++
)
indexRDI
[
size
-
i
-
1
]
=
index
[
i
];
int
offset
=
index
RDI
[
size
-
1
];
for
(
int
i
=
size
-
2
;
i
>=
0
;
i
--
){
CheckNTErrors
((
index
RDI
[
i
]
<
dimSizeRDI
[
i
]),
"Index is out of range!"
);
offset
=
offset
*
dimSize
RDI
[
i
]
+
indexRDI
[
i
];
int
offset
=
index
[
0
];
for
(
int
i
=
1
;
i
<
size
;
++
i
){
CheckNTErrors
((
index
[
i
]
<
dimSize
[
i
]),
"Index is out of range!"
);
offset
=
offset
*
dimSize
[
i
]
+
index
[
i
];
}
delete
[]
indexRDI
;
if
(
isSparse
){
DTYPE
value
;
...
...
@@ -1365,7 +1354,6 @@ bool XTensor::Resize(const int myOrder, const int * myDimSize,
bool
zeroData
=
false
;
for
(
int
i
=
0
;
i
<
order
;
i
++
){
dimSize
[
i
]
=
abs
(
myDimSize
[
i
]);
dimSizeRDI
[
order
-
i
-
1
]
=
dimSize
[
i
];
if
(
myDimSize
[
i
]
<
0
)
filledData
=
false
;
if
(
myDimSize
[
i
]
==
0
)
...
...
@@ -1564,7 +1552,7 @@ void XTensor::Dump(FILE * file, const char * label, const int n, const int beg,
if
(
isSparse
)
{
int
num
=
0
;
for
(
int
i
=
0
;
i
<
order
;
i
++
)
num
*=
dimSize
RDI
[
i
];
num
*=
dimSize
[
i
];
num
=
int
(
num
*
denseRatio
+
1
);
int
tupleSize
=
sizeof
(
int
)
+
sizeof
(
DTYPE
);
int
size
=
sizeof
(
int
)
+
tupleSize
*
(
num
);
...
...
@@ -1756,8 +1744,8 @@ void XTensor::Read(FILE * file, const char * label)
int
ds
[
MAX_TENSOR_DIM_NUM
];
for
(
int
i
=
0
;
i
<
order
;
i
++
)
{
ds
[
i
]
=
key
%
dimSize
RDI
[
i
];
key
/=
dimSize
RDI
[
i
];
ds
[
i
]
=
key
%
dimSize
[
i
];
key
/=
dimSize
[
i
];
}
Set
(
value
,
ds
);
}
...
...
source/tensor/XTensor.h
查看文件 @
c9ef15f8
...
...
@@ -95,8 +95,6 @@ public:
/* size of each dimension */
int
dimSize
[
MAX_TENSOR_DIM_NUM
];
/* size of each dimension by Reversed Dimension Indexing (RDI) Mode */
int
dimSizeRDI
[
MAX_TENSOR_DIM_NUM
];
/* data unit - data type for every cell */
TENSOR_DATA_TYPE
dataType
;
...
...
source/tensor/core/arithmetic/Div.cpp
查看文件 @
c9ef15f8
...
...
@@ -41,7 +41,6 @@ where i is the index of the item
*/
void
_Div
(
const
XTensor
*
a
,
const
XTensor
*
b
,
XTensor
*
c
,
DTYPE
alpha
,
int
leadingDim
)
{
int
leadingDimRDI
=
a
->
order
-
leadingDim
-
1
;
CheckNTErrors
((
a
->
unitNum
<=
c
->
unitNum
&&
b
->
unitNum
<=
c
->
unitNum
),
"Unmatched tensors in multiplication!"
);
CheckNTErrors
((
a
->
order
==
b
->
order
&&
a
->
order
==
c
->
order
),
...
...
@@ -59,17 +58,17 @@ void _Div(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, int le
int
blockSizeB
=
1
;
int
blockSizeC
=
1
;
int
blockNum
=
1
;
int
dimensionSizeA
=
a
->
dimSize
RDI
[
leadingDimRDI
];
int
dimensionSizeB
=
b
->
dimSize
RDI
[
leadingDimRDI
];
int
dimensionSizeC
=
c
->
dimSize
RDI
[
leadingDimRDI
];
int
dimensionSizeA
=
a
->
dimSize
[
leadingDim
];
int
dimensionSizeB
=
b
->
dimSize
[
leadingDim
];
int
dimensionSizeC
=
c
->
dimSize
[
leadingDim
];
for
(
int
i
=
0
;
i
<
a
->
order
;
i
++
)
{
if
(
i
!=
leadingDim
RDI
)
{
CheckNTErrors
((
a
->
dimSize
RDI
[
i
]
==
b
->
dimSizeRDI
[
i
]
&&
a
->
dimSizeRDI
[
i
]
==
c
->
dimSizeRDI
[
i
]),
if
(
i
!=
leadingDim
)
{
CheckNTErrors
((
a
->
dimSize
[
i
]
==
b
->
dimSize
[
i
]
&&
a
->
dimSize
[
i
]
==
c
->
dimSize
[
i
]),
"Unmatched tensors!"
);
}
if
(
i
<
leadingDimRDI
)
stride
*=
a
->
dimSize
RDI
[
i
];
if
(
i
>
leadingDim
)
stride
*=
a
->
dimSize
[
i
];
}
blockSizeA
=
stride
*
dimensionSizeA
;
...
...
source/tensor/core/arithmetic/Div.cu
查看文件 @
c9ef15f8
...
...
@@ -122,7 +122,6 @@ where i is the item index
*/
void _CudaDiv(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, int leadingDim)
{
int leadingDimRDI = a->order - leadingDim - 1;
CheckNTErrors((a->unitNum <= c->unitNum && b->unitNum <= c->unitNum),
"Unmatched tensors in multiplication!");
CheckNTErrors((a->order == b->order && a->order == c->order), "Unmatched tensors!");
...
...
@@ -130,18 +129,18 @@ void _CudaDiv(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, in
int stride = 1;
int blockSizeA = 1;
int blockNum = 1;
int dimensionSizeA = a->dimSize
RDI[leadingDimRDI
];
int dimensionSizeB = b->dimSize
RDI[leadingDimRDI
];
int dimensionSizeC = c->dimSize
RDI[leadingDimRDI
];
int dimensionSizeA = a->dimSize
[leadingDim
];
int dimensionSizeB = b->dimSize
[leadingDim
];
int dimensionSizeC = c->dimSize
[leadingDim
];
for (int i = 0; i < a->order; i++) {
if (i != leadingDim
RDI
) {
CheckNTErrors((a->dimSize
RDI[i] == b->dimSizeRDI
[i] &&
a->dimSize
RDI[i] == c->dimSizeRDI
[i]),
if (i != leadingDim) {
CheckNTErrors((a->dimSize
[i] == b->dimSize
[i] &&
a->dimSize
[i] == c->dimSize
[i]),
"Unmatched tensors!");
}
if (i
< leadingDimRDI
)
stride *= a->dimSize
RDI
[i];
if (i
> leadingDim
)
stride *= a->dimSize[i];
}
blockSizeA = stride * dimensionSizeA;
...
...
source/tensor/core/arithmetic/MatrixMul.cpp
查看文件 @
c9ef15f8
...
...
@@ -77,18 +77,18 @@ void _MatrixMul(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
return
;
}
int
an
=
transposedA
==
X_TRANS
?
a
->
dimSize
RDI
[
0
]
:
a
->
dimSizeRDI
[
1
];
int
am
=
transposedA
==
X_TRANS
?
a
->
dimSize
RDI
[
1
]
:
a
->
dimSizeRDI
[
0
];
int
bn
=
transposedB
==
X_TRANS
?
b
->
dimSize
RDI
[
0
]
:
b
->
dimSizeRDI
[
1
];
int
bm
=
transposedB
==
X_TRANS
?
b
->
dimSize
RDI
[
1
]
:
b
->
dimSizeRDI
[
0
];
int
cn
=
c
->
dimSize
RDI
[
1
];
int
cm
=
c
->
dimSize
RDI
[
0
];
int
an
=
transposedA
==
X_TRANS
?
a
->
dimSize
[
a
->
order
-
1
]
:
a
->
dimSize
[
a
->
order
-
2
];
int
am
=
transposedA
==
X_TRANS
?
a
->
dimSize
[
a
->
order
-
2
]
:
a
->
dimSize
[
a
->
order
-
1
];
int
bn
=
transposedB
==
X_TRANS
?
b
->
dimSize
[
b
->
order
-
1
]
:
b
->
dimSize
[
b
->
order
-
2
];
int
bm
=
transposedB
==
X_TRANS
?
b
->
dimSize
[
b
->
order
-
2
]
:
b
->
dimSize
[
b
->
order
-
1
];
int
cn
=
c
->
dimSize
[
c
->
order
-
2
];
int
cm
=
c
->
dimSize
[
c
->
order
-
1
];
CheckNTErrors
((
am
==
bn
&&
an
==
cn
&&
bm
==
cm
),
"Unmatched tensors in multiplication!"
);
int
aBlockSize
=
a
->
dimSize
RDI
[
0
]
*
a
->
dimSizeRDI
[
1
];
int
bBlockSize
=
b
->
dimSize
RDI
[
0
]
*
b
->
dimSizeRDI
[
1
];
int
cBlockSize
=
c
->
dimSize
RDI
[
0
]
*
c
->
dimSizeRDI
[
1
];
int
aBlockSize
=
a
->
dimSize
[
a
->
order
-
1
]
*
a
->
dimSize
[
a
->
order
-
2
];
int
bBlockSize
=
b
->
dimSize
[
b
->
order
-
1
]
*
b
->
dimSize
[
b
->
order
-
2
];
int
cBlockSize
=
c
->
dimSize
[
c
->
order
-
1
]
*
c
->
dimSize
[
c
->
order
-
2
];
int
aRealBlockSize
=
aBlockSize
*
a
->
unitSize
;
int
bRealBlockSize
=
bBlockSize
*
b
->
unitSize
;
int
cRealBlockSize
=
cBlockSize
*
c
->
unitSize
;
...
...
@@ -96,24 +96,25 @@ void _MatrixMul(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
int
bBlockNum
=
1
;
int
cBlockNum
=
1
;
for
(
int
i
=
2
;
i
<
a
->
order
;
i
++
)
{
CheckNTErrors
(
a
->
dimSizeRDI
[
i
]
==
c
->
dimSizeRDI
[
i
-
2
+
b
->
order
],
"Incorrect tensor sizes!"
);
aBlockNum
*=
a
->
dimSizeRDI
[
i
];
cBlockNum
*=
a
->
dimSizeRDI
[
i
];
for
(
int
i
=
0
;
i
<
a
->
order
-
2
;
i
++
)
{
CheckNTErrors
(
a
->
dimSize
[
i
]
==
c
->
dimSize
[
i
],
"Incorrect tensor sizes!"
);
aBlockNum
*=
a
->
dimSize
[
i
];
cBlockNum
*=
a
->
dimSize
[
i
];
}
for
(
int
i
=
2
;
i
<
b
->
order
;
i
++
)
{
CheckNTErrors
(
b
->
dimSize
RDI
[
i
]
==
c
->
dimSizeRDI
[
i
],
"Incorrect tensor sizes!"
);
bBlockNum
*=
b
->
dimSize
RDI
[
i
];
cBlockNum
*=
b
->
dimSize
RDI
[
i
];
for
(
int
i
=
0
;
i
<
b
->
order
-
2
;
i
++
)
{
CheckNTErrors
(
b
->
dimSize
[
i
]
==
c
->
dimSize
[
i
-
2
+
a
->
order
],
"Incorrect tensor sizes!"
);
bBlockNum
*=
b
->
dimSize
[
i
];
cBlockNum
*=
b
->
dimSize
[
i
];
}
XList
*
aList
=
new
XList
(
10
);
XList
*
bList
=
new
XList
(
10
);
XList
*
cList
=
new
XList
(
10
);
int
aDimSize
[
2
]
=
{
-
a
->
dimSize
RDI
[
1
],
a
->
dimSizeRDI
[
0
]
};
int
bDimSize
[
2
]
=
{
-
b
->
dimSize
RDI
[
1
],
b
->
dimSizeRDI
[
0
]
};
int
cDimSize
[
2
]
=
{
-
c
->
dimSize
RDI
[
1
],
c
->
dimSizeRDI
[
0
]
};
int
aDimSize
[
2
]
=
{
-
a
->
dimSize
[
a
->
order
-
2
],
a
->
dimSize
[
a
->
order
-
1
]
};
int
bDimSize
[
2
]
=
{
-
b
->
dimSize
[
b
->
order
-
2
],
b
->
dimSize
[
b
->
order
-
1
]
};
int
cDimSize
[
2
]
=
{
-
c
->
dimSize
[
c
->
order
-
2
],
c
->
dimSize
[
c
->
order
-
1
]
};
bool
isSparseMul
=
false
;
...
...
@@ -230,20 +231,20 @@ XTensor MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA,
CheckNTErrors
(
a
.
dataType
==
b
.
dataType
,
"Input tensors should have the same data type!"
);
CheckNTErrors
(
a
.
order
>=
2
&&
b
.
order
>=
2
,
"Input tensors must have a order >= 2!"
);
int
an
=
transposedA
==
X_TRANS
?
a
.
dimSize
RDI
[
0
]
:
a
.
dimSizeRDI
[
1
];
int
am
=
transposedA
==
X_TRANS
?
a
.
dimSize
RDI
[
1
]
:
a
.
dimSizeRDI
[
0
];
int
bn
=
transposedB
==
X_TRANS
?
b
.
dimSize
RDI
[
0
]
:
b
.
dimSizeRDI
[
1
];
int
bm
=
transposedB
==
X_TRANS
?
b
.
dimSize
RDI
[
1
]
:
b
.
dimSizeRDI
[
0
];
int
an
=
transposedA
==
X_TRANS
?
a
.
dimSize
[
a
.
order
-
1
]
:
a
.
dimSize
[
a
.
order
-
2
];
int
am
=
transposedA
==
X_TRANS
?
a
.
dimSize
[
a
.
order
-
2
]
:
a
.
dimSize
[
a
.
order
-
1
];
int
bn
=
transposedB
==
X_TRANS
?
b
.
dimSize
[
b
.
order
-
1
]
:
b
.
dimSize
[
b
.
order
-
2
];
int
bm
=
transposedB
==
X_TRANS
?
b
.
dimSize
[
b
.
order
-
2
]
:
b
.
dimSize
[
b
.
order
-
1
];
CheckNTErrors
(
am
==
bn
,
"Unmatched tensors in multiplication!"
);
int
order
=
a
.
order
+
b
.
order
-
2
;
int
sub
=
0
;
int
*
dimSize
=
new
int
[
order
];
for
(
int
i
=
2
;
i
<
a
.
order
;
i
++
)
dimSize
[
sub
++
]
=
a
.
dimSize
RDI
[
a
.
order
+
1
-
i
];
for
(
int
i
=
2
;
i
<
b
.
order
;
i
++
)
dimSize
[
sub
++
]
=
b
.
dimSize
RDI
[
b
.
order
+
1
-
i
];
for
(
int
i
=
0
;
i
<
a
.
order
-
2
;
i
++
)
dimSize
[
sub
++
]
=
a
.
dimSize
[
i
];
for
(
int
i
=
0
;
i
<
b
.
order
-
2
;
i
++
)
dimSize
[
sub
++
]
=
b
.
dimSize
[
i
];
dimSize
[
sub
++
]
=
an
;
dimSize
[
sub
++
]
=
bm
;
...
...
@@ -280,20 +281,20 @@ XTensor MatrixMul(const XTensor &a, const XTensor &b,
CheckNTErrors
(
a
.
dataType
==
b
.
dataType
,
"Input tensors should have the same data type!"
);
CheckNTErrors
(
a
.
order
>=
2
&&
b
.
order
>=
2
,
"Input tensors must have a order >= 2!"
);
int
an
=
a
.
dimSize
RDI
[
1
];
int
am
=
a
.
dimSize
RDI
[
0
];
int
bn
=
b
.
dimSize
RDI
[
1
];
int
bm
=
b
.
dimSize
RDI
[
0
];
int
an
=
a
.
dimSize
[
a
.
order
-
2
];
int
am
=
a
.
dimSize
[
a
.
order
-
1
];
int
bn
=
b
.
dimSize
[
b
.
order
-
2
];
int
bm
=
b
.
dimSize
[
b
.
order
-
1
];
CheckNTErrors
(
am
==
bn
,
"Unmatched tensors in multiplication!"
);
int
order
=
a
.
order
+
b
.
order
-
2
;
int
sub
=
0
;
int
*
dimSize
=
new
int
[
order
];
for
(
int
i
=
2
;
i
<
a
.
order
;
i
++
)
dimSize
[
sub
++
]
=
a
.
dimSize
RDI
[
a
.
order
+
1
-
i
];
for
(
int
i
=
2
;
i
<
b
.
order
;
i
++
)
dimSize
[
sub
++
]
=
b
.
dimSize
RDI
[
b
.
order
+
1
-
i
];
for
(
int
i
=
0
;
i
<
a
.
order
-
2
;
i
++
)
dimSize
[
sub
++
]
=
a
.
dimSize
[
i
];
for
(
int
i
=
0
;
i
<
b
.
order
-
2
;
i
++
)
dimSize
[
sub
++
]
=
b
.
dimSize
[
i
];
dimSize
[
sub
++
]
=
an
;
dimSize
[
sub
++
]
=
bm
;
...
...
source/tensor/core/arithmetic/MatrixMulBatched.cpp
查看文件 @
c9ef15f8
...
...
@@ -56,7 +56,6 @@ void _MatrixMulBatched(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
"Input tensors must have a order >= 2!"
);
CheckNTErrors
((
a
->
order
==
b
->
order
&&
a
->
order
==
c
->
order
),
"Input tensor and output tensor must have same order!"
);
if
(
a
->
devID
>=
0
||
b
->
devID
>=
0
||
c
->
devID
>=
0
)
_MatrixMulBatchedGPU
(
a
,
transposedA
,
b
,
transposedB
,
c
,
alpha
,
beta
);
else
...
...
@@ -94,27 +93,27 @@ void _MatrixMulBatchedGPU(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
"Input tensor and output tensor must have same order!"
);
CheckNTErrors
(
a
->
devID
>=
0
&&
b
->
devID
>=
0
&&
c
->
devID
>=
0
,
"The tensors must be on GPUs"
);
int
an
=
transposedA
==
X_TRANS
?
a
->
dimSize
RDI
[
0
]
:
a
->
dimSizeRDI
[
1
];
int
am
=
transposedA
==
X_TRANS
?
a
->
dimSize
RDI
[
1
]
:
a
->
dimSizeRDI
[
0
];
int
bn
=
transposedB
==
X_TRANS
?
b
->
dimSize
RDI
[
0
]
:
b
->
dimSizeRDI
[
1
];
int
bm
=
transposedB
==
X_TRANS
?
b
->
dimSize
RDI
[
1
]
:
b
->
dimSizeRDI
[
0
];
int
cn
=
c
->
dimSize
RDI
[
1
];
int
cm
=
c
->
dimSize
RDI
[
0
];
int
an
=
transposedA
==
X_TRANS
?
a
->
dimSize
[
a
->
order
-
1
]
:
a
->
dimSize
[
a
->
order
-
2
];
int
am
=
transposedA
==
X_TRANS
?
a
->
dimSize
[
a
->
order
-
2
]
:
a
->
dimSize
[
a
->
order
-
1
];
int
bn
=
transposedB
==
X_TRANS
?
b
->
dimSize
[
b
->
order
-
1
]
:
b
->
dimSize
[
b
->
order
-
2
];
int
bm
=
transposedB
==
X_TRANS
?
b
->
dimSize
[
b
->
order
-
2
]
:
b
->
dimSize
[
b
->
order
-
1
];
int
cn
=
c
->
dimSize
[
c
->
order
-
2
];
int
cm
=
c
->
dimSize
[
c
->
order
-
1
];
CheckNTErrors
((
am
==
bn
&&
an
==
cn
&&
bm
==
cm
),
"Unmatched tensors in multiplication!"
);
int
aBlockSize
=
a
->
dimSize
RDI
[
0
]
*
a
->
dimSizeRDI
[
1
];
int
bBlockSize
=
b
->
dimSize
RDI
[
0
]
*
b
->
dimSizeRDI
[
1
];
int
cBlockSize
=
c
->
dimSize
RDI
[
0
]
*
c
->
dimSizeRDI
[
1
];
int
aBlockSize
=
a
->
dimSize
[
a
->
order
-
1
]
*
a
->
dimSize
[
a
->
order
-
2
];
int
bBlockSize
=
b
->
dimSize
[
b
->
order
-
1
]
*
b
->
dimSize
[
b
->
order
-
2
];
int
cBlockSize
=
c
->
dimSize
[
c
->
order
-
1
]
*
c
->
dimSize
[
c
->
order
-
2
];
int
aRealBlockSize
=
aBlockSize
*
a
->
unitSize
;
int
bRealBlockSize
=
bBlockSize
*
b
->
unitSize
;
int
cRealBlockSize
=
cBlockSize
*
c
->
unitSize
;
int
blockNum
=
1
;
for
(
int
i
=
2
;
i
<
a
->
order
;
i
++
)
{
CheckNTErrors
((
a
->
dimSize
RDI
[
i
]
==
c
->
dimSizeRDI
[
i
]),
"Incorrect tensor sizes!"
);
CheckNTErrors
((
b
->
dimSize
RDI
[
i
]
==
c
->
dimSizeRDI
[
i
]),
"Incorrect tensor sizes!"
);
blockNum
*=
a
->
dimSize
RDI
[
i
];
for
(
int
i
=
0
;
i
<
a
->
order
-
2
;
i
++
)
{
CheckNTErrors
((
a
->
dimSize
[
i
]
==
c
->
dimSize
[
i
]),
"Incorrect tensor sizes!"
);
CheckNTErrors
((
b
->
dimSize
[
i
]
==
c
->
dimSize
[
i
]),
"Incorrect tensor sizes!"
);
blockNum
*=
a
->
dimSize
[
i
];
}
int
devIDBackup
=
0
;
...
...
@@ -125,9 +124,9 @@ void _MatrixMulBatchedGPU(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
a
->
data
,
transposedA
,
a
->
dataType
,
aBlockSize
,
b
->
data
,
transposedB
,
b
->
dataType
,
bBlockSize
,
c
->
data
,
c
->
dataType
,
cBlockSize
,
blockNum
,
a
->
dimSize
RDI
[
1
],
a
->
dimSizeRDI
[
0
],
b
->
dimSize
RDI
[
1
],
b
->
dimSizeRDI
[
0
],
c
->
dimSize
RDI
[
1
],
c
->
dimSizeRDI
[
0
],
alpha
,
beta
);
a
->
dimSize
[
a
->
order
-
2
],
a
->
dimSize
[
a
->
order
-
1
],
b
->
dimSize
[
b
->
order
-
2
],
b
->
dimSize
[
b
->
order
-
1
],
c
->
dimSize
[
c
->
order
-
2
],
c
->
dimSize
[
c
->
order
-
1
],
alpha
,
beta
);
BacktoCudaDev
(
a
->
devID
,
devIDBackup
);
#endif
...
...
@@ -163,32 +162,32 @@ CheckNTErrors((a && b && c), "Empty input tensors!");
"Input tensor and output tensor must have same order!"
);
int
an
=
transposedA
==
X_TRANS
?
a
->
dimSize
RDI
[
0
]
:
a
->
dimSizeRDI
[
1
];
int
am
=
transposedA
==
X_TRANS
?
a
->
dimSize
RDI
[
1
]
:
a
->
dimSizeRDI
[
0
];
int
bn
=
transposedB
==
X_TRANS
?
b
->
dimSize
RDI
[
0
]
:
b
->
dimSizeRDI
[
1
];
int
bm
=
transposedB
==
X_TRANS
?
b
->
dimSize
RDI
[
1
]
:
b
->
dimSizeRDI
[
0
];
int
cn
=
c
->
dimSize
RDI
[
1
];
int
cm
=
c
->
dimSize
RDI
[
0
];
int
an
=
transposedA
==
X_TRANS
?
a
->
dimSize
[
a
->
order
-
1
]
:
a
->
dimSize
[
a
->
order
-
2
];
int
am
=
transposedA
==
X_TRANS
?
a
->
dimSize
[
a
->
order
-
2
]
:
a
->
dimSize
[
a
->
order
-
1
];
int
bn
=
transposedB
==
X_TRANS
?
b
->
dimSize
[
b
->
order
-
1
]
:
b
->
dimSize
[
b
->
order
-
2
];
int
bm
=
transposedB
==
X_TRANS
?
b
->
dimSize
[
b
->
order
-
2
]
:
b
->
dimSize
[
b
->
order
-
1
];
int
cn
=
c
->
dimSize
[
c
->
order
-
2
];
int
cm
=
c
->
dimSize
[
c
->
order
-
1
];
CheckNTErrors
(
am
==
bn
&&
an
==
cn
&&
bm
==
cm
,
"Unmatched tensors in multiplication!"
);
int
aBlockSize
=
a
->
dimSize
RDI
[
0
]
*
a
->
dimSizeRDI
[
1
];
int
bBlockSize
=
b
->
dimSize
RDI
[
0
]
*
b
->
dimSizeRDI
[
1
];
int
cBlockSize
=
c
->
dimSize
RDI
[
0
]
*
c
->
dimSizeRDI
[
1
];
int
aBlockSize
=
a
->
dimSize
[
a
->
order
-
1
]
*
a
->
dimSize
[
a
->
order
-
2
];
int
bBlockSize
=
b
->
dimSize
[
b
->
order
-
1
]
*
b
->
dimSize
[
b
->
order
-
2
];
int
cBlockSize
=
c
->
dimSize
[
c
->
order
-
1
]
*
c
->
dimSize
[
c
->
order
-
2
];
int
aRealBlockSize
=
aBlockSize
*
a
->
unitSize
;
int
bRealBlockSize
=
bBlockSize
*
b
->
unitSize
;
int
cRealBlockSize
=
cBlockSize
*
c
->
unitSize
;
int
blockNum
=
1
;
for
(
int
i
=
2
;
i
<
a
->
order
;
i
++
)
{
CheckNTErrors
((
a
->
dimSize
RDI
[
i
]
==
c
->
dimSizeRDI
[
i
]),
"Incorrect tensor sizes!"
);
CheckNTErrors
((
b
->
dimSize
RDI
[
i
]
==
c
->
dimSizeRDI
[
i
]),
"Incorrect tensor sizes!"
);
blockNum
*=
a
->
dimSize
RDI
[
i
];
for
(
int
i
=
0
;
i
<
a
->
order
-
2
;
i
++
)
{
CheckNTErrors
((
a
->
dimSize
[
i
]
==
c
->
dimSize
[
i
]),
"Incorrect tensor sizes!"
);
CheckNTErrors
((
b
->
dimSize
[
i
]
==
c
->
dimSize
[
i
]),
"Incorrect tensor sizes!"
);
blockNum
*=
a
->
dimSize
[
i
];
}
int
aDimSize
[
2
]
=
{
-
a
->
dimSize
RDI
[
1
],
a
->
dimSizeRDI
[
0
]};
int
bDimSize
[
2
]
=
{
-
b
->
dimSize
RDI
[
1
],
b
->
dimSizeRDI
[
0
]};
int
cDimSize
[
2
]
=
{
-
c
->
dimSize
RDI
[
1
],
c
->
dimSizeRDI
[
0
]};
int
aDimSize
[
2
]
=
{
-
a
->
dimSize
[
a
->
order
-
2
],
a
->
dimSize
[
a
->
order
-
1
]};
int
bDimSize
[
2
]
=
{
-
b
->
dimSize
[
b
->
order
-
2
],
b
->
dimSize
[
b
->
order
-
1
]};
int
cDimSize
[
2
]
=
{
-
c
->
dimSize
[
c
->
order
-
2
],
c
->
dimSize
[
c
->
order
-
1
]};
XTensor
*
ai
=
NewTensor2D
(
aDimSize
[
0
],
aDimSize
[
1
],
a
->
dataType
,
a
->
devID
,
a
->
mem
);
XTensor
*
bi
=
NewTensor2D
(
bDimSize
[
0
],
bDimSize
[
1
],
b
->
dataType
,
b
->
devID
,
b
->
mem
);
...
...
@@ -230,7 +229,6 @@ void _MatrixMulBatchedCPU(const XList * a, MATRIX_TRANS_TYPE transposedA,
{
CheckNTErrors
(
a
&&
b
&&
c
,
"Empty input lists!"
);
CheckNTErrors
(
a
->
count
==
b
->
count
&&
a
->
count
==
c
->
count
,
"Input lists must be of the same size!"
);
if
(
a
->
count
==
0
)
return
;
...
...
@@ -291,10 +289,10 @@ XTensor MatrixMulBatched(const XTensor &a, MATRIX_TRANS_TYPE transposedA, const
CheckNTErrors
(
a
.
order
>=
2
&&
b
.
order
>=
2
,
"Input tensors must have a order >= 2!"
);
CheckNTErrors
(
a
.
order
==
b
.
order
,
"Input tensor and output tensor must have same order!"
);
int
an
=
transposedA
==
X_TRANS
?
a
.
dimSize
RDI
[
0
]
:
a
.
dimSizeRDI
[
1
];
int
am
=
transposedA
==
X_TRANS
?
a
.
dimSize
RDI
[
1
]
:
a
.
dimSizeRDI
[
0
];
int
bn
=
transposedB
==
X_TRANS
?
b
.
dimSize
RDI
[
0
]
:
b
.
dimSizeRDI
[
1
];
int
bm
=
transposedB
==
X_TRANS
?
b
.
dimSize
RDI
[
1
]
:
b
.
dimSizeRDI
[
0
];
int
an
=
transposedA
==
X_TRANS
?
a
.
dimSize
[
a
.
order
-
1
]
:
a
.
dimSize
[
a
.
order
-
2
];
int
am
=
transposedA
==
X_TRANS
?
a
.
dimSize
[
a
.
order
-
2
]
:
a
.
dimSize
[
a
.
order
-
1
];
int
bn
=
transposedB
==
X_TRANS
?
b
.
dimSize
[
b
.
order
-
1
]
:
b
.
dimSize
[
b
.
order
-
2
];
int
bm
=
transposedB
==
X_TRANS
?
b
.
dimSize
[
b
.
order
-
2
]
:
b
.
dimSize
[
b
.
order
-
1
];
CheckNTErrors
(
am
==
bn
,
"Unmatched tensors in multiplication!"
);
...
...
@@ -347,10 +345,10 @@ XTensor MatrixMulBatched(const XTensor &a, const XTensor &b,
CheckNTErrors
(
a
.
order
>=
2
&&
b
.
order
>=
2
,
"Input tensors must have a order >= 2!"
);
CheckNTErrors
(
a
.
order
==
b
.
order
,
"Input tensor and output tensor must have same order!"
);
int
an
=
a
.
dimSize
RDI
[
1
];
int
am
=
a
.
dimSize
RDI
[
0
];
int
bn
=
b
.
dimSize
RDI
[
1
];
int
bm
=
b
.
dimSize
RDI
[
0
];
int
an
=
a
.
dimSize
[
a
.
order
-
2
];
int
am
=
a
.
dimSize
[
a
.
order
-
1
];
int
bn
=
b
.
dimSize
[
b
.
order
-
2
];
int
bm
=
b
.
dimSize
[
b
.
order
-
1
];
CheckNTErrors
(
am
==
bn
,
"Unmatched tensors in multiplication!"
);
...
...
source/tensor/core/arithmetic/Multiply.cpp
查看文件 @
c9ef15f8
...
...
@@ -41,7 +41,6 @@ where i is the index of the item
*/
void
_Multiply
(
const
XTensor
*
a
,
const
XTensor
*
b
,
XTensor
*
c
,
DTYPE
alpha
,
int
leadingDim
)
{
int
leadingDimRDI
=
a
->
order
-
leadingDim
-
1
;
CheckNTErrors
((
a
->
unitNum
<=
c
->
unitNum
&&
b
->
unitNum
<=
c
->
unitNum
),
"Unmatched tensors in multiplication!"
);
CheckNTErrors
((
a
->
order
==
b
->
order
&&
a
->
order
==
c
->
order
),
...
...
@@ -59,18 +58,18 @@ void _Multiply(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, i
int
blockSizeB
=
1
;
int
blockSizeC
=
1
;
int
blockNum
=
1
;
int
dimensionSizeA
=
a
->
dimSize
RDI
[
leadingDimRDI
];
int
dimensionSizeB
=
b
->
dimSize
RDI
[
leadingDimRDI
];
int
dimensionSizeC
=
c
->
dimSize
RDI
[
leadingDimRDI
];
int
dimensionSizeA
=
a
->
dimSize
[
leadingDim
];
int
dimensionSizeB
=
b
->
dimSize
[
leadingDim
];
int
dimensionSizeC
=
c
->
dimSize
[
leadingDim
];
for
(
int
i
=
0
;
i
<
a
->
order
;
i
++
)
{
if
(
i
!=
leadingDim
RDI
)
{
CheckNTErrors
((
a
->
dimSize
RDI
[
i
]
==
b
->
dimSizeRDI
[
i
]
&&
a
->
dimSize
RDI
[
i
]
==
c
->
dimSizeRDI
[
i
]),
if
(
i
!=
leadingDim
)
{
CheckNTErrors
((
a
->
dimSize
[
i
]
==
b
->
dimSize
[
i
]
&&
a
->
dimSize
[
i
]
==
c
->
dimSize
[
i
]),
"Unmatched tensors!"
);
}
if
(
i
<
leadingDimRDI
)
stride
*=
a
->
dimSize
RDI
[
i
];
if
(
i
>
leadingDim
)
stride
*=
a
->
dimSize
[
i
];
}
blockSizeA
=
stride
*
dimensionSizeA
;
...
...
source/tensor/core/arithmetic/Multiply.cu
查看文件 @
c9ef15f8
...
...
@@ -122,7 +122,6 @@ where i is the item index
*/
void _CudaMultiply(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, int leadingDim)
{
int leadingDimRDI = a->order - leadingDim - 1;
CheckNTErrors((a->unitNum <= c->unitNum && b->unitNum <= c->unitNum),
"Unmatched tensors in multiplication!");
CheckNTErrors((a->order == b->order && a->order == c->order), "Unmatched tensors!");
...
...
@@ -130,18 +129,18 @@ void _CudaMultiply(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alph
int stride = 1;
int blockSizeA = 1;
int blockNum = 1;
int dimensionSizeA = a->dimSize
RDI[leadingDimRDI
];
int dimensionSizeB = b->dimSize
RDI[leadingDimRDI
];
int dimensionSizeC = c->dimSize
RDI[leadingDimRDI
];
int dimensionSizeA = a->dimSize
[leadingDim
];
int dimensionSizeB = b->dimSize
[leadingDim
];
int dimensionSizeC = c->dimSize
[leadingDim
];
for (int i = 0; i < a->order; i++) {
if (i != leadingDim
RDI
) {
CheckNTErrors((a->dimSize
RDI[i] == b->dimSizeRDI
[i] &&
a->dimSize
RDI[i] == c->dimSizeRDI
[i]),
if (i != leadingDim) {
CheckNTErrors((a->dimSize
[i] == b->dimSize
[i] &&
a->dimSize
[i] == c->dimSize
[i]),
"Unmatched tensors!");
}
if (i
< leadingDimRDI
)
stride *= a->dimSize
RDI
[i];
if (i
> leadingDim
)
stride *= a->dimSize[i];
}
blockSizeA = stride * dimensionSizeA;
...
...
source/tensor/core/arithmetic/Sum.cpp
查看文件 @
c9ef15f8
...
...
@@ -75,7 +75,7 @@ void _Sum(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta)
else
{
if
(
!
a
->
isSparse
&&
!
b
->
isSparse
)
{
CheckNTErrors
(
!
c
->
isSparse
,
"Illegal use of sparse tensor in addition!"
);
if
(
a
->
dataType
==
DEFAULT_DTYPE
&&
b
->
dataType
==
DEFAULT_DTYPE
&&
c
->
dataType
==
DEFAULT_DTYPE
)
...
...
source/tensor/core/arithmetic/SumByColumnTV.cpp
查看文件 @
c9ef15f8
...
...
@@ -41,14 +41,14 @@ void _SumByColumnTV(const XTensor * a, const XTensor * b, XTensor * c, DTYPE bet
{
CheckNTErrors
((
a
&&
b
&&
c
),
"Empty input tensors!"
);
CheckNTErrors
((
XTensor
::
IsSameShaped
(
a
,
c
)),
"Unmatched tensors in addition!"
);
CheckNTErrors
((
b
->
order
==
2
&&
b
->
dimSize
RDI
[
0
]
==
1
&&
b
->
dimSizeRDI
[
1
]
==
a
->
dimSizeRDI
[
1
]),
CheckNTErrors
((
b
->
order
==
2
&&
b
->
dimSize
[
b
->
order
-
1
]
==
1
&&
b
->
dimSize
[
b
->
order
-
2
]
==
a
->
dimSize
[
a
->
order
-
2
]),
"Illegal input vector size!"
);
int
rowNum
=
a
->
dimSize
[
0
];
int
colNum
=
a
->
dimSize
[
1
];
int
blockNum
=
1
;
for
(
int
i
=
2
;
i
<
a
->
order
;
i
++
)
blockNum
*=
a
->
dimSize
RDI
[
i
];
for
(
int
i
=
0
;
i
<
a
->
order
-
2
;
i
++
)
blockNum
*=
a
->
dimSize
[
i
];
int
blockSize
=
colNum
*
rowNum
;
if
(
a
->
devID
>=
0
||
b
->
devID
>=
0
||
c
->
devID
>=
0
)
{
...
...
source/tensor/core/arithmetic/SumByColumnTV.cu
查看文件 @
c9ef15f8
...
...
@@ -68,7 +68,7 @@ void _CudaSumByColumnTV(const XTensor * a, const XTensor * b, XTensor * c, DTYPE
{
CheckNTErrors((a && b && c), "Empty input tensors!");
CheckNTErrors((XTensor::IsSameShaped(a, c)), "Unmatched tensors in addition!");
CheckNTErrors((b->order == 2 && b->dimSize
RDI[0] == 1 && b->dimSizeRDI[1] == a->dimSizeRDI[1
]),
CheckNTErrors((b->order == 2 && b->dimSize
[b->order - 1] == 1 && b->dimSize[b->order - 2] == a->dimSize[a->order - 2
]),
"Illegal input vector size!");
CheckNTErrors((a->dataType == DEFAULT_DTYPE && b->dataType == DEFAULT_DTYPE &&
c->dataType == DEFAULT_DTYPE), "TODO");
...
...
@@ -76,8 +76,8 @@ void _CudaSumByColumnTV(const XTensor * a, const XTensor * b, XTensor * c, DTYPE
int rowNum = a->dimSize[0];
int colNum = a->dimSize[1];
int blockNum = 1;
for (int i =
2; i < a->order
; i++)
blockNum *= a->dimSize
RDI
[i];
for (int i =
0; i < a->order - 2
; i++)
blockNum *= a->dimSize[i];
int cudaGridSize[3];
int cudaBlockSize[3];
...
...
source/tensor/core/arithmetic/SumByColumnVT.cpp
查看文件 @
c9ef15f8
...
...
@@ -41,7 +41,7 @@ void _SumByColumnVT(const XTensor * a, const XTensor * b, XTensor * c, DTYPE bet
{
CheckNTErrors
((
a
&&
b
&&
c
),
"Empty input tensors!"
);
CheckNTErrors
((
XTensor
::
IsSameShaped
(
a
,
c
)),
"Unmatched tensors in addition!"
);
CheckNTErrors
((
a
->
order
==
2
&&
a
->
dimSize
RDI
[
0
]
==
1
&&
b
->
dimSizeRDI
[
1
]
==
a
->
dimSizeRDI
[
1
]),
CheckNTErrors
((
a
->
order
==
2
&&
a
->
dimSize
[
a
->
order
-
1
]
==
1
&&
b
->
dimSize
[
b
->
order
-
2
]
==
a
->
dimSize
[
a
->
order
-
2
]),
"Illegal input vector size!"
);
if
(
a
->
devID
>=
0
||
b
->
devID
>=
0
||
c
->
devID
>=
0
)
{
...
...
@@ -53,8 +53,8 @@ void _SumByColumnVT(const XTensor * a, const XTensor * b, XTensor * c, DTYPE bet
int
rowNum
=
b
->
dimSize
[
0
];
int
colNum
=
b
->
dimSize
[
1
];
int
blockNum
=
1
;
for
(
int
i
=
2
;
i
<
b
->
order
;
i
++
)
blockNum
*=
b
->
dimSize
RDI
[
i
];
for
(
int
i
=
0
;
i
<
b
->
order
-
2
;
i
++
)
blockNum
*=
b
->
dimSize
[
i
];
int
blockSize
=
colNum
*
rowNum
;
if
(
!
a
->
isSparse
&&
!
b
->
isSparse
)
{
...
...
source/tensor/core/arithmetic/SumByColumnVT.cu
查看文件 @
c9ef15f8
...
...
@@ -84,7 +84,7 @@ void _CudaSumByColumnVT(const XTensor * a, const XTensor * b, XTensor * c, DTYPE
{
CheckNTErrors((a && b && c), "Empty input tensors!");
CheckNTErrors((XTensor::IsSameShaped(a, c)), "Unmatched tensors in addition!");
CheckNTErrors((a->order == 2 && a->dimSize
RDI[0] == 1 && b->dimSizeRDI[1] == a->dimSizeRDI[1
]),
CheckNTErrors((a->order == 2 && a->dimSize
[a->order - 1] == 1 && b->dimSize[b->order - 2] == a->dimSize[a->order - 2
]),
"Illegal input vector size!");
CheckNTErrors((a->dataType == DEFAULT_DTYPE && b->dataType == DEFAULT_DTYPE &&
c->dataType == DEFAULT_DTYPE), "TODO");
...
...
@@ -92,13 +92,13 @@ void _CudaSumByColumnVT(const XTensor * a, const XTensor * b, XTensor * c, DTYPE
int rowNum = b->dimSize[0];
int colNum = b->dimSize[1];
int blockNum = 1;
for (int i =
2; i < b->order
; i++)
blockNum *= b->dimSize
RDI
[i];
for (int i =
0; i < b->order - 2
; i++)
blockNum *= b->dimSize[i];
int cudaGridSize[3];
int cudaBlockSize[3];
GDevs.GetCudaThread(c->devID, a->dimSize
RDI[1
], cudaGridSize, cudaBlockSize);
GDevs.GetCudaThread(c->devID, a->dimSize
[a->order - 2
], cudaGridSize, cudaBlockSize);
int devIDBackup = 0;
ProtectCudaDev(a->devID, devIDBackup);
...
...
source/tensor/core/arithmetic/XTensorBLAS.cpp
查看文件 @
c9ef15f8
...
...
@@ -54,7 +54,7 @@ void _MatrixMULCPU(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
int
bm
=
b
->
dimSize
[
1
];
int
cn
=
c
->
dimSize
[
0
];
int
cm
=
c
->
dimSize
[
1
];
if
(
transposedA
==
X_NOTRANS
&&
transposedB
==
X_NOTRANS
)
GEMM
(
CblasRowMajor
,
CblasNoTrans
,
CblasNoTrans
,
cn
,
cm
,
am
,
alpha
,
(
DTYPE
*
)
a
->
data
,
am
,
(
DTYPE
*
)
b
->
data
,
bm
,
beta
,
(
DTYPE
*
)
c
->
data
,
cm
);
else
if
(
transposedA
==
X_TRANS
&&
transposedB
==
X_NOTRANS
)
...
...
source/tensor/core/math/Normalize.cpp
查看文件 @
c9ef15f8
...
...
@@ -44,26 +44,25 @@ where a and b are the scalar and bias respectively, and \epsilon is the adjustme
*/
void
_Normalize
(
const
XTensor
*
input
,
XTensor
*
output
,
int
dim
,
const
XTensor
*
mean
,
const
XTensor
*
var
,
const
XTensor
*
a
,
const
XTensor
*
b
,
DTYPE
epsilon
)
{
int
dimRDI
=
input
->
order
-
dim
-
1
;
CheckNTErrors
((
XTensor
::
IsSameShaped
(
input
,
output
)),
"Unmatched input tensors!"
);
CheckNTErrors
((
XTensor
::
IsSameShaped
(
a
,
b
)),
"Unmatched input tensors"
);
CheckNTErrors
((
XTensor
::
IsSameShaped
(
mean
,
var
)),
"Unmatched input tensors"
);
CheckNTErrors
((
input
&&
output
&&
mean
&&
var
&&
a
&&
b
),
"Empty input tensors!"
);
CheckNTErrors
((
dim
RDI
>=
0
&&
dimRDI
<
input
->
order
),
"Incorrect reduction dimension!"
);
CheckNTErrors
((
dim
>=
0
&&
dim
<
input
->
order
),
"Incorrect reduction dimension!"
);
CheckNTErrors
((
input
->
order
==
mean
->
order
+
1
),
"Incorrect reduction dimension!"
);
int
stride
=
1
;
int
strideNum
=
input
->
dimSize
RDI
[
dimRDI
];
int
strideNum
=
input
->
dimSize
[
dim
];
int
blockSize
=
1
;
int
blockNum
=
1
;
for
(
int
i
=
0
;
i
<
input
->
order
;
i
++
)
{
if
(
i
<
dim
RDI
)
{
CheckNTErrors
((
input
->
dimSize
RDI
[
i
]
==
mean
->
dimSizeRDI
[
i
]),
"Wrong size!"
);
stride
*=
input
->
dimSizeRDI
[
i
];
if
(
i
<
dim
)
{
CheckNTErrors
((
input
->
dimSize
[
i
]
==
mean
->
dimSize
[
i
]),
"Wrong size!"
);
blockNum
*=
input
->
dimSize
[
i
];
}
else
if
(
i
>
dim
RDI
)
{
CheckNTErrors
((
input
->
dimSize
RDI
[
i
]
==
mean
->
dimSizeRDI
[
i
-
1
]),
"Wrong size!"
);
blockNum
*=
input
->
dimSizeRDI
[
i
];
else
if
(
i
>
dim
)
{
CheckNTErrors
((
input
->
dimSize
[
i
]
==
mean
->
dimSize
[
i
-
1
]),
"Wrong size!"
);
stride
*=
input
->
dimSize
[
i
];
}
}
blockSize
=
stride
*
strideNum
;
...
...
source/tensor/core/math/Normalize.cu
查看文件 @
c9ef15f8
...
...
@@ -95,15 +95,14 @@ void _CudaNormalize(const XTensor * input, XTensor * output, int dim,
{
CheckNTErrors((input->dataType == DEFAULT_DTYPE), "TODO!");
int dimRDI = input->order - dim - 1;
int stride = 1;
int strideNum = input->dimSize
RDI[dimRDI
];
int strideNum = input->dimSize
[dim
];
int blockNum = 1;
for (int i = 0; i < input->order; i++) {
if (i
< dimRDI
)
stride *= input->dimSize
RDI
[i];
else if (i
> dimRDI
)
blockNum *= input->dimSize
RDI
[i];
if (i
> dim
)
stride *= input->dimSize[i];
else if (i
< dim
)
blockNum *= input->dimSize[i];
}
int cudaGridSize[3];
...
...
source/tensor/core/movement/CopyInGrid.cpp
查看文件 @
c9ef15f8
...
...
@@ -40,12 +40,11 @@ void _CopyInGrid(const XTensor * s, XTensor * t, int * index, int blockDim, int
{
CheckNTErrors
((
XTensor
::
IsSameShaped
(
s
,
t
)),
"Unmatched tensors!"
);
int
blockDimRDI
=
s
->
order
-
blockDim
-
1
;
int
blockSize
=
1
;
int
blockNum
=
blockNumInGrid
;
int
gridNum
=
1
;
for
(
int
i
=
0
;
i
<
blockDimRDI
;
i
++
)
blockSize
*=
s
->
dimSize
RDI
[
i
];
for
(
int
i
=
blockDim
;
i
<
s
->
order
;
i
++
)
blockSize
*=
s
->
dimSize
[
i
];
CheckNTErrors
((
s
->
unitNum
%
(
blockSize
*
blockNum
)
==
0
),
"Illegal block number!"
);
gridNum
=
s
->
unitNum
/
(
blockSize
*
blockNum
);
...
...
source/tensor/core/movement/CopyIndexed.cpp
查看文件 @
c9ef15f8
...
...
@@ -52,26 +52,28 @@ void _CopyIndexed(const XTensor * s, XTensor * t, int dim,
CheckNTErrors
((
dim
<
s
->
order
&&
dim
<
t
->
order
),
"A too larget dimension specified!"
);
CheckNTErrors
((
s
->
unitSize
==
t
->
unitSize
),
"Unmatched tensors!"
);
int
dimRDI
=
s
->
order
-
dim
-
1
;
int
blockSizeSrc
=
1
;
int
blockSizeTgt
=
1
;
int
blockNumSrc
=
1
;
int
blockNumTgt
=
1
;
int
leadDimSizeSrc
=
s
->
dimSize
RDI
[
dimRDI
];
int
leadDimSizeTgt
=
t
->
dimSize
RDI
[
dimRDI
];
int
leadDimSizeSrc
=
s
->
dimSize
[
dim
];
int
leadDimSizeTgt
=
t
->
dimSize
[
dim
];
int
indexOffsetNum
=
1
;
for
(
int
i
=
0
;
i
<
dimRDI
;
i
++
)
{
blockSizeSrc
*=
s
->
dimSizeRDI
[
i
];
blockSizeTgt
*=
t
->
dimSizeRDI
[
i
];
for
(
int
i
=
dim
+
1
;
i
<
s
->
order
;
i
++
)
{
blockSizeSrc
*=
s
->
dimSize
[
i
];
}
for
(
int
i
=
dim
+
1
;
i
<
t
->
order
;
i
++
)
{
blockSizeTgt
*=
t
->
dimSize
[
i
];
}
for
(
int
i
=
0
;
i
<=
dim
;
i
++
)
{
blockNumSrc
*=
s
->
dimSize
[
i
];
blockNumTgt
*=
t
->
dimSize
[
i
];
}
for
(
int
i
=
dimRDI
;
i
<
s
->
order
;
i
++
)
blockNumSrc
*=
s
->
dimSizeRDI
[
i
];
for
(
int
i
=
dimRDI
;
i
<
t
->
order
;
i
++
)
blockNumTgt
*=
t
->
dimSizeRDI
[
i
];
CheckNTErrors
((
blockSizeSrc
==
blockSizeTgt
),
"Unmatched tensors!"
);
indexOffsetNum
=
blockNumSrc
/
s
->
dimSize
RDI
[
dimRDI
];
indexOffsetNum
=
blockNumSrc
/
s
->
dimSize
[
dim
];
int
realIndexSize
=
indexOffsetNum
*
indexSize
*
copyNum
;
int
*
realSrcIndex
=
new
int
[
realIndexSize
];
...
...
source/tensor/core/movement/Gather.cu
查看文件 @
c9ef15f8
...
...
@@ -160,16 +160,14 @@ void _CudaGather(const XTensor * s, XTensor * t, XTensor * srcIndex, int dim)
int devID = srcIndex->devID;
XMem * mem = s->mem;
int dimRDI = srcIndex->order - dim - 1;
int stride = 1;
int indexSize = srcIndex->unitNum;
int strideNum = srcIndex->dimSizeRDI[dimRDI];
for (int i = 0; i < dimRDI; i++)
stride *= srcIndex->dimSizeRDI[i];
int blockNum = 1;
for (int i = dimRDI + 1; i < srcIndex->order; i++)
blockNum *= srcIndex->dimSizeRDI[i];
int indexSize = srcIndex->unitNum;
int strideNum = srcIndex->dimSize[dim];
for (int i = 0; i < dim; i++)
blockNum *= srcIndex->dimSize[i];
for (int i = dim + 1; i < srcIndex->order; i++)
stride *= srcIndex->dimSize[i];
int * sIndex = NULL;
if (srcIndex->devID < 0) {
...
...
source/tensor/core/reduce/ReduceMax.cpp
查看文件 @
c9ef15f8
...
...
@@ -43,17 +43,16 @@ void _ReduceMax(const XTensor * input, XTensor * output, int dim)
CheckNTErrors
((
input
->
order
==
output
->
order
+
1
),
"Incorrect tensor sizes!"
);
CheckNTErrors
((
input
->
order
>
dim
&&
dim
>=
0
),
"Illegal dimension to reduce!"
);
CheckNTErrors
((
input
->
dataType
==
output
->
dataType
),
"Unmatched data types!"
);
int
dimRDI
=
input
->
order
-
dim
-
1
;
CheckNTErrors
(
dimRDI
>=
0
,
"Wrong dimension!"
);
CheckNTErrors
(
dim
<
input
->
order
,
"Wrong dimension!"
);
for
(
int
i
=
0
;
i
<
input
->
order
;
i
++
){
if
(
i
<
dim
RDI
){
CheckNTErrors
((
input
->
dimSize
RDI
[
i
]
==
output
->
dimSizeRDI
[
i
]),
if
(
i
<
dim
){
CheckNTErrors
((
input
->
dimSize
[
i
]
==
output
->
dimSize
[
i
]),
"Unmatched tensors!"
);
}
else
if
(
i
>
dim
RDI
){
CheckNTErrors
((
input
->
dimSize
RDI
[
i
]
==
output
->
dimSizeRDI
[
i
-
1
]),
else
if
(
i
>
dim
){
CheckNTErrors
((
input
->
dimSize
[
i
]
==
output
->
dimSize
[
i
-
1
]),
"Unmatched tensors!"
);
}
}
...
...
@@ -67,31 +66,31 @@ void _ReduceMax(const XTensor * input, XTensor * output, int dim)
CheckNTErrors
((
input
->
dataType
==
DEFAULT_DTYPE
),
"TODO!"
);
int
stride
=
1
;
int
strideNum
=
input
->
dimSize
RDI
[
dimRDI
];
int
strideNum
=
input
->
dimSize
[
dim
];
int
blockSize
=
1
;
int
blockNum
=
1
;
for
(
int
i
=
0
;
i
<
input
->
order
;
i
++
)
{
if
(
i
<
dimRDI
)
stride
*=
input
->
dimSize
RDI
[
i
];
else
if
(
i
>
dimRDI
)
blockNum
*=
input
->
dimSize
RDI
[
i
];
if
(
i
>
dim
)
stride
*=
input
->
dimSize
[
i
];
else
if
(
i
<
dim
)
blockNum
*=
input
->
dimSize
[
i
];
}
blockSize
=
stride
*
strideNum
;
if
(
input
->
dimSize
RDI
[
0
]
%
(
4
*
32
/
sizeof
(
DTYPE
))
==
0
&&
input
->
dimSizeRDI
[
0
]
>=
32
){
if
(
input
->
dimSize
[
input
->
order
-
1
]
%
(
4
*
32
/
sizeof
(
DTYPE
))
==
0
&&
input
->
dimSize
[
input
->
order
-
1
]
>=
32
){
int
vecBufLength
=
32
/
sizeof
(
DTYPE
);
if
(
dimRDI
==
0
)
{
if
(
dim
==
input
->
order
-
1
)
{
//data is contiguous in dim 0
for
(
int
i
=
0
;
i
<
blockNum
;
i
++
)
{
for
(
int
i
=
0
;
i
<
blockNum
;
i
++
)
{
DTYPE
*
ip
=
(
DTYPE
*
)
input
->
data
+
blockSize
*
i
;
DTYPE
*
op
=
(
DTYPE
*
)
output
->
data
+
i
;
VectorBuffer
vecBuf
[
4
];
for
(
int
j
=
0
;
j
<
4
;
j
++
)
{
vecBuf
[
j
]
=
VectorBuffer
::
loadu
((
DTYPE
*
)(
ip
)
+
j
*
vecBufLength
);
for
(
int
j
=
0
;
j
<
4
;
j
++
)
{
vecBuf
[
j
]
=
VectorBuffer
::
loadu
((
DTYPE
*
)(
ip
)
+
j
*
vecBufLength
);
}
for
(
int
j
=
1
;
j
<
strideNum
/
32
;
j
++
)
{
for
(
int
j
=
1
;
j
<
strideNum
/
32
;
j
++
)
{
const
DTYPE
*
ptr
=
(
DTYPE
*
)(
ip
+
j
*
vecBufLength
);
vecBuf
[
0
]
=
vecBuf
[
0
].
maxData
(
VectorBuffer
::
loadu
(
ptr
+
0
*
vecBufLength
));
vecBuf
[
1
]
=
vecBuf
[
1
].
maxData
(
VectorBuffer
::
loadu
(
ptr
+
1
*
vecBufLength
));
...
...
@@ -102,16 +101,17 @@ void _ReduceMax(const XTensor * input, XTensor * output, int dim)
vecBuf
[
0
]
=
vecBuf
[
0
].
maxData
(
vecBuf
[
2
]);
vecBuf
[
0
]
=
vecBuf
[
0
].
maxData
(
vecBuf
[
3
]);
DTYPE
maxN
=
DTYPE_MIN
;
for
(
int
k
=
0
;
k
<
vecBufLength
;
k
++
)
{
maxN
=
MAX
(
maxN
,
vecBuf
[
0
][
k
]);
for
(
int
k
=
0
;
k
<
vecBufLength
;
k
++
)
{
maxN
=
MAX
(
maxN
,
vecBuf
[
0
][
k
]);
}
*
op
=
maxN
;
}
}
else
{
}
else
{
//data is separated
for
(
int
i
=
0
;
i
<
blockNum
;
i
++
){
for
(
int
j
=
0
;
j
<
input
->
dimSize
RDI
[
0
]
/
32
;
j
++
){
for
(
int
j
=
0
;
j
<
input
->
dimSize
[
input
->
order
-
1
]
/
32
;
j
++
){
DTYPE
*
ip
=
(
DTYPE
*
)
input
->
data
+
blockSize
*
i
;
DTYPE
*
op
=
(
DTYPE
*
)
output
->
data
+
stride
*
i
;
VectorBuffer
vecBuf
[
4
];
...
...
source/tensor/core/reduce/ReduceMax.cu
查看文件 @
c9ef15f8
...
...
@@ -504,13 +504,12 @@ void _CudaReduceMax(const XTensor * input, XTensor * output, int dim)
CheckNTErrors(input->order > dim && dim >=0, "Illegal dimension to reduce!");
CheckNTErrors(input->dataType == output->dataType, "Unmatched data types!");
int dimRDI = input->order - dim - 1;
for(int i = 0; i < input->order; i++){
if(i < dim
RDI
){
CheckNTErrors(input->dimSize
RDI[i] == output->dimSizeRDI
[i], "Unmatched tensors!");
if(i < dim){
CheckNTErrors(input->dimSize
[i] == output->dimSize
[i], "Unmatched tensors!");
}
else if(i > dim
RDI
){
CheckNTErrors(input->dimSize
RDI[i] == output->dimSizeRDI
[i - 1], "Unmatched tensors!");
else if(i > dim){
CheckNTErrors(input->dimSize
[i] == output->dimSize
[i - 1], "Unmatched tensors!");
}
}
...
...
@@ -518,15 +517,15 @@ void _CudaReduceMax(const XTensor * input, XTensor * output, int dim)
int cudaBlockSize[3];
int iter = 0;
int stride = 1;
int strideNum = input->dimSize
RDI[dimRDI
];
int strideNum = input->dimSize
[dim
];
int blockSize = 1;
int blockNum = 1;
for (int i = 0; i < input->order; i++) {
if (i < dim
RDI
)
stride *= input->dimSizeRDI
[i];
else if (i > dim
RDI
)
blockNum *= input->dimSizeRDI
[i];
if (i < dim)
blockNum *= input->dimSize
[i];
else if (i > dim)
stride *= input->dimSize
[i];
}
blockSize = stride * strideNum;
...
...
source/tensor/core/reduce/ReduceMean.cpp
查看文件 @
c9ef15f8
...
...
@@ -39,8 +39,7 @@ void _ReduceMean(const XTensor * input, XTensor * output, int dim)
{
CheckNTErrors
((
input
->
order
>
dim
),
"Illegal dimension specified!"
);
int
dimRDI
=
input
->
order
-
dim
-
1
;
int
num
=
input
->
dimSizeRDI
[
dimRDI
];
int
num
=
input
->
dimSize
[
dim
];
_ReduceSum
(
input
,
output
,
dim
);
_ScaleAndShiftMe
(
output
,
(
DTYPE
)
1
/
num
,
0
);
...
...
source/tensor/core/reduce/ReduceSum.cpp
查看文件 @
c9ef15f8
...
...
@@ -53,15 +53,14 @@ void _ReduceSum(const XTensor * input, XTensor * output, int dim, const XTensor
CheckNTErrors
((
input
->
dataType
==
output
->
dataType
),
"Unmatched data types!"
);
CheckNTErrors
((
shift
==
NULL
||
XTensor
::
IsSameShaped
(
output
,
shift
)),
"Incorrect shift tensor size!"
);
int
dimRDI
=
input
->
order
-
dim
-
1
;
CheckNTErrors
(
dimRDI
>=
0
,
"Wrong dimension!"
);
CheckNTErrors
(
dim
<
input
->
order
,
"Wrong dimension!"
);
for
(
int
i
=
0
;
i
<
input
->
order
;
i
++
){
if
(
i
<
dim
RDI
){
CheckNTErrors
((
input
->
dimSize
RDI
[
i
]
==
output
->
dimSizeRDI
[
i
]),
"Unmatched tensors!"
);
if
(
i
<
dim
){
CheckNTErrors
((
input
->
dimSize
[
i
]
==
output
->
dimSize
[
i
]),
"Unmatched tensors!"
);
}
else
if
(
i
>
dim
RDI
){
CheckNTErrors
((
input
->
dimSize
RDI
[
i
]
==
output
->
dimSizeRDI
[
i
-
1
]),
"Unmatched tensors!"
);
else
if
(
i
>
dim
){
CheckNTErrors
((
input
->
dimSize
[
i
]
==
output
->
dimSize
[
i
-
1
]),
"Unmatched tensors!"
);
}
}
...
...
@@ -74,21 +73,21 @@ void _ReduceSum(const XTensor * input, XTensor * output, int dim, const XTensor
CheckNTErrors
((
input
->
dataType
==
DEFAULT_DTYPE
),
"TODO!"
);
int
stride
=
1
;
int
strideNum
=
input
->
dimSize
RDI
[
dimRDI
];
int
strideNum
=
input
->
dimSize
[
dim
];
int
blockSize
=
1
;
int
blockNum
=
1
;
for
(
int
i
=
0
;
i
<
input
->
order
;
i
++
)
{
if
(
i
<
dim
RDI
)
stride
*=
input
->
dimSizeRDI
[
i
];
else
if
(
i
>
dim
RDI
)
blockNum
*=
input
->
dimSizeRDI
[
i
];
if
(
i
<
dim
)
blockNum
*=
input
->
dimSize
[
i
];
else
if
(
i
>
dim
)
stride
*=
input
->
dimSize
[
i
];
}
blockSize
=
stride
*
strideNum
;
if
(
input
->
dimSize
RDI
[
0
]
%
(
4
*
32
/
sizeof
(
DTYPE
))
==
0
&&
input
->
dimSizeRDI
[
0
]
>=
32
){
if
(
input
->
dimSize
[
input
->
order
-
1
]
%
(
4
*
32
/
sizeof
(
DTYPE
))
==
0
&&
input
->
dimSize
[
input
->
order
-
1
]
>=
32
){
int
vecBufLength
=
32
/
sizeof
(
DTYPE
);
if
(
dim
RDI
==
0
){
if
(
dim
==
input
->
order
-
1
){
//data is contiguous in dim 0
for
(
int
i
=
0
;
i
<
blockNum
;
i
++
){
// stride = 1
...
...
@@ -122,7 +121,7 @@ void _ReduceSum(const XTensor * input, XTensor * output, int dim, const XTensor
}
else
{
//data is separated
for
(
int
i
=
0
;
i
<
blockNum
;
i
++
){
for
(
int
j
=
0
;
j
<
input
->
dimSize
RDI
[
0
]
/
32
;
j
++
){
for
(
int
j
=
0
;
j
<
input
->
dimSize
[
input
->
order
-
1
]
/
32
;
j
++
){
DTYPE
*
ip
=
(
DTYPE
*
)
input
->
data
+
blockSize
*
i
;
DTYPE
*
op
=
(
DTYPE
*
)
output
->
data
+
stride
*
i
;
DTYPE
*
sp
=
shift
!=
NULL
?
(
DTYPE
*
)
shift
->
data
+
stride
*
i
:
NULL
;
...
...
source/tensor/core/reduce/ReduceSum.cu
查看文件 @
c9ef15f8
...
...
@@ -692,13 +692,12 @@ void _CudaReduceSum(const XTensor * input, XTensor * output, int dim, const XTen
CheckNTErrors(input->dataType == output->dataType, "Unmatched data types!");
CheckNTErrors(shift == NULL || output->unitNum == shift->unitNum, "Incorrect shift tensor size!");
int dimRDI = input->order - dim - 1;
for(int i = 0; i < input->order; i++){
if(i < dim
RDI
){
CheckNTErrors(input->dimSize
RDI[i] == output->dimSizeRDI
[i], "Unmatched tensors!");
if(i < dim){
CheckNTErrors(input->dimSize
[i] == output->dimSize
[i], "Unmatched tensors!");
}
else if(i > dim
RDI
){
CheckNTErrors(input->dimSize
RDI[i] == output->dimSizeRDI
[i - 1], "Unmatched tensors!");
else if(i > dim){
CheckNTErrors(input->dimSize
[i] == output->dimSize
[i - 1], "Unmatched tensors!");
}
}
...
...
@@ -709,15 +708,15 @@ void _CudaReduceSum(const XTensor * input, XTensor * output, int dim, const XTen
int cudaBlockSize[3];
int iter = 0;
int stride = 1;
int strideNum = input->dimSize
RDI[dimRDI
];
int strideNum = input->dimSize
[dim
];
int blockSize = 1;
int blockNum = 1;
for (int i = 0; i < input->order; i++) {
if (i < dim
RDI
)
stride *= input->dimSizeRDI
[i];
else if (i > dim
RDI
)
blockNum *= input->dimSizeRDI
[i];
if (i < dim)
blockNum *= input->dimSize
[i];
else if (i > dim)
stride *= input->dimSize
[i];
}
blockSize = stride * strideNum;
...
...
source/tensor/core/reduce/ReduceVariance.cpp
查看文件 @
c9ef15f8
...
...
@@ -38,8 +38,7 @@ For a 1-dimensional data array a, variance = 1/n * \sum_i (a_i - mean)^2
*/
void
_ReduceVariance
(
const
XTensor
*
input
,
XTensor
*
output
,
int
dim
,
const
XTensor
*
mean
)
{
int
dimRDI
=
input
->
order
-
dim
-
1
;
int
num
=
input
->
dimSizeRDI
[
dimRDI
];
int
num
=
input
->
dimSize
[
dim
];
_ReduceSum
(
input
,
output
,
dim
,
mean
,
2.0
F
);
_ScaleAndShiftMe
(
output
,
(
DTYPE
)
1
/
num
,
0
);
}
...
...
source/tensor/core/reduce/VectorBuffer.cpp
查看文件 @
c9ef15f8
...
...
@@ -20,7 +20,7 @@
*/
#include "VectorBuffer.h"
#include "math.h"
namespace
nts
{
/* data size for each buffer */
int
VectorBuffer
::
size
()
...
...
source/tensor/core/shape/ConcatenateSolely.cpp
查看文件 @
c9ef15f8
...
...
@@ -39,30 +39,29 @@ void _ConcatenateSolely(const XList * smalls, XTensor * big, int dim)
CheckNTErrors
(
big
->
order
>
dim
&&
dim
>=
0
,
"Illegal dimension to concatenate!"
);
int
catDimSize
=
0
;
int
dimRDI
=
big
->
order
-
dim
-
1
;
for
(
int
i
=
0
;
i
<
smalls
->
count
;
i
++
)
{
XTensor
*
tensor
=
(
XTensor
*
)
smalls
->
GetItem
(
i
);
CheckNTErrors
((
big
->
order
==
tensor
->
order
),
"Unmatched tensor orders!"
);
for
(
int
j
=
0
;
j
<
big
->
order
;
j
++
)
{
if
(
j
!=
dim
RDI
)
{
CheckNTErrors
((
big
->
dimSize
RDI
[
j
]
==
tensor
->
dimSizeRDI
[
j
]),
"Unmatched tensor sizes!"
);
if
(
j
!=
dim
)
{
CheckNTErrors
((
big
->
dimSize
[
j
]
==
tensor
->
dimSize
[
j
]),
"Unmatched tensor sizes!"
);
}
else
{
catDimSize
+=
tensor
->
dimSize
RDI
[
j
];
catDimSize
+=
tensor
->
dimSize
[
j
];
}
}
}
CheckNTErrors
((
catDimSize
==
big
->
dimSize
RDI
[
dimRDI
]),
"Unmatched tensor sizes!"
);
CheckNTErrors
((
catDimSize
==
big
->
dimSize
[
dim
]),
"Unmatched tensor sizes!"
);
int
stride
=
1
;
for
(
int
i
=
0
;
i
<
dimRDI
;
i
++
)
stride
*=
big
->
dimSizeRDI
[
i
];
int
blockNum
=
1
;
for
(
int
i
=
dimRDI
+
1
;
i
<
big
->
order
;
i
++
)
blockNum
*=
big
->
dimSizeRDI
[
i
];
for
(
int
i
=
0
;
i
<
dim
;
i
++
)
blockNum
*=
big
->
dimSize
[
i
];
for
(
int
i
=
dim
+
1
;
i
<
big
->
order
;
i
++
)
stride
*=
big
->
dimSize
[
i
];
int
offset
=
0
;
...
...
@@ -74,8 +73,8 @@ void _ConcatenateSolely(const XList * smalls, XTensor * big, int dim)
if
(
smalls
->
count
<=
MIN_TENSOR_CAT_NUM
)
{
for
(
int
i
=
0
;
i
<
smalls
->
count
;
i
++
)
{
XTensor
*
tensor
=
(
XTensor
*
)
smalls
->
GetItem
(
i
);
int
sPitch
=
stride
*
tensor
->
dimSize
RDI
[
dimRDI
]
*
tensor
->
unitSize
;
int
tPitch
=
stride
*
big
->
dimSize
RDI
[
dimRDI
]
*
big
->
unitSize
;
int
sPitch
=
stride
*
tensor
->
dimSize
[
dim
]
*
tensor
->
unitSize
;
int
tPitch
=
stride
*
big
->
dimSize
[
dim
]
*
big
->
unitSize
;
int
mSize
=
sPitch
;
int
n
=
blockNum
;
XMemCopy2D
((
char
*
)
big
->
data
+
offset
,
tPitch
,
big
->
devID
,
...
...
@@ -89,7 +88,7 @@ void _ConcatenateSolely(const XList * smalls, XTensor * big, int dim)
int
*
blockSizes
=
new
int
[
smalls
->
count
];
for
(
int
i
=
0
;
i
<
smalls
->
count
;
i
++
)
{
XTensor
*
tensor
=
(
XTensor
*
)
smalls
->
GetItem
(
i
);
blockSizes
[
i
]
=
stride
*
tensor
->
dimSize
RDI
[
dimRDI
]
*
tensor
->
unitSize
;
blockSizes
[
i
]
=
stride
*
tensor
->
dimSize
[
dim
]
*
tensor
->
unitSize
;
sourceArrays
->
Add
(
tensor
->
data
);
}
...
...
source/tensor/core/shape/Merge.cpp
查看文件 @
c9ef15f8
...
...
@@ -45,10 +45,8 @@ void _Merge(const XTensor * s, XTensor * t, int whereToMerge, int leadingDim)
if
(
leadingDim
<
0
)
leadingDim
=
0
;
int
whereToMergeRDI
=
s
->
order
-
whereToMerge
-
1
;
int
leadingDimRDI
=
s
->
order
-
leadingDim
-
1
;
if
(
leadingDimRDI
<
0
)
leadingDimRDI
=
s
->
order
-
1
;
if
(
leadingDim
>=
s
->
order
)
leadingDim
=
leadingDim
-
s
->
order
;
CheckNTErrors
((
s
!=
NULL
&&
t
!=
NULL
),
"Invalid tensors!"
);
CheckNTErrors
((
s
->
devID
==
t
->
devID
||
(
s
->
devID
<
0
&&
t
->
devID
<
0
)),
...
...
@@ -56,19 +54,20 @@ void _Merge(const XTensor * s, XTensor * t, int whereToMerge, int leadingDim)
CheckNTErrors
((
s
->
unitNum
==
t
->
unitNum
&&
s
->
unitSize
==
t
->
unitSize
),
"Unmatched tensors!"
);
CheckNTErrors
((
s
->
order
==
t
->
order
+
1
),
"Unmatched tensors!"
);
CheckNTErrors
((
leadingDim
RDI
>
whereToMergeRDI
),
"Invalid leading dimension!"
);
CheckNTErrors
((
leadingDim
<
whereToMerge
),
"Invalid leading dimension!"
);
for
(
int
i
=
0
;
i
<
s
->
order
;
i
++
)
{
if
(
i
==
whereToMergeRDI
)
{
CheckNTErrors
((
t
->
dimSizeRDI
[
i
]
==
s
->
dimSizeRDI
[
i
]
*
s
->
dimSizeRDI
[
leadingDimRDI
]),
if
(
i
==
whereToMerge
)
{
CheckNTErrors
((
t
->
dimSize
[
i
-
1
]
==
s
->
dimSize
[
i
]
*
s
->
dimSize
[
leadingDim
]),
"Unmatched tensor sizes!"
);
}
else
if
(
i
<
leadingDim
RDI
){
CheckNTErrors
((
s
->
dimSize
RDI
[
i
]
==
t
->
dimSizeRDI
[
i
]),
else
if
(
i
<
leadingDim
){
CheckNTErrors
((
s
->
dimSize
[
i
]
==
t
->
dimSize
[
i
]),
"Unmatched tensor sizes!"
);
}
else
if
(
i
>
leadingDim
RDI
)
{
CheckNTErrors
((
s
->
dimSize
RDI
[
i
]
==
t
->
dimSizeRDI
[
i
-
1
]),
else
if
(
i
>
leadingDim
)
{
CheckNTErrors
((
s
->
dimSize
[
i
]
==
t
->
dimSize
[
i
-
1
]),
"Unmatched tensor sizes!"
);
}
}
...
...
@@ -77,14 +76,14 @@ void _Merge(const XTensor * s, XTensor * t, int whereToMerge, int leadingDim)
int
blockNum
=
1
;
int
gridSize
=
1
;
int
gridNum
=
1
;
int
mergedNum
=
s
->
dimSize
RDI
[
leadingDimRDI
];
int
mergedNum
=
s
->
dimSize
[
leadingDim
];
for
(
int
i
=
0
;
i
<
s
->
order
;
i
++
)
{
if
(
i
<=
leadingDimRDI
)
{
if
(
i
<=
whereToMergeRDI
)
blockSize
*=
s
->
dimSize
RDI
[
i
];
if
(
i
>=
leadingDim
)
{
if
(
i
>=
whereToMerge
)
blockSize
*=
s
->
dimSize
[
i
];
else
blockNum
*=
s
->
dimSize
RDI
[
i
];
blockNum
*=
s
->
dimSize
[
i
];
}
}
...
...
@@ -121,7 +120,7 @@ void _Merge(const XTensor * s, XTensor * t, int whereToMerge, int leadingDim)
if
(
!
isOnSameDevice
)
dataTMP
=
mem
!=
NULL
?
mem
->
AllocBuf
(
mem
->
devID
,
size
)
:
XMemAlloc
(
mem
->
devID
,
size
);
int
blockNumInMerge
=
s
->
dimSize
RDI
[
leadingDimRDI
];
int
blockNumInMerge
=
s
->
dimSize
[
leadingDim
];
int
splitSizeInGrid
=
gridSize
/
blockNumInMerge
;
int
realBlockSize
=
blockSize
*
t
->
unitSize
;
...
...
@@ -238,12 +237,11 @@ void _Merge(const XList * smalls, XTensor * big, int whereToMerge)
int
mergedNum
=
smalls
->
count
;
XTensor
*
s0
=
(
XTensor
*
)
smalls
->
GetItem
(
0
);
int
whereToMergeRDI
=
s0
->
order
-
whereToMerge
-
1
;
for
(
int
i
=
0
;
i
<
s0
->
order
;
i
++
)
{
if
(
i
<=
whereToMergeRDI
)
blockSize
*=
s0
->
dimSize
RDI
[
i
];
if
(
i
>=
whereToMerge
)
blockSize
*=
s0
->
dimSize
[
i
];
else
blockNum
*=
s0
->
dimSize
RDI
[
i
];
blockNum
*=
s0
->
dimSize
[
i
];
}
CheckNTErrors
((
s0
->
unitNum
%
(
blockSize
*
blockNum
)
==
0
),
"Incorrect size!"
);
...
...
source/tensor/core/shape/Split.cpp
查看文件 @
c9ef15f8
...
...
@@ -46,23 +46,22 @@ void _Split(const XTensor * s, XTensor * t, int whereToSplit, int splitNum)
CheckNTErrors
((
s
->
unitNum
==
t
->
unitNum
&&
s
->
unitSize
==
t
->
unitSize
),
"Unmatched tensors!"
);
CheckNTErrors
((
s
->
order
==
t
->
order
-
1
),
"Unmatched tensors!"
);
CheckNTErrors
((
t
->
dimSize
RDI
[
t
->
order
-
1
]
==
splitNum
),
"Incorrect tensor sizes!"
);
CheckNTErrors
((
t
->
dimSize
[
0
]
==
splitNum
),
"Incorrect tensor sizes!"
);
int
whereToSplitRDI
=
s
->
order
-
whereToSplit
-
1
;
for
(
int
i
=
0
;
i
<
s
->
order
;
i
++
)
{
if
(
i
==
whereToSplit
RDI
)
{
CheckNTErrors
((
s
->
dimSize
RDI
[
i
]
==
t
->
dimSizeRDI
[
i
]
*
splitNum
),
if
(
i
==
whereToSplit
)
{
CheckNTErrors
((
s
->
dimSize
[
i
]
==
t
->
dimSize
[
i
+
1
]
*
splitNum
),
"Unmatched tensor sizes!"
);
}
else
{
CheckNTErrors
((
s
->
dimSize
RDI
[
i
]
==
t
->
dimSizeRDI
[
i
]),
CheckNTErrors
((
s
->
dimSize
[
i
]
==
t
->
dimSize
[
i
+
1
]),
"Unmatched tensor sizes!"
);
}
}
/* for the case that we split the last dimension. Actually
(N, M) and (N, M/3, 3) have the same memory layout */
if
(
s
->
order
-
1
==
whereToSplitRDI
)
{
if
(
0
==
whereToSplit
)
{
XMemCopy
(
t
->
data
,
t
->
devID
,
s
->
data
,
s
->
devID
,
s
->
unitNum
*
s
->
unitSize
);
return
;
}
...
...
@@ -70,14 +69,14 @@ void _Split(const XTensor * s, XTensor * t, int whereToSplit, int splitNum)
int
blockSize
=
1
;
int
blockNum
=
1
;
for
(
int
i
=
0
;
i
<
s
->
order
;
i
++
)
{
if
(
i
==
whereToSplit
RDI
)
{
blockSize
*=
s
->
dimSize
RDI
[
i
]
/
splitNum
;
if
(
i
==
whereToSplit
)
{
blockSize
*=
s
->
dimSize
[
i
]
/
splitNum
;
blockNum
*=
splitNum
;
}
else
if
(
i
<
whereToSplitRDI
)
blockSize
*=
s
->
dimSize
RDI
[
i
];
else
if
(
i
>
whereToSplit
)
blockSize
*=
s
->
dimSize
[
i
];
else
blockNum
*=
s
->
dimSize
RDI
[
i
];
blockNum
*=
s
->
dimSize
[
i
];
}
CheckNTErrors
((
blockNum
%
splitNum
==
0
),
"Incorrect split number!"
);
...
...
@@ -215,7 +214,6 @@ void _Split(const XTensor * big, XList * smalls, int whereToSplit, int splitNum)
CheckNTErrors
((
smalls
->
count
==
splitNum
),
"Unmatched tensors!"
);
CheckNTErrors
((
smalls
->
count
>
0
),
"Wrong input!"
);
int
whereToSplitRDI
=
big
->
order
-
whereToSplit
-
1
;
bool
uniform
=
true
;
for
(
int
i
=
0
;
i
<
smalls
->
count
;
i
++
)
{
...
...
@@ -231,14 +229,14 @@ void _Split(const XTensor * big, XList * smalls, int whereToSplit, int splitNum)
int
blockSize
=
1
;
int
blockNum
=
1
;
for
(
int
i
=
0
;
i
<
big
->
order
;
i
++
)
{
if
(
i
==
whereToSplit
RDI
)
{
blockSize
*=
big
->
dimSize
RDI
[
i
]
/
splitNum
;
if
(
i
==
whereToSplit
)
{
blockSize
*=
big
->
dimSize
[
i
]
/
splitNum
;
blockNum
*=
splitNum
;
}
else
if
(
i
<
whereToSplitRDI
)
blockSize
*=
big
->
dimSize
RDI
[
i
];
else
if
(
i
>
whereToSplit
)
blockSize
*=
big
->
dimSize
[
i
];
else
blockNum
*=
big
->
dimSize
RDI
[
i
];
blockNum
*=
big
->
dimSize
[
i
];
}
CheckNTErrors
((
blockNum
%
splitNum
==
0
),
"Incorrect split number!"
);
...
...
source/tensor/core/shape/Unsqueeze.cpp
查看文件 @
c9ef15f8
...
...
@@ -42,16 +42,15 @@ void _Unsqueeze(const XTensor * a, XTensor * b, int dim, int dSize)
CheckNTErrors
((
a
->
order
==
b
->
order
-
1
),
"Unmatched tensors!"
);
CheckNTErrors
((
a
->
unitSize
==
b
->
unitSize
),
"Unmatched tensors!"
);
int
dimRDI
=
b
->
order
-
dim
-
1
;
for
(
int
i
=
0
;
i
<
b
->
order
;
i
++
)
{
if
(
i
<
dim
RDI
)
{
CheckNTErrors
((
a
->
dimSize
RDI
[
i
]
==
b
->
dimSizeRDI
[
i
]),
"Unmatched tensors!"
);
if
(
i
<
dim
)
{
CheckNTErrors
((
a
->
dimSize
[
i
]
==
b
->
dimSize
[
i
]),
"Unmatched tensors!"
);
}
else
if
(
i
>
dim
RDI
)
{
CheckNTErrors
((
a
->
dimSize
RDI
[
i
-
1
]
==
b
->
dimSizeRDI
[
i
]),
"Unmatched tensors!"
);
else
if
(
i
>
dim
)
{
CheckNTErrors
((
a
->
dimSize
[
i
-
1
]
==
b
->
dimSize
[
i
]),
"Unmatched tensors!"
);
}
else
{
CheckNTErrors
((
dSize
==
b
->
dimSize
RDI
[
i
]),
"Unmatched tensors!"
);
CheckNTErrors
((
dSize
==
b
->
dimSize
[
i
]),
"Unmatched tensors!"
);
}
}
...
...
@@ -60,8 +59,8 @@ void _Unsqueeze(const XTensor * a, XTensor * b, int dim, int dSize)
int
blockNumA
=
1
;
int
blockNumB
=
1
;
for
(
int
i
=
0
;
i
<
dimRDI
;
i
++
)
blockSize
*=
a
->
dimSize
RDI
[
i
];
for
(
int
i
=
dim
;
i
<
a
->
order
;
i
++
)
blockSize
*=
a
->
dimSize
[
i
];
realBlockSize
=
blockSize
*
a
->
unitSize
;
...
...
source/tensor/core/shape/Unsqueeze.cu
查看文件 @
c9ef15f8
...
...
@@ -235,9 +235,8 @@ void _CudaUnsqueeze(const XTensor * a, XTensor * b, int dim, int dSize)
int blockSize = 1;
int blockNumA = 1;
int blockNumB = 1;
int dimRDI = b->order - dim - 1;
for (int i = 0; i < dimRDI; i++)
blockSize *= a->dimSizeRDI[i];
for (int i = dim; i < a->order; i++)
blockSize *= a->dimSize[i];
blockNumA = a->unitNum / blockSize;
blockNumB = b->unitNum / blockSize;
...
...
@@ -250,7 +249,7 @@ void _CudaUnsqueeze(const XTensor * a, XTensor * b, int dim, int dSize)
int devIDBackup = 0;
ProtectCudaDev(a->devID, devIDBackup);
if (dim
RDI == 0
) {
if (dim
== b->order - 1
) {
GDevs.GetCudaThread2D(a->devID, dSize, blockNumA, MAX_INT, cudaGrids, cudaBlocks);
if (a->dataType == X_FLOAT && b->dataType == X_FLOAT) {
...
...
source/tensor/core/sort/Sort.cpp
查看文件 @
c9ef15f8
...
...
@@ -42,7 +42,6 @@ void _Sort(const XTensor * a, XTensor * b, XTensor * index, int dim)
CheckNTErrors
((
a
->
order
==
index
->
order
),
"Unmatched input tensors!"
);
CheckNTErrors
((
index
->
dataType
==
X_INT
),
"Wrong data type!"
);
int
dimRDI
=
a
->
order
-
dim
-
1
;
/* make the index tensor */
index
->
SetAscendingOrder
(
dim
);
...
...
@@ -55,13 +54,13 @@ void _Sort(const XTensor * a, XTensor * b, XTensor * index, int dim)
}
else
{
int
stride
=
1
;
int
strideNum
=
a
->
dimSizeRDI
[
dimRDI
];
for
(
int
i
=
0
;
i
<
dimRDI
;
i
++
)
stride
*=
a
->
dimSizeRDI
[
i
];
int
blockNum
=
1
;
for
(
int
i
=
dimRDI
+
1
;
i
<
a
->
order
;
i
++
)
blockNum
*=
a
->
dimSizeRDI
[
i
];
int
strideNum
=
a
->
dimSize
[
dim
];
for
(
int
i
=
0
;
i
<
dim
;
i
++
)
blockNum
*=
a
->
dimSize
[
i
];
for
(
int
i
=
dim
+
1
;
i
<
a
->
order
;
i
++
)
stride
*=
a
->
dimSize
[
i
];
int
blockSize
=
stride
*
strideNum
;
_CopyValues
(
a
,
b
);
...
...
source/tensor/core/sort/Sort.cu
查看文件 @
c9ef15f8
...
...
@@ -217,20 +217,19 @@ void _CudaSortBig(const XTensor * a, XTensor * b, XTensor * indexA, XTensor * in
CheckNTErrors((a->order > dim && dim >= 0), "Incorrect dimension specified!");
CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!");
int dimRDI = a->order - dim - 1;
if (k < 0 || k > b->dimSizeRDI[dimRDI])
k = b->dimSizeRDI[dimRDI];
if (k < 0 || k > b->dimSize[dim])
k = b->dimSize[dim];
XMem * mem = a->mem;
int stride = 1;
int strideNum = a->dimSizeRDI[dimRDI];
for (int i = 0; i < dimRDI; i++)
stride *= a->dimSizeRDI[i];
int blockNum = 1;
for (int i = dimRDI + 1; i < a->order; i++)
blockNum *= a->dimSizeRDI[i];
int strideNum = a->dimSize[dim];
for (int i = 0; i < dim; i++)
blockNum *= a->dimSize[i];
for (int i = dim + 1; i < a->order; i++)
stride *= a->dimSize[i];
int m = GetNextPower2(strideNum);
int n = stride * blockNum;
...
...
source/tensor/core/sort/TopK.cpp
查看文件 @
c9ef15f8
...
...
@@ -41,15 +41,14 @@ void _TopK(const XTensor * a, XTensor * b, XTensor * index, int dim, int k)
CheckNTErrors
((
index
==
NULL
||
a
->
order
==
index
->
order
),
"Unmatched input tensors!"
);
CheckNTErrors
((
index
->
dataType
==
X_INT
),
"Wrong data type!"
);
int
dimRDI
=
a
->
order
-
dim
-
1
;
for
(
int
i
=
0
;
i
<
a
->
order
;
i
++
)
{
if
(
i
==
dim
RDI
)
{
CheckNTErrors
((
b
->
dimSize
RDI
[
i
]
==
k
),
"A too large K"
);
CheckNTErrors
((
index
==
NULL
||
index
->
dimSize
RDI
[
i
]
==
k
),
"Wrong size!"
);
if
(
i
==
dim
)
{
CheckNTErrors
((
b
->
dimSize
[
i
]
==
k
),
"A too large K"
);
CheckNTErrors
((
index
==
NULL
||
index
->
dimSize
[
i
]
==
k
),
"Wrong size!"
);
}
else
{
CheckNTErrors
((
b
->
dimSize
RDI
[
i
]
==
a
->
dimSizeRDI
[
i
]),
"Wrong size!"
);
CheckNTErrors
((
index
==
NULL
||
index
->
dimSize
RDI
[
i
]
==
a
->
dimSizeRDI
[
i
]),
"Wrong size!"
);
CheckNTErrors
((
b
->
dimSize
[
i
]
==
a
->
dimSize
[
i
]),
"Wrong size!"
);
CheckNTErrors
((
index
==
NULL
||
index
->
dimSize
[
i
]
==
a
->
dimSize
[
i
]),
"Wrong size!"
);
}
}
...
...
@@ -64,14 +63,14 @@ void _TopK(const XTensor * a, XTensor * b, XTensor * index, int dim, int k)
CheckNTErrors
((
a
->
dataType
==
DEFAULT_DTYPE
),
"TODO!"
);
int
stride
=
1
;
int
strideNumA
=
a
->
dimSizeRDI
[
dimRDI
];
int
strideNumB
=
b
->
dimSizeRDI
[
dimRDI
];
for
(
int
i
=
0
;
i
<
dimRDI
;
i
++
)
stride
*=
a
->
dimSizeRDI
[
i
];
int
blockNum
=
1
;
for
(
int
i
=
dimRDI
+
1
;
i
<
a
->
order
;
i
++
)
blockNum
*=
a
->
dimSizeRDI
[
i
];
int
strideNumA
=
a
->
dimSize
[
dim
];
int
strideNumB
=
b
->
dimSize
[
dim
];
for
(
int
i
=
0
;
i
<
dim
;
i
++
)
blockNum
*=
a
->
dimSize
[
i
];
for
(
int
i
=
dim
+
1
;
i
<
a
->
order
;
i
++
)
stride
*=
a
->
dimSize
[
i
];
int
blockSizeA
=
stride
*
strideNumA
;
int
blockSizeB
=
stride
*
strideNumB
;
...
...
source/tensor/core/sort/TopK.cu
查看文件 @
c9ef15f8
...
...
@@ -811,15 +811,14 @@ void _CudaTopK(const XTensor * a, XTensor * b, XTensor * index, int dim, int k)
CheckNTErrors((index->dataType == X_INT), "Wrong data type!");
CheckNTErrors((b->dimSize[dim] == k), "A too large K");
int dimRDI = a->order - dim - 1;
int stride = 1;
int strideNumA = a->dimSizeRDI[dimRDI];
for (int i = 0; i < dimRDI; i++)
stride *= a->dimSizeRDI[i];
int blockNum = 1;
for (int i = dimRDI + 1; i < a->order; i++)
blockNum *= a->dimSizeRDI[i];
int strideNumA = a->dimSize[dim];
for (int i = 0; i < dim; i++)
blockNum *= a->dimSize[i];
for (int i = dim + 1; i < a->order; i++)
stride *= a->dimSize[i];
int workerNum = blockNum < 16 ? 64 : 32;
/* adjust the thread num according size of k for fitting the share memory size */
...
...
source/tensor/core/utilities/SetAscendingOrder.cu
查看文件 @
c9ef15f8
...
...
@@ -67,15 +67,14 @@ void CudaSetAscendingOrder(XTensor * a, int dim)
{
CheckNTErrors((a->dataType == X_INT), "TODO!");
int dimRDI = a->order - dim - 1;
int stride = 1;
int strideNum = a->dimSizeRDI[dimRDI];
for(int i = 0; i < dimRDI; i++)
stride *= a->dimSizeRDI[i];
int blockNum = 1;
for(int i = dimRDI + 1; i < a->order; i++)
blockNum *= a->dimSizeRDI[i];
int strideNum = a->dimSize[dim];
for(int i = 0; i < dim; i++)
blockNum *= a->dimSize[i];
for(int i = dim + 1; i < a->order; i++)
stride *= a->dimSize[i];
int gridSize[3];
int blockSize[3];
...
...
source/tensor/function/LogSoftmax.cpp
查看文件 @
c9ef15f8
...
...
@@ -49,7 +49,6 @@ void _LogSoftmax(const XTensor * x, XTensor * y, int leadDim)
return
;
}
int
leadDimRDI
=
x
->
order
-
leadDim
-
1
;
if
(
!
x
->
isSparse
&&
!
y
->
isSparse
&&
x
->
dataType
==
DEFAULT_DTYPE
&&
y
->
dataType
==
DEFAULT_DTYPE
)
{
...
...
@@ -69,13 +68,13 @@ void _LogSoftmax(const XTensor * x, XTensor * y, int leadDim)
XTensor
*
blockMax
=
NULL
;
XTensor
*
blockSum
=
NULL
;
int
dimensionSize
=
y
->
dimSize
RDI
[
leadDimRDI
];
int
dimensionSize
=
y
->
dimSize
[
leadDim
];
int
stride
=
1
;
int
blockSize
=
1
;
int
blockNum
=
1
;
for
(
int
i
=
0
;
i
<
leadDimRDI
;
i
++
)
stride
*=
y
->
dimSize
RDI
[
i
];
for
(
int
i
=
leadDim
+
1
;
i
<
y
->
order
;
i
++
)
stride
*=
y
->
dimSize
[
i
];
blockSize
=
stride
*
dimensionSize
;
blockNum
=
y
->
unitNum
/
blockSize
;
...
...
@@ -86,7 +85,7 @@ void _LogSoftmax(const XTensor * x, XTensor * y, int leadDim)
_ReduceSum
(
x
,
sum
,
leadDim
,
max
,
1.0
F
,
true
);
if
(
x
->
devID
>=
0
)
{
if
(
leadDim
RDI
==
0
){
if
(
leadDim
==
x
->
order
-
1
){
blockSize
=
y
->
unitNum
;
blockNum
=
1
;
blockx
=
NewTensor2D
(
blockSize
/
dimensionSize
,
-
dimensionSize
,
x
->
dataType
,
x
->
devID
,
mem
);
...
...
@@ -137,7 +136,7 @@ void _LogSoftmax(const XTensor * x, XTensor * y, int leadDim)
blockMax
->
data
=
mp
;
blockSum
->
data
=
sp
;
#ifdef USE_CUDA
if
(
leadDim
RDI
==
0
)
if
(
leadDim
==
x
->
order
-
1
)
_CudaLogSoftmaxSumMax
(
blockx
,
blocky
,
1
,
blockSum
,
blockMax
);
else
_CudaLogSoftmaxSumMax
(
blockx
,
blocky
,
leadDim
,
blockSum
,
blockMax
);
...
...
@@ -289,7 +288,6 @@ void _LogSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
if
(
leadDim
<
0
)
leadDim
=
y
->
order
-
1
;
int
leadDimRDI
=
y
->
order
-
leadDim
-
1
;
#ifdef USE_CUDA
if
(
gold
->
devID
>=
0
)
{
_CudaLogSoftmaxBackward
(
gold
,
y
,
x
,
dedy
,
dedx
,
padding
,
leadDim
,
lossName
);
...
...
@@ -297,12 +295,12 @@ void _LogSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
}
#endif
int
dimensionSize
=
y
->
dimSize
RDI
[
leadDimRDI
];
int
dimensionSize
=
y
->
dimSize
[
leadDim
];
int
stride
=
1
;
int
blockSize
=
1
;
int
blockNum
=
1
;
for
(
int
i
=
0
;
i
<
leadDimRDI
;
i
++
)
stride
*=
y
->
dimSize
RDI
[
i
];
for
(
int
i
=
leadDim
+
1
;
i
<
y
->
order
-
1
;
i
++
)
stride
*=
y
->
dimSize
[
i
];
blockSize
=
stride
*
dimensionSize
;
blockNum
=
y
->
unitNum
/
blockSize
;
...
...
@@ -329,10 +327,10 @@ void _LogSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
int
key
=
gold
->
GetKeyInSparse
(
i
);
DTYPE
value
=
gold
->
GetInSparse
(
i
);
int
offset
=
key
;
if
(
dedx
->
dimSize
RDI
[
0
]
!=
gm
)
{
if
(
dedx
->
dimSize
[
dedx
->
order
-
1
]
!=
gm
)
{
int
mi
=
key
%
gm
;
int
ni
=
key
/
gm
;
int
key2
=
ni
*
dedx
->
dimSize
RDI
[
0
]
+
mi
;
int
key2
=
ni
*
dedx
->
dimSize
[
dedx
->
order
-
1
]
+
mi
;
offset
=
key2
;
}
if
(
key
>=
0
&&
key
<
size
)
...
...
@@ -386,10 +384,10 @@ void _LogSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
int
key
=
gold
->
GetKeyInSparse
(
i
);
DTYPE
value
=
gold
->
GetInSparse
(
i
);
int
offset
=
key
;
if
(
dedx
->
dimSize
RDI
[
0
]
!=
gm
)
{
if
(
dedx
->
dimSize
[
dedx
->
order
-
1
]
!=
gm
)
{
int
mi
=
key
%
gm
;
int
ni
=
key
/
gm
;
int
key2
=
ni
*
dedx
->
dimSize
RDI
[
0
]
+
mi
;
int
key2
=
ni
*
dedx
->
dimSize
[
dedx
->
order
-
1
]
+
mi
;
offset
=
key2
;
}
if
(
key
>=
0
&&
key
<
size
)
...
...
@@ -421,11 +419,11 @@ void _LogSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
/* for columns with no xs we set dE/ds = 0 */
if
(
gold
!=
NULL
&&
gold
->
isSparse
)
{
CheckNTErrors
((
gold
->
order
==
2
),
"The gold standard tensor must be of order 2!"
);
if
((
gold
->
dimSize
[
1
]
>
1
&&
!
gold
->
isAllValued
[
0
])
||
gold
->
dimSize
[
1
]
!=
dedx
->
dimSize
RDI
[
0
])
{
if
((
gold
->
dimSize
[
1
]
>
1
&&
!
gold
->
isAllValued
[
0
])
||
gold
->
dimSize
[
1
]
!=
dedx
->
dimSize
[
dedx
->
order
-
1
])
{
int
gn
=
gold
->
dimSize
[
0
];
int
gm
=
gold
->
dimSize
[
1
];
int
sm
=
dedx
->
dimSize
RDI
[
0
];
int
sn
=
dedx
->
dimSize
RDI
[
1
];
int
sm
=
dedx
->
dimSize
[
dedx
->
order
-
1
];
int
sn
=
dedx
->
dimSize
[
dedx
->
order
-
2
];
int
*
flags
=
new
int
[
sm
];
memset
(
flags
,
0
,
sizeof
(
int
)
*
sm
);
...
...
source/tensor/function/LogSoftmax.cu
查看文件 @
c9ef15f8
...
...
@@ -384,13 +384,12 @@ void _CudaLogSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
"Tensors used in log softmax are not on the same GPU.");
CheckNTErrors((gold != NULL), "No x gold standard is found!");
int leadDimRDI = y->order - leadDim - 1;
int dimensionSize = y->dimSizeRDI[leadDimRDI];
int dimensionSize = y->dimSize[leadDim];
int stride = 1;
int blockSize = 1;
int blockNum = 1;
for (int i =
0; i < leadDimRDI
; i++)
stride *= y->dimSize
RDI
[i];
for (int i =
leadDim + 1; i < y->order
; i++)
stride *= y->dimSize[i];
blockSize = stride * dimensionSize;
blockNum = y->unitNum / blockSize;
...
...
source/tensor/function/Loss.cpp
查看文件 @
c9ef15f8
...
...
@@ -49,18 +49,17 @@ DTYPE _LossCompute(XTensor * gold, XTensor * output, LOSS_FUNCTION_NAME LFName,
if
(
output
->
devID
<
0
)
{
CheckNTErrors
((
gLen
>=
0
&&
gLen
<=
output
->
unitNum
),
"Illegal input length!"
);
CheckNTErrors
((
XTensor
::
IsSameShaped
(
gold
,
output
)),
"The input tensors must be of the same size!"
);
CheckNTErrors
((
gold
->
dimSize
RDI
[
0
]
==
1
&&
output
->
dimSizeRDI
[
0
]
==
1
),
"TODO!"
);
CheckNTErrors
((
gold
->
dimSize
[
gold
->
order
-
1
]
==
1
&&
output
->
dimSize
[
output
->
order
-
1
]
==
1
),
"TODO!"
);
CheckNTErrors
((
gold
->
order
>
leadDim
&&
leadDim
>=
0
),
"Illegal leading dimension!"
);
CheckNTErrors
((
gold
->
dataType
==
DEFAULT_DTYPE
&&
output
->
dataType
==
DEFAULT_DTYPE
),
"TODO!"
);
int
leadDimRDI
=
output
->
order
-
leadDim
-
1
;
int
dimensionSize
=
output
->
dimSizeRDI
[
leadDimRDI
];
int
dimensionSize
=
output
->
dimSize
[
leadDim
];
int
stride
=
1
;
int
blockSize
=
1
;
int
blockNum
=
1
;
for
(
int
i
=
0
;
i
<
leadDimRDI
;
i
++
)
stride
*=
output
->
dimSize
RDI
[
i
];
for
(
int
i
=
leadDim
+
1
;
i
<
output
->
order
;
i
++
)
stride
*=
output
->
dimSize
[
i
];
blockSize
=
stride
*
dimensionSize
;
blockNum
=
output
->
unitNum
/
blockSize
;
...
...
@@ -206,18 +205,17 @@ DTYPE _LossComputeForLogScale(XTensor * gold, XTensor * output,
{
CheckNTErrors
(
gLen
>=
0
&&
gLen
<=
output
->
unitNum
,
"Illegal input length!"
);
CheckNTErrors
(
XTensor
::
IsSameShaped
(
gold
,
output
),
"The input tensors must be of the same size!"
);
CheckNTErrors
(
gold
->
dimSize
RDI
[
0
]
==
1
&&
output
->
dimSizeRDI
[
0
]
==
1
,
"TODO!"
);
CheckNTErrors
(
gold
->
dimSize
[
gold
->
order
-
1
]
==
1
&&
output
->
dimSize
[
output
->
order
-
1
]
==
1
,
"TODO!"
);
CheckNTErrors
(
gold
->
order
>
leadDim
&&
leadDim
>=
0
,
"Illegal leading dimension!"
);
CheckNTErrors
(
gold
->
dataType
==
DEFAULT_DTYPE
&&
output
->
dataType
==
DEFAULT_DTYPE
,
"TODO!"
);
int
leadDimRDI
=
output
->
order
-
leadDim
-
1
;
int
dimensionSize
=
output
->
dimSizeRDI
[
leadDimRDI
];
int
dimensionSize
=
output
->
dimSize
[
leadDim
];
int
stride
=
1
;
int
blockSize
=
1
;
int
blockNum
=
1
;
for
(
int
i
=
0
;
i
<
leadDimRDI
;
i
++
)
stride
*=
output
->
dimSize
RDI
[
i
];
for
(
int
i
=
leadDim
+
1
;
i
<
output
->
order
;
i
++
)
stride
*=
output
->
dimSize
[
i
];
blockSize
=
stride
*
dimensionSize
;
blockNum
=
output
->
unitNum
/
blockSize
;
...
...
@@ -408,21 +406,21 @@ void _LossBackward(XTensor * dedy, XTensor * t, XTensor * y,
CheckNTErrors
(
t
->
order
>
leadDim
,
"Illegal leading dimension!"
);
CheckNTErrors
(
t
->
dataType
==
DEFAULT_DTYPE
&&
y
->
dataType
==
DEFAULT_DTYPE
,
"TODO!"
);
int
leadDimRDI
=
leadDim
>=
0
?
y
->
order
-
leadDim
-
1
:
-
1
;
if
(
leadDimRDI
<
0
){
leadDimRDI
=
y
->
order
-
1
;
if
(
leadDim
<
0
){
leadDim
=
0
;
tBeg
=
0
;
yBeg
=
0
;
tLen
=
y
->
dimSizeRDI
[
leadDimRDI
];
tLen
=
y
->
dimSize
[
leadDim
];
printf
(
"%d"
,
tLen
);
}
int
dimensionSize
=
y
->
dimSize
RDI
[
leadDimRDI
];
int
dimensionSize
=
y
->
dimSize
[
leadDim
];
int
stride
=
1
;
int
blockSize
=
1
;
int
blockNum
=
1
;
for
(
int
i
=
0
;
i
<
leadDimRDI
;
i
++
)
stride
*=
y
->
dimSize
RDI
[
i
];
for
(
int
i
=
leadDim
+
1
;
i
<
y
->
order
;
i
++
)
stride
*=
y
->
dimSize
[
i
];
blockSize
=
stride
*
dimensionSize
;
blockNum
=
y
->
unitNum
/
blockSize
;
...
...
source/tensor/function/Loss.cu
查看文件 @
c9ef15f8
...
...
@@ -56,7 +56,7 @@ DTYPE _CudaLossCompute(XTensor * gold, XTensor * y, LOSS_FUNCTION_NAME LFName,
{
CheckNTErrors((gLen >= 0 && gLen <= y->unitNum), "Illegal input length!");
CheckNTErrors((XTensor::IsSameShaped(gold, y)), "The input tensors must be of the same size!");
CheckNTErrors((gold->dimSize
RDI[0] == 1 && y->dimSizeRDI[0
] == 1), "TODO!");
CheckNTErrors((gold->dimSize
[gold->order - 1] == 1 && y->dimSize[y->order - 1
] == 1), "TODO!");
CheckNTErrors((gold->order > leadDim && leadDim >= 0), "Illegal leading dimension!");
CheckNTErrors((gold->dataType == DEFAULT_DTYPE && y->dataType == DEFAULT_DTYPE), "TODO!");
CheckNTErrors((gold->devID == y->devID), "Tensors must be on the same device!");
...
...
@@ -91,7 +91,7 @@ DTYPE _CudaLossCompute(XTensor * gold, XTensor * y, LOSS_FUNCTION_NAME LFName,
diffNew->order = 2;
diffNew->dimSize[1] = diffNew->dimSize[0];
diffNew->dimSize[0] = 1;
diffNew->dimSize
RDI[1
] = 1;
diffNew->dimSize
[diffNew->order - 2
] = 1;
}
delete diff;
diff = diffNew;
...
...
@@ -125,7 +125,7 @@ DTYPE _CudaLossCompute(XTensor * gold, XTensor * y, LOSS_FUNCTION_NAME LFName,
diffNew->order = 2;
diffNew->dimSize[1] = diffNew->dimSize[0];
diffNew->dimSize[0] = 1;
diffNew->dimSize
RDI[1
] = 1;
diffNew->dimSize
[diffNew->order - 2
] = 1;
}
delete diff;
diff = diffNew;
...
...
@@ -162,7 +162,7 @@ DTYPE _CudaLossCompute(XTensor * gold, XTensor * y, LOSS_FUNCTION_NAME LFName,
diffNew->order = 2;
diffNew->dimSize[1] = diffNew->dimSize[0];
diffNew->dimSize[0] = 1;
diffNew->dimSize
RDI[1
] = 1;
diffNew->dimSize
[diffNew->order - 2
] = 1;
}
delete diff;
diff = diffNew;
...
...
@@ -349,22 +349,22 @@ void _CudaLossBackward(XTensor * dedy, XTensor * t, XTensor * y,
"The vectors must be on the same GPU.");
CheckNTErrors((tBeg == yBeg), "TODO!");
int leadDimRDI = leadDim >= 0 ? y->order - leadDim - 1 : -1;
if(leadDimRDI < 0){
leadDimRDI = y->order - 1;
if(leadDim < 0){
leadDim = 0;
tBeg = 0;
yBeg = 0;
tLen = y->dimSizeRDI[leadDimRDI];
tLen = y->dimSize[leadDim];
printf("%d", tLen);
}
int dimensionSize = y->dimSize
RDI[leadDimRDI
];
int dimensionSize = y->dimSize
[leadDim
];
int stride = 1;
int blockSize = 1;
int blockNum = 1;
int size = 1;
for(int i =
0; i < leadDimRDI
; i++)
stride *= y->dimSize
RDI
[i];
for(int i =
leadDim + 1; i < y->order
; i++)
stride *= y->dimSize[i];
size = tLen * stride;
blockSize = stride * dimensionSize;
blockNum = y->unitNum / blockSize;
...
...
source/tensor/function/Softmax.cpp
查看文件 @
c9ef15f8
...
...
@@ -40,7 +40,6 @@ void _Softmax(const XTensor * x, XTensor * y, int leadDim)
if
(
leadDim
<
0
)
leadDim
=
x
->
order
-
1
;
int
leadDimRDI
=
x
->
order
-
leadDim
-
1
;
if
(
!
x
->
isSparse
&&
!
y
->
isSparse
&&
x
->
dataType
==
y
->
dataType
){
int
*
dimSize
=
new
int
[
x
->
order
-
1
];
for
(
int
i
=
0
;
i
<
x
->
order
;
i
++
){
...
...
@@ -70,13 +69,13 @@ void _Softmax(const XTensor * x, XTensor * y, int leadDim)
else
{
CheckNTErrors
((
x
->
dataType
==
DEFAULT_DTYPE
),
"TODO!"
);
int
dimensionSize
=
y
->
dimSize
RDI
[
leadDimRDI
];
int
dimensionSize
=
y
->
dimSize
[
leadDim
];
int
stride
=
1
;
int
blockSize
=
1
;
int
blockNum
=
1
;
for
(
int
i
=
0
;
i
<
leadDimRDI
;
i
++
)
stride
*=
y
->
dimSize
RDI
[
i
];
for
(
int
i
=
leadDim
+
1
;
i
<
y
->
order
;
i
++
)
stride
*=
y
->
dimSize
[
i
];
blockSize
=
stride
*
dimensionSize
;
blockNum
=
y
->
unitNum
/
blockSize
;
...
...
@@ -184,7 +183,6 @@ void _SoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
if
(
leadDim
<
0
)
leadDim
=
y
->
order
-
1
;
int
leadDimRDI
=
y
->
order
-
leadDim
-
1
;
#ifdef USE_CUDA
if
(
y
->
devID
>=
0
){
...
...
@@ -193,12 +191,12 @@ void _SoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
}
#endif
int
dimensionSize
=
y
->
dimSize
RDI
[
leadDimRDI
];
int
dimensionSize
=
y
->
dimSize
[
leadDim
];
int
stride
=
1
;
int
blockSize
=
1
;
int
blockNum
=
1
;
for
(
int
i
=
0
;
i
<
leadDimRDI
;
i
++
)
stride
*=
y
->
dimSize
RDI
[
i
];
for
(
int
i
=
leadDim
+
1
;
i
<
y
->
order
;
i
++
)
stride
*=
y
->
dimSize
[
i
];
blockSize
=
stride
*
dimensionSize
;
blockNum
=
y
->
unitNum
/
blockSize
;
...
...
source/tensor/function/Softmax.cu
查看文件 @
c9ef15f8
...
...
@@ -225,14 +225,13 @@ void _CudaSoftmaxSumMax(const XTensor * x, XTensor * y, int leadDim, XTensor * s
CheckNTErrors((x->devID == y->devID), "Tensors used in softmax are not on the same GPU.");
CheckNTErrors((XTensor::IsSameShaped(x, y)), "Input tensors must be of the same size!");
int leadDimRDI = y->order - leadDim - 1;
int dimensionSize = y->dimSizeRDI[leadDimRDI];
int dimensionSize = y->dimSize[leadDim];
int stride = 1;
int blockSize = 1;
int blockNum = 1;
for(int i =
0; i < leadDimRDI
; i++)
stride *= y->dimSize
RDI
[i];
for(int i =
leadDim + 1; i < y->order
; i++)
stride *= y->dimSize[i];
blockSize = stride * dimensionSize;
blockNum = y->unitNum / blockSize;
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论