NiuTrans / NiuTrans.Tensor / Commits / 771643c6

Commit 771643c6, authored Jul 19, 2019 by huchi
Parent: 04f129fc

refactor parameter from pointer to reference
Showing 108 changed files, with 1,649 additions and 1,625 deletions.
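Every hunk below follows the same mechanical pattern: an in-place pointer-based routine `_XxxMe(XTensor*, ...)` gains a reference-based wrapper `XxxMe(XTensor&, ...)` that forwards to it, and call sites drop the `&`. A minimal standalone sketch of that shape (the `Tensor` stand-in type is illustrative, not the library's XTensor):

    #include <cstdio>

    struct Tensor { float v; };                    // stand-in for XTensor

    // old-style in-place op: pointer parameter
    void _NegateMe(Tensor * a) { a->v = -a->v; }

    // new-style wrapper following this commit's pattern:
    // take a reference, forward to the pointer-based kernel
    void NegateMe(Tensor & a) { _NegateMe(&a); }

    int main() {
        Tensor t{2.0f};
        NegateMe(t);                               // call sites no longer need '&'
        std::printf("%f\n", t.v);                  // -2.000000
        return 0;
    }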
source/network/Main.cpp  +6  -6
source/network/XBackwardMath.cpp  +88  -88
source/network/XBackwardMath.h  +4  -4
source/network/XBackwardShape.cpp  +1  -1
source/network/XNet.cpp  +2  -2
source/sample/fnnlm/FNNLM.cpp  +10  -10
source/sample/transformer/T2TAttention.h  +1  -1
source/sample/transformer/T2TBatchLoader.cpp  +4  -4
source/sample/transformer/T2TBatchLoader.h  +3  -3
source/sample/transformer/T2TSearch.cpp  +1  -1
source/tensor/XDevice.cpp  +1  -1
source/tensor/XList.cpp  +32  -32
source/tensor/XList.h  +23  -23
source/tensor/XMem.cpp  +15  -15
source/tensor/XQueue-李垠桥的MacBook Pro.cpp  +0  -297
source/tensor/XQueue.cpp  +1  -1
source/tensor/XTensor.cpp  +6  -9
source/tensor/XTensor.h  +3  -3
source/tensor/core/arithmetic/Div.cpp  +17  -0
source/tensor/core/arithmetic/Div.cu  +1  -1
source/tensor/core/arithmetic/Div.h  +1  -0
source/tensor/core/arithmetic/Mask.cpp  +11  -0
source/tensor/core/arithmetic/Mask.h  +1  -0
source/tensor/core/arithmetic/MatrixMul2D.cpp  +9  -9
source/tensor/core/arithmetic/MatrixMul2DMultiTheading.cpp  +9  -9
source/tensor/core/arithmetic/Multiply.cpp  +17  -0
source/tensor/core/arithmetic/Multiply.cu  +1  -1
source/tensor/core/arithmetic/Multiply.h  +1  -0
source/tensor/core/arithmetic/MultiplyDim.cpp  +18  -0
source/tensor/core/arithmetic/MultiplyDim.h  +1  -0
source/tensor/core/arithmetic/Negate.cpp  +10  -0
source/tensor/core/arithmetic/Negate.h  +1  -0
source/tensor/core/arithmetic/Sign.cpp  +10  -0
source/tensor/core/arithmetic/Sign.h  +6  -0
source/tensor/core/arithmetic/Sub.cpp  +13  -0
source/tensor/core/arithmetic/Sub.h  +1  -0
source/tensor/core/arithmetic/SubDim.cpp  +75  -75
source/tensor/core/arithmetic/SubDim.cu  +87  -87
source/tensor/core/arithmetic/Sum.cpp  +13  -0
source/tensor/core/arithmetic/Sum.h  +1  -0
source/tensor/core/arithmetic/XTensorBLAS.cpp  +5  -5
source/tensor/core/math/Binary.cpp  +1  -1
source/tensor/core/math/Binary.h  +35  -8
source/tensor/core/math/Clip.cpp  +40  -28
source/tensor/core/math/Clip.cu  +31  -31
source/tensor/core/math/Clip.h  +4  -0
source/tensor/core/math/Compare.h  +6  -0
source/tensor/core/math/Normalize.cpp  +22  -1
source/tensor/core/math/Normalize.cu  +2  -2
source/tensor/core/math/Normalize.h  +8  -0
source/tensor/core/math/Power.cpp  +11  -0
source/tensor/core/math/Power.h  +6  -0
source/tensor/core/math/ScaleAndShift.cpp  +15  -0
source/tensor/core/math/ScaleAndShift.h  +7  -0
source/tensor/core/math/Unary.cpp  +1  -1
source/tensor/core/math/Unary.cu  +1  -1
source/tensor/core/math/Unary.h  +39  -0
source/tensor/core/reduce/ReduceMax.cpp  +3  -3
source/tensor/core/reduce/ReduceMax.cu  +1  -1
source/tensor/core/reduce/ReduceMean.cpp  +2  -2
source/tensor/core/reduce/ReduceSum.cpp  +3  -3
source/tensor/core/reduce/ReduceSum.cu  +2  -2
source/tensor/core/reduce/ReduceSumSquared.cpp  +1  -1
source/tensor/core/reduce/ReduceVariance.cpp  +2  -2
source/tensor/core/shape/ConcatenateSolely.cpp  +1  -1
source/tensor/core/shape/Permute.h  +7  -0
source/tensor/core/shape/Reshape.cpp  +2  -2
source/tensor/core/shape/Squeeze.cpp  +14  -0
source/tensor/core/shape/Squeeze.h  +4  -0
source/tensor/core/sort/Sort.cpp  +17  -2
source/tensor/core/sort/Sort.cu  +1  -1
source/tensor/core/sort/Sort.h  +6  -0
source/tensor/core/sort/TopK.cu  +18  -18
source/tensor/core/utilities/SetAscendingOrder.cu  +2  -2
source/tensor/core/utilities/XMatrixSegment.cpp  +7  -7
source/tensor/test/TAbsolute.cpp  +75  -75
source/tensor/test/TClip.cpp  +93  -93
source/tensor/test/TCompare.cpp  +93  -93
source/tensor/test/TConcatenate.cpp  +82  -82
source/tensor/test/TConcatenateSolely.cpp  +80  -80
source/tensor/test/TConvertDataType.cpp  +142  -142
source/tensor/test/TCos.cpp  +77  -77
source/tensor/test/TDiv.cpp  +94  -94
source/tensor/test/TDivDim.cpp  +4  -4
source/tensor/test/TExp.cpp  +77  -77
source/tensor/test/THardTanH.cpp  +0  -0
source/tensor/test/TIdentity.cpp  +0  -0
source/tensor/test/TLog.cpp  +0  -0
source/tensor/test/TLogSoftmax.cpp  +0  -0
source/tensor/test/TMerge.cpp  +0  -0
source/tensor/test/TMultiply.cpp  +0  -0
source/tensor/test/TNegate.cpp  +0  -0
source/tensor/test/TNormalize.cpp  +0  -0
source/tensor/test/TPower.cpp  +0  -0
source/tensor/test/TRectify.cpp  +0  -0
source/tensor/test/TRound.cpp  +0  -0
source/tensor/test/TSigmoid.cpp  +0  -0
source/tensor/test/TSign.cpp  +0  -0
source/tensor/test/TSin.cpp  +0  -0
source/tensor/test/TSoftmax.cpp  +0  -0
source/tensor/test/TSplit.cpp  +0  -0
source/tensor/test/TSub.cpp  +0  -0
source/tensor/test/TSubDim.cpp  +0  -0
source/tensor/test/TSum.cpp  +0  -0
source/tensor/test/TSumDim.cpp  +0  -0
source/tensor/test/TTan.cpp  +0  -0
source/tensor/test/TTranspose.cpp  +0  -0
source/tensor/test/Test.cpp  +0  -0
source/network/Main.cpp

@@ -55,7 +55,7 @@ int main( int argc, const char ** argv )
    //    fprintf(stderr, "Run this program with \"-test\" for unit test!\n");
    //    fprintf(stderr, "Or run this program with \"-fnnlm\" for sample FNNLM!\n");
    //}

    BackwardTest();

    //_CrtDumpMemoryLeaks();

@@ -69,9 +69,9 @@ void BackwardTest()
    XTensor a;
    XTensor b;
    XTensor c;
    a.enableGrad = true;
    b.enableGrad = false;
    c.enableGrad = false;
    XTensor mean;
    XTensor origin;
    InitTensor2D(&a, 2, 3);

@@ -89,9 +89,9 @@ void BackwardTest()
    b.Set1D(2.0F, 0);
    b.Set1D(1.0F, 1);

    DivDim(a, b, c, 0);
    c.Dump(stderr, "c:");

    auto loss = CrossEntropy(c, a);

    //XLink::ShowNetwork(stderr, &c);
source/network/XBackwardMath.cpp

@@ -765,15 +765,15 @@ void XMathGrad::GradMultiplyDim(XTensor * node, bool isEfficient)
    /* dE/da */
    _MultiplyDim(node->grad, b, a->grad, n, 1.0F);

    /* dE/db */
    int order = a->order;
    int dimSize[MAX_TENSOR_DIM_NUM];
    memcpy(dimSize, a->dimSize, sizeof(int) * a->order);

    XTensor * bGradTMP = NewTensorBuf(node->grad, node->devID, node->mem);
    _Multiply(node->grad, a, bGradTMP);

    if(n == order - 1){
        int reshapedSize[MAX_TENSOR_DIM_NUM];
        reshapedSize[0] = a->unitNum / dimSize[order - 1];

@@ -1078,91 +1078,91 @@ dE/db = - dE/dc * b.reduce(0,...,n-1,n+1,...) * \beta
*/
void XMathGrad::GradSubDim(XTensor * node, bool isEfficient)
{
    XLink &income = node->income;
    CheckNTErrors(income.tailNum == 2, "Wrong input tensor number for SUBDIM!");

    XTensor * a = income.tails[0];
    XTensor * b = income.tails[1];
    int n = income.GetParamInt(0);
    DTYPE beta = income.GetParam(1);

    XNoder::MakeGrad(a);
    XNoder::MakeGrad(b);

    _Sum(a->grad, node->grad, a->grad);

    int order = a->order;
    int dimSize[MAX_TENSOR_DIM_NUM];
    memcpy(dimSize, a->dimSize, sizeof(int) * a->order);

    if(n == order - 1){
        int reshapedSize[MAX_TENSOR_DIM_NUM];
        reshapedSize[0] = a->unitNum / dimSize[order - 1];
        reshapedSize[1] = dimSize[order - 1];

        /* we reshape dE/dc to a matrix whose column number is equal to the
           size of b. Then we can reduce the matrix into a row vector. */
        node->grad->Reshape(2, reshapedSize);

        //if(b->outgo.tailNum > 1){
        XTensor * bGradTMP = NewTensorBuf(b->grad, b->devID, b->mem);
        _ReduceSum(node->grad, bGradTMP, 0);
        if(beta != 1.0F)
            _ScaleAndShiftMe(bGradTMP, beta);
        _Sub(b->grad, bGradTMP, b->grad);
        DelTensorBuf(bGradTMP);
        /*}
        else{
            _ReduceSum(node->grad, b->grad, 0);
            if(beta != 1.0F)
                _ScaleAndShiftMe(b->grad, beta);
            _ScaleAndShiftMe(b->grad, -1.0F);
        }*/

        node->grad->Reshape(order, dimSize);
    }
    else{
        int reshapedSize[MAX_TENSOR_DIM_NUM];
        reshapedSize[0] = 1;
        reshapedSize[1] = dimSize[n];
        reshapedSize[2] = 1;

        for(int i = 0; i < order; i++){
            if(i < n)
                reshapedSize[0] *= dimSize[i];
        }

        reshapedSize[2] = a->unitNum / (reshapedSize[0] * reshapedSize[1]);

        /* we reshape dE/dc to a 3D tensor of size (x, y, z) where y = |b|.
           Then reduce along with z and x to obtain dE/db. */
        node->grad->Reshape(3, reshapedSize);

        XTensor * interGrad = NewTensorBuf(2, reshapedSize, b->dataType, b->denseRatio, b->devID, b->mem);
        _ReduceSum(node->grad, interGrad, 2);

        //if(b->outgo.tailNum > 1){
        XTensor * bGradTMP = NewTensorBuf(b->grad, b->devID, b->mem);
        _ReduceSum(interGrad, bGradTMP, 0);
        if(beta != 1.0F)
            _ScaleAndShiftMe(bGradTMP, beta);
        _Sub(b->grad, bGradTMP, b->grad);
        DelTensorBuf(bGradTMP);
        /*}
        else{
            _ReduceSum(interGrad, b->grad, 0);
            if(beta != 1.0F)
                _ScaleAndShiftMe(b->grad, beta);
            _ScaleAndShiftMe(b->grad, -1.0F);
        }*/

        node->grad->Reshape(order, dimSize);
        DelTensorBuf(interGrad);
    }

    node->visitMark = NODE_FINISHED;
}
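The gradient code above leans on one trick: to reduce dE/dc over every axis except n, view the tensor as (pre, dim_n, post) and sum out the outer axes. A small self-contained sketch of that reduction on raw arrays (names are mine, not the NiuTrans kernels):

    #include <vector>
    #include <cstdio>

    // Sum a flat tensor over all axes except the middle one, where the
    // tensor is viewed as (pre, mid, post). This mirrors the
    // reshape-to-3D-then-ReduceSum step in GradSubDim above.
    std::vector<float> ReduceAllBut(const std::vector<float>& x,
                                    int pre, int mid, int post)
    {
        std::vector<float> out(mid, 0.0f);
        for (int i = 0; i < pre; i++)
            for (int j = 0; j < mid; j++)
                for (int k = 0; k < post; k++)
                    out[j] += x[(i * mid + j) * post + k];
        return out;
    }

    int main() {
        // a 2 x 3 x 2 tensor of ones: reducing all but the middle axis
        // yields {4, 4, 4}, since pre * post = 4 elements map to each j
        std::vector<float> x(12, 1.0f);
        auto g = ReduceAllBut(x, 2, 3, 2);
        for (float v : g) std::printf("%g ", v);   // prints: 4 4 4
        return 0;
    }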
source/network/XBackwardMath.h

@@ -146,10 +146,10 @@ private:
    static
    void GradSub(XTensor * node, bool isEfficient);

    /* gradient for sub with one dimension: c = a - b * \beta
       where the size of b is equal to that of one dimension of a */
    static
    void GradSubDim(XTensor * node, bool isEfficient);

    /* gradient for sum: c = a + b * \beta */
    static
source/network/XBackwardShape.cpp

@@ -450,7 +450,7 @@ void XShapeGrad::GradSplitListPost(XTensor * node, bool isEfficient)
    if(income.typeID == SHAPE_SPLIT_LIST){
        int w = income.GetParamInt(0);
        int splitID = income.GetParamInt(1);

        if(whereToSplit < 0)
            whereToSplit = w;
        splitNum++;
source/network/XNet.cpp

@@ -267,7 +267,7 @@ void XNet::BackwardNode(XTensor * node, bool isEfficent)
    else if(XShapeGrad::IsShapeOP(node))
        XShapeGrad::MakeGrad(node, isEfficent);
    else if(XLossGrad::IsLossOP(node))
        XLossGrad::MakeGrad(node, isEfficent);
    else{
        ShowNTErrors("Wrong node type!");
    }

@@ -468,7 +468,7 @@ search for a node in a top-down manner by its name
*/
//XTensor * XNet::SearchNode(XTensor * top, const char * name)
//{
//    return XLink::SearchNode(top, name);
//}

}
source/sample/fnnlm/FNNLM.cpp

@@ -482,12 +482,12 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
    Clear(model, true);

    /* forward + backward process */
    /* this is implemented by gather function */
    ForwardAutoDiff(ngrams, ngramNum, output, model);

    /* this is implemented by multiply function */
    //ForwardAutoDiff(inputs, output, model);

    lossTensor = CrossEntropy(output, gold);

    /* automatic differentiation */

@@ -1177,12 +1177,12 @@ void Test(const char * test, const char * result, FNNModel &model)
        /* forward computation */
        Forward(inputs, output, model, net);
    }
    else {
        /* this is implemented by gather function */
        ForwardAutoDiff(ngrams, ngramNum, output, model);

        /* this is implemented by multiply function */
        //ForwardAutoDiff(inputs, output, model);
    }

    /* prediction probabilities */
source/sample/transformer/T2TAttention.h

@@ -61,7 +61,7 @@ public:
    XTensor wa;
    XTensor wbig;

    /* size of transformed Q and K */
    int dk;
source/sample/transformer/T2TBatchLoader.cpp

@@ -86,7 +86,7 @@ struct SampleNode
    int * p;
    int size;
    int value;
    int key;
};

int CompareSampleNode(const void * a, const void * b)

@@ -289,7 +289,7 @@ int T2TBatchLoader::LoadBatch(FILE * file, bool isLM,
                              int vsEnc, int vsDec, int sBatch, int wBatch,
                              bool isSorted, int &ws, int &wCount,
                              int devID, XMem * mem,
                              bool isTraining)
{
    if(isLM){
        return LoadBatchLM(file, batchEnc, paddingEnc, batchDec, paddingDec, gold, label,

@@ -331,7 +331,7 @@ int T2TBatchLoader::LoadBatchLM(FILE * file,
                                int vSize, int sBatch, int wBatch,
                                bool isSorted, int &wCount,
                                int devID, XMem * mem,
                                bool isTraining)
{
    if(nextSeq < 0 || nextSeq >= nseqBuf)
        LoadBuf(file, isSorted, 1);

@@ -490,7 +490,7 @@ int T2TBatchLoader::LoadBatchMT(FILE * file,
                                int vSizeEnc, int vSizeDec, int sBatch, int wBatch,
                                bool isSorted, int &ws, int &wCount,
                                int devID, XMem * mem,
                                bool isTraining)
{
    if(nextBatch < 0 || nextBatch >= bufBatchSize){
        LoadBuf(file, isSorted, 2);
source/sample/transformer/T2TBatchLoader.h

@@ -132,7 +132,7 @@ public:
                  int vsEnc, int vsDec, int sBatch, int wBatch,
                  bool isSorted, int &ws, int &wCount,
                  int devID, XMem * mem,
                  bool isTraining);

    /* load a batch of sequences (for language modeling) */
    int LoadBatchLM(FILE * file,

@@ -142,7 +142,7 @@ public:
                    int * seqs, int vs, int sBatch, int wBatch,
                    bool isSorted, int &wCount,
                    int devID, XMem * mem,
                    bool isTraining);

    /* load a batch of sequences (for machine translation) */
    int LoadBatchMT(FILE * file,

@@ -152,7 +152,7 @@ public:
                    int * seqs, int vsEnc, int vsDec, int sBatch, int wBatch,
                    bool isSorted, int &ws, int &wCount,
                    int devID, XMem * mem,
                    bool isTraining);

    /* shuffle the data file */
    void Shuffle(const char * srcFile, const char * tgtFile);
source/sample/transformer/T2TSearch.cpp

@@ -303,7 +303,7 @@ void T2TSearch::Generate(T2TStateBundle * beam)
    /* Then, we do something similar to "preID". For the top-k predictions, we need
       to know their indices in the vocabulary. We compute the offset of each prediction
       in the vocabulary by dividing it by the vocabulary size and taking the remainder. */
-   _ModMe(index, sizeVocab);
+   ModMe(index, sizeVocab);

    score.Reshape(order, dims);
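For a top-k index flattened over (beam x vocab), the beam is id / sizeVocab and the within-vocabulary offset is id % sizeVocab; the in-place ModMe keeps only the offset. The arithmetic in plain C++ (illustrative values, not the NiuTrans call):

    #include <cstdio>

    int main() {
        const int sizeVocab = 1000;
        // flattened indices from a top-k over (beam x vocab) scores
        int index[3] = {1023, 2999, 57};
        for (int &id : index)
            id %= sizeVocab;            // keep the within-vocabulary offset
        // beam 1 / word 23, beam 2 / word 999, beam 0 / word 57
        std::printf("%d %d %d\n", index[0], index[1], index[2]);
        return 0;
    }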
source/tensor/XDevice.cpp

@@ -528,7 +528,7 @@ get device ids for the given device information
*/
int XDevManager::GetDeviceIDs(char * devInfo, int * devIDs)
{
    StrList* terms = new StrList(1);
    SplitALine(devInfo, " ", terms);

    for(int i = 0; i < terms->count; i++){
source/tensor/XList.cpp

@@ -90,7 +90,7 @@ template <typename T>
void TensorListBase<T>::Add(T&& item)
{
    if(count == maxNum){
        T * newItems;
        if(mem == NULL)
            newItems = new T[maxNum * 2 + 1];

@@ -101,7 +101,7 @@ void TensorListBase<T>::Add(T&& item)
        maxNum = maxNum * 2 + 1;
    }
    items[count++] = item;
}

@@ -111,18 +111,18 @@ add an item into the list
template <typename T>
void TensorListBase<T>::Add(const T& item)
{
    if(count == maxNum){
        T * newItems;
        if(mem == NULL)
            newItems = new T[maxNum * 2 + 1];
        else
            newItems = (T*)mem->Alloc(mem->devID, sizeof(T) * (maxNum * 2 + 1));
        memcpy(newItems, items, sizeof(T) * maxNum);
        items = newItems;
        maxNum = maxNum * 2 + 1;
    }
    items[count++] = item;
}

@@ -186,21 +186,21 @@ void TensorListBase<T>::Insert(int pos, const T& item)
template <typename T>
void TensorListBase<T>::Insert(int pos, T&& item)
{
    if(count == maxNum){
        T * newItems;
        if(mem == NULL)
            newItems = new T[maxNum * 2 + 1];
        else
            newItems = (T*)mem->Alloc(mem->devID, sizeof(T) * (maxNum * 2 + 1));
        memcpy(newItems, items, sizeof(T) * maxNum);
        items = newItems;
        maxNum = maxNum * 2 + 1;
    }

    for(int i = count - 1; i >= pos; i--)
        items[i + 1] = items[i];
    items[pos] = item;
    count++;
}

/* get the item at position i */

@@ -226,8 +226,8 @@ inline void TensorListBase<T>::SetItem(int i, const T& item)
template <typename T>
inline void TensorListBase<T>::SetItem(int i, T&& item)
{
    if(i >= 0 && i < count)
        items[i] = std::move(item);
}

@@ -250,7 +250,7 @@ inline int TensorListBase<T>::FindFirst(const T& item)
template <typename T>
void TensorListBase<T>::Clear()
{
    count = 0;
}
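The growth rule above is geometric: when count reaches maxNum, capacity becomes maxNum * 2 + 1 (so 0 → 1 → 3 → 7 → ...), and existing items are copied into the new buffer, keeping appends amortized O(1). A minimal sketch of the same policy outside the library (not the TensorListBase code itself):

    #include <cstring>
    #include <cstdio>

    // Minimal dynamic array using the maxNum * 2 + 1 growth rule above.
    struct IntList {
        int * items = nullptr;
        int count = 0, maxNum = 0;

        void Add(int v) {
            if (count == maxNum) {
                int newMax = maxNum * 2 + 1;      // 0 -> 1 -> 3 -> 7 -> ...
                int * newItems = new int[newMax];
                if (items)
                    std::memcpy(newItems, items, sizeof(int) * maxNum);
                delete[] items;
                items = newItems;
                maxNum = newMax;
            }
            items[count++] = v;
        }
        ~IntList() { delete[] items; }
    };

    int main() {
        IntList l;
        for (int i = 0; i < 10; i++) l.Add(i);
        std::printf("count=%d capacity=%d\n", l.count, l.maxNum);  // count=10 capacity=15
        return 0;
    }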
source/tensor/XList.h

@@ -32,7 +32,7 @@
/* the nts (NiuTrans.Tensor) namespace */
namespace nts {

/* the TensorListBase class */
template <typename T>
struct TensorListBase {

@@ -66,57 +66,57 @@ public:
    /* add an item into the list */
    void Add(T&& item);

    /* add an item into the list */
    void Add(const T& item);

    /* add a number of items into the list */
    void Add(T * inputItems, int inputItemCount);

    /* append a list to the current list */
    void AddList(TensorListBase * l);

    /* insert an item to the given position of the list */
    void Insert(int pos, const T& item);

    /* insert an item to the given position of the list */
    void Insert(int pos, T&& item);

    /* get the item at position i */
    T& GetItem(int i) const;

    /* set the item at position i */
    void SetItem(int i, const T& item);

    /* set the item at position i */
    void SetItem(int i, T&& item);

    /* find the position of the first matched item */
    int FindFirst(const T& item);

    /* clear the data array */
    void Clear();

    /* sort the list */
    void Sort(int itemSize);

    /* reverse the list */
    void Reverse();

    /* remove the item at position i */
    void Remove(int i);

    /* copy the list */
    TensorListBase * Copy(XMem * myMem);

    /* shuffle the list */
    void Shuffle(int nround = 10, int beg = -1, int len = 0);

    /* short */
    T& operator[] (int i) { return GetItem(i); };
    T& Get(int i) { return GetItem(i); };
    void Set(int i, T item) { SetItem(i, item); };
};

struct XTensor;
source/tensor/XMem.cpp

@@ -305,7 +305,7 @@ void XMem::SetComputationMode(bool myIsForComputation)
    cublasDestroy(cublasHandle);
    if(myIsForComputation)
        CheckNTErrors((enum curandStatus)cublasCreate(&cublasHandle) == CURAND_STATUS_SUCCESS,
                      "Cannot create the cublas handle.");
    SetDevice(devIDBackup);
#endif

@@ -321,11 +321,11 @@ void XMem::SetIndex(INT_64 indexSize, MTYPE minSizeFirst, int minSizeNum)
{
    delete[] memIndex;
    delete[] memIndex2;
    delete[] minSizeIndex;

    nodeNum = indexSize;
    nodeNumUsed = minSizeNum * 2;
    indexEntryNum = minSizeNum;

    memIndex = new MPieceNode[nodeNum];
    memset(memIndex, 0, sizeof(MPieceNode) * nodeNum);

@@ -333,12 +333,12 @@ void XMem::SetIndex(INT_64 indexSize, MTYPE minSizeFirst, int minSizeNum)
    memIndex2 = new MPieceNode[nodeNum];
    memset(memIndex2, 0, sizeof(MPieceNode) * nodeNum);

    minSizeIndex = new MTYPE[indexEntryNum];
    memset(minSizeIndex, 0, sizeof(MTYPE) * indexEntryNum);

    minSizeIndex[0] = minSizeFirst;
    for(int i = 1; i < indexEntryNum; i++)
        minSizeIndex[i] = minSizeIndex[i - 1] * 2;

    indexOffset = GetMSB(minSizeFirst);
}

@@ -757,8 +757,8 @@ void * XMem::AllocStandard(int myDevID, MTYPE mySize, bool myIsRebuiltIndex)
    /* if all index nodes are used, we rebuild the index to release the nodes that are free */
    if(nodeNumUsed == nodeNum){
        RebuildIndex();
        CheckNTErrors(nodeNumUsed < nodeNum, "No enough index nodes for the memory pool!");
    }

    /*if(testxmemid == 30){

@@ -961,8 +961,8 @@ release a piece of memory as "free"
*/
void XMem::ReleaseStandard(int myDevID, void * p, MTYPE size)
{
    if(p == NULL)
        return;

    if(size <= minSizeIndex[0])
        size = minSizeIndex[0];

@@ -1092,7 +1092,7 @@ void XMem::RebuildIndex()
        block->mem = NULL;
    }
    else{
        /* if the block is in use, we build the index */
        int pieceCount = 0;
        MTYPE size = 0;
        MHeader * newLast = NULL;
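SetIndex builds doubling size classes (minSizeIndex[i] = 2 * minSizeIndex[i-1]) and records indexOffset = GetMSB(minSizeFirst), which suggests a requested size maps to its class by most-significant-bit position. A speculative sketch of that mapping under the doubling assumption (helper names are mine, not XMem's):

    #include <cstdio>

    // Position of the most significant set bit (0-based); e.g. MSB(256) = 8.
    int GetMSB(unsigned long long x) {
        int b = -1;
        while (x) { x >>= 1; b++; }
        return b;
    }

    // Map a requested size to a doubling size-class index, assuming class i
    // covers sizes around minSizeFirst * 2^i (illustrative only).
    int SizeClass(unsigned long long size, int indexOffset) {
        int c = GetMSB(size) - indexOffset;
        return c < 0 ? 0 : c;
    }

    int main() {
        const int indexOffset = GetMSB(256);       // minSizeFirst = 256
        std::printf("%d %d %d\n",
                    SizeClass(100, indexOffset),   // 0: clamped to first class
                    SizeClass(256, indexOffset),   // 0
                    SizeClass(1024, indexOffset)); // 2
        return 0;
    }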
source/tensor/XQueue-李垠桥的MacBook Pro.cpp
deleted (100644 → 0)

/* NiuTrans.Tensor - an open-source tensor library
 * Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * This is an implementation of queue. Actually we intend to use it to maintain
 * a priority job list
 *
 * $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2017-04-05
 */

#include <stdio.h>
#include <stdlib.h>
#include "XQueue.h"
#include "XDevice.h"
#include "XList.h"
#include "XUtility.h"

/* the nts (NiuTrans.Tensor) namespace */
namespace nts{

/**************************************
job item used in queues
*/

/* constructor */
JobQueueNode::JobQueueNode()
{
    job = NULL;
    args = new TensorList(1);
}

/* de-constructor */
JobQueueNode::~JobQueueNode()
{
    delete args;
}

/**************************************
This class provides standard utilities of Queue.
*/

/* constructor */
XQueue::XQueue(int mySize)
{
    queue = new void*[mySize];
    memset(queue, 0, sizeof(void*) * mySize);

    size = mySize;
    itemCount = 0;
    head = 0;
    tail = 0;
    isJobQueue = false;
    jobDequeuerArgs = new TensorList(1);
    jobDequeuerBreak = false;
    runningJobCount = 0;
    jobStream = NULL;
    jobStream1 = NULL;
    jobStream2 = NULL;

    MUTEX_INIT(enqueueMutex);
    MUTEX_INIT(dequeueMutex);
    COND_INIT(queueCond);
    MUTEX_INIT(jobQueueMutex);
}

/* de-constructor */
XQueue::~XQueue()
{
    delete[] queue;
    delete jobDequeuerArgs;
    delete jobStream;
    delete jobStream1;
    delete jobStream2;

    //if(isJobQueue)
    //    StopJobConsumer();

    MUTEX_DELE(enqueueMutex);
    MUTEX_DELE(dequeueMutex);
    COND_DELE(queueCond);
    MUTEX_DELE(jobQueueMutex);
}

/*
put an item in the tail of the queue
>> item - the item we intend to add into the queue
*/
void XQueue::Enqueue(void * item)
{
    MUTEX_LOCK(enqueueMutex);
    MUTEX_LOCK(dequeueMutex);

    CheckNTErrors((itemCount < size), "Put too many items into the queue!");

    queue[tail] = item;
    tail = (tail + 1) % size;
    itemCount++;

    COND_SIGNAL(queueCond);

    MUTEX_UNLOCK(dequeueMutex);
    MUTEX_UNLOCK(enqueueMutex);
}

/*
fetch an item from head of the queue
<< return - the head item of the queue
*/
void * XQueue::Dequeue()
{
    MUTEX_LOCK(dequeueMutex);

    while(itemCount == 0)
    {
#ifdef WIN32
        MUTEX_UNLOCK(dequeueMutex);
#endif
        COND_WAIT(queueCond, dequeueMutex);
#ifdef WIN32
        MUTEX_LOCK(dequeueMutex);
#endif
    }

    void * r = queue[head];
    head = (head + 1) % size;
    itemCount--;

    MUTEX_UNLOCK(dequeueMutex);

    return r;
}

/* return if the queue is empty */
bool XQueue::IsEmpty()
{
    return itemCount == 0;
}

/* wait until the queue is empty */
void XQueue::WaitForEmptyJobQueue()
{
    while(runningJobCount > 0){
        XSleep(10);
    }

    if(jobStream != NULL){
        CheckNTErrors((jobStream->IsFinished()), "None fineished jobs remain");
        jobStream->Clear();
    }

    if(jobStream1 != NULL){
        CheckNTErrors((jobStream1->IsFinished()), "None fineished jobs remain");
        jobStream1->Clear();
    }

    if(jobStream2 != NULL){
        CheckNTErrors((jobStream2->IsFinished()), "None fineished jobs remain");
        jobStream2->Clear();
    }
}

int devids[16] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
int cpuid = -1;

/*
run job consumer (in another thread)
>> jobDevID - id of the device for running the jobs
*/
void XQueue::RunJobConsumer(int jobDevID)
{
    CheckNTErrors((jobDevID < 16), "device id is out of scope!");

    isJobQueue = true;
    jobDequeuerArgs->Clear();
    jobDequeuerArgs->Add(this);
    jobDequeuerArgs->Add(jobDevID >= 0 ? devids + jobDevID : &cpuid);

    jobDequeuer.function = (TFunction)DequeueJobs;
    jobDequeuer.argv = jobDequeuerArgs;

    jobDequeuer.Start();
    jobDequeuer.LetItGo();
}

/* stop the job consumer */
void XQueue::StopJobConsumer()
{
    jobDequeuerBreak = true;

    XSleep(10);

    EnqueueJob(NULL, NULL);

    jobDequeuer.End();

    isJobQueue = false;
}

/* add a job item to process */
void XQueue::EnqueueJob(void * job, TensorList * jobArgs)
{
    MUTEX_LOCK(jobQueueMutex);
    runningJobCount++;
    MUTEX_UNLOCK(jobQueueMutex);

    JobQueueNode * node = new JobQueueNode();
    node->job = job;
    if(jobArgs != NULL)
        node->args->AddList(jobArgs);

    Enqueue(node);
}

/* job item consumer */
void XQueue::DequeueJobs(TensorList * args)
{
    CheckNTErrors((args->count == 2), "Illegal arguments!");

    XQueue * q = (XQueue*)args->GetItem(0);
    int devID = *(int*)args->GetItem(1);
    int devIDBackup = XDevice::GetGPUDevice();

    if(devID >= 0)
        XDevice::SetGPUDevice(devID);

    while(1){
        JobQueueNode * node = (JobQueueNode*)q->Dequeue();

        if(q->GetJobBreak())
            break;

        CheckNTErrors((node != NULL), "Illegal job!");

        /* process a job */
        ((TFunction)node->job)(node->args);

        delete node;

        MUTEX_LOCK(q->jobQueueMutex);
        q->runningJobCount--;
        MUTEX_UNLOCK(q->jobQueueMutex);
    }

    if(devID >= 0)
        XDevice::SetGPUDevice(devIDBackup);
}

/* get the break flag */
bool XQueue::GetJobBreak()
{
    return jobDequeuerBreak;
}

/* get job stream */
XStream * XQueue::GetJobStream(int n)
{
    if(n == 0)
        return jobStream;
    else if(n == 1)
        return jobStream1;
    else if(n == 2)
        return jobStream2;
    else{
        ShowNTErrors("invalid stream id!");
    }

    return NULL;
}

/* make job streams */
void XQueue::MakeJobStreams(int devID, int devID1, int devID2)
{
    if(devID != INVALID_DEVICE_ID)
        jobStream = new XStream(0, devID);
    if(devID1 != INVALID_DEVICE_ID)
        jobStream1 = new XStream(0, devID1);
    if(devID2 != INVALID_DEVICE_ID)
        jobStream2 = new XStream(0, devID2);
}

} /* end of the nts (NiuTrans.Tensor) namespace */
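The deleted file is a stray editor-conflict copy of XQueue.cpp (note the "李垠桥的MacBook Pro" suffix). The queue it removes is a classic bounded ring buffer guarded by mutexes and a condition variable; the same blocking Enqueue/Dequeue contract in portable C++, with std primitives in place of the MUTEX_*/COND_* macros, looks roughly like this (a sketch, not the library code):

    #include <condition_variable>
    #include <mutex>
    #include <queue>
    #include <thread>
    #include <cstdio>

    // Blocking FIFO mirroring XQueue::Enqueue/Dequeue with std primitives.
    template <typename T>
    class BlockingQueue {
        std::queue<T> q;
        std::mutex m;
        std::condition_variable cv;
    public:
        void Enqueue(T item) {
            { std::lock_guard<std::mutex> lock(m); q.push(std::move(item)); }
            cv.notify_one();                  // wake one waiting consumer
        }
        T Dequeue() {
            std::unique_lock<std::mutex> lock(m);
            cv.wait(lock, [this]{ return !q.empty(); });  // sleep while empty
            T item = std::move(q.front());
            q.pop();
            return item;
        }
    };

    int main() {
        BlockingQueue<int> q;
        std::thread consumer([&]{ std::printf("got %d\n", q.Dequeue()); });
        q.Enqueue(42);
        consumer.join();
        return 0;
    }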
source/tensor/XQueue.cpp

@@ -189,7 +189,7 @@ void XQueue::RunJobConsumer(int jobDevID)
    isJobQueue = true;
    jobDequeuerArgs->Clear();

    // warning: this may cause unknown error
    jobDequeuerArgs->Add((XTensor*)this);
    jobDequeuerArgs->Add(jobDevID >= 0 ? (XTensor*)(devids + jobDevID) : (XTensor*)&cpuid);
source/tensor/XTensor.cpp

@@ -190,7 +190,6 @@ XTensor::XTensor(const XTensor &reference)
    isInit = true;
    isTmp = reference.isTmp;
-   enableGrad = reference.enableGrad;
}

/* copy constructor (with right value reference) */

@@ -219,7 +218,6 @@ XTensor::XTensor(const XTensor &&reference)
    isInit = true;
    isTmp = reference.isTmp;
-   enableGrad = reference.enableGrad;
}

/* de-constructor */

@@ -285,7 +283,7 @@ void XTensor::Init()
    isTmp = false;
    isGrad = false;
    isVar = false;
    enableGrad = false;
    visitMark = 0;
    grad = NULL;
}

@@ -316,6 +314,7 @@ void XTensor::ShallowCopy(const XTensor &tensor)
{
    strcpy(name, tensor.name);
    order = tensor.order;
+   enableGrad = tensor.enableGrad;
    memcpy(dimSize, tensor.dimSize, sizeof(int) * MAX_TENSOR_DIM_NUM);
    memcpy(dimSizeRDI, tensor.dimSizeRDI, sizeof(int) * MAX_TENSOR_DIM_NUM);
    dataType = tensor.dataType;

@@ -403,7 +402,6 @@ XTensor& XTensor::operator= (const XTensor& tensor)
        /* create tensor links for the new tensor */
        XLink::Replace(&tensor, this);
    }
-   enableGrad = tensor.enableGrad;
    return *this;
}

@@ -450,7 +448,6 @@ XTensor& XTensor::operator= (const XTensor&& tensor)
    *tensor.dataP = NULL;

    XLink::Replace(&tensor, this);
-   enableGrad = tensor.enableGrad;
    return *this;
}

@@ -1322,7 +1319,7 @@ set the value of a cell
*/
bool XTensor::Set(DTYPE value, int index[], int size)
{
    CheckNTErrors(dataType == DEFAULT_DTYPE, "The tensor is not in default type.");

    return SetToDevice(devID, GetCell(index, size), value);
}

@@ -2447,7 +2444,7 @@ void InitTensor(XTensor * tensor, const XTensor * reference)
    if(reference->order < 0)
        return;

    tensor->enableGrad = reference->enableGrad;
    InitTensor(tensor, reference->order, reference->dimSize,
               reference->dataType, reference->denseRatio,
               reference->devID, reference->mem);

@@ -2463,7 +2460,7 @@ void InitTensorV2(XTensor * tensor, const XTensor * reference)
    if(reference->order < 0)
        return;

    tensor->enableGrad = reference->enableGrad;
    InitTensorV2(tensor, reference->order, reference->dimSize,
                 reference->dataType, reference->devID);
}

@@ -2478,7 +2475,7 @@ void InitTensorOnCPU(XTensor * tensor, const XTensor * reference)
    if(reference->order < 0)
        return;

    tensor->enableGrad = reference->enableGrad;
    InitTensor(tensor, reference->order, reference->dimSize,
               reference->dataType, reference->denseRatio, -1);
source/tensor/XTensor.h

@@ -151,8 +151,8 @@ public:
    /* indicates whether the tensor keeps the gradient when used as model parameters */
    bool isGrad;

    /* indicates whether the gradient of the tensor should be computed */
    bool enableGrad;

    /* indicates whether the tensor is used as paramters (or variables) */
    bool isVar;

@@ -453,7 +453,7 @@ extern int MakeTensorID();
void InitTensor(XTensor * tensor,
                const int myOrder, const int * myDimSize,
                const TENSOR_DATA_TYPE myDataType = X_FLOAT,
                const float myDenseRatio = 1.0F,
                const int myDevID = -1, XMem * myMem = NULL);

/* initialize a dense XTensor V2 */
void InitTensorV2(XTensor * tensor,
                  const int myOrder, const int * myDimSize,
                  const TENSOR_DATA_TYPE myDataType = X_FLOAT,
source/tensor/core/arithmetic/Div.cpp

@@ -142,6 +142,23 @@ void _DivMe(XTensor * a, const XTensor * b, DTYPE alpha, int leadingDim)
    _Div(a, b, a, alpha, leadingDim);
}

+/*
+element-wise division of two tensors (do it on site)
+keep the result in the input tensor a and return nothing
+a(i) = a(i)/b(i) + \alpha * a(i)
+where i is the index of the item
+
+>> a - tensor a (where we keep the result)
+>> b - tensor b
+>> alpha - the coefficient
+>> leadingDim - the dimension along which we perform broadcasting
+*/
+void DivMe(XTensor& a, const XTensor& b, DTYPE alpha, int leadingDim)
+{
+    _Div(&a, &b, &a, alpha, leadingDim);
+}
+
/*
return a dimension if the division is performed as DivDim (in more details in DivDim.h)
>> a - a tensor
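With the reference overload in place, in-place division no longer needs an address-of at the call site. A hedged usage fragment (assumes the NiuTrans headers and the InitTensor2D setup used in Main.cpp; data filling omitted):

    /* fragment: assumes NiuTrans headers and a linked build */
    XTensor a;
    XTensor b;
    InitTensor2D(&a, 2, 3);
    InitTensor2D(&b, 2, 3);
    /* ... fill a and b ... */

    _DivMe(&a, &b);   // old style: pointer arguments
    DivMe(a, b);      // new style added by this commit: plain references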
source/tensor/core/arithmetic/Div.cu

@@ -122,7 +122,7 @@ where i is the item index
*/
void _CudaDiv(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, int leadingDim)
{
    int leadingDimRDI = a->order - leadingDim - 1;
    CheckNTErrors((a->unitNum <= c->unitNum && b->unitNum <= c->unitNum),
                  "Unmatched tensors in multiplication!");
    CheckNTErrors((a->order == b->order && a->order == c->order), "Unmatched tensors!");
source/tensor/core/arithmetic/Div.h

@@ -40,6 +40,7 @@ a(i) = a(i)/b(i) + \alpha * a(i)
where i is the index of the element
*/
void _DivMe(XTensor * a, const XTensor * b, DTYPE alpha = 0.0, int leadingDim = 0);
+void DivMe(XTensor & a, const XTensor & b, DTYPE alpha = 0.0, int leadingDim = 0);

/*
element-wise division of two tensors (return an XTensor structure)
source/tensor/core/arithmetic/Mask.cpp

@@ -130,6 +130,17 @@ void _MaskMe(XTensor * a, const XTensor * mask, DTYPE alpha)
}

+/*
+mask entries of a given tensor (on site):
+a(i) = a(i) if mask(i) is non-zero
+a(i) = alpha if mask(i) = 0
+where i is the index of the element
+*/
+void MaskMe(XTensor& a, const XTensor& mask, DTYPE alpha)
+{
+    _Mask(&a, &mask, &a, alpha);
+}
+
/*
mask entries of a given tensor (return an XTensor structure):
a(i) = a(i) if mask(i) is non-zero
a(i) = alpha if mask(i) = 0
source/tensor/core/arithmetic/Mask.h

@@ -43,6 +43,7 @@ a(i) = alpha if mask(i) = 0
where i is the index of the element
*/
void _MaskMe(XTensor * a, const XTensor * mask, DTYPE alpha);
+void MaskMe(XTensor & a, const XTensor & mask, DTYPE alpha);

/*
mask entries of a given tensor (return an XTensor structure):
source/tensor/core/arithmetic/MatrixMul2D.cpp

@@ -54,15 +54,15 @@ void _MatrixMul2D(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
    CheckNTErrors((a->order == 2 && b->order == 2 && c->order == 2),
                  "Input tensors must have a order = 2!");

    int an = a->dimSize[0], am = a->dimSize[1];
    int bn = b->dimSize[0], bm = b->dimSize[1];
    int cn = c->dimSize[0], cm = c->dimSize[1];
    int am2 = transposedA == X_TRANS ? an : am;
    int an2 = transposedA == X_TRANS ? am : an;
    int bm2 = transposedB == X_TRANS ? bn : bm;
    int bn2 = transposedB == X_TRANS ? bm : bn;
    int cm2 = cm;
    int cn2 = cn;

    CheckNTErrors((am2 == bn2 && an2 == cn2 && bm2 == cm2),
                  "Unmatched tensors in multiplication!");
source/tensor/core/arithmetic/MatrixMul2DMultiTheading.cpp

@@ -40,21 +40,21 @@ argument7: matrix c (c=a*b*\alpha + c*beta)
*/
void _MatrixMul2DMultiTheading(TensorList * args)
{
    CheckNTErrors(args->count == 2, "invalid argument number!");
    IntList * indexArgs = (IntList*)args->GetItem(0);
    TensorList * matrixArgs = (TensorList*)args->GetItem(1);
    CheckNTErrors(indexArgs->count == 4, "invalid argument number!");
    CheckNTErrors(matrixArgs->count == 5, "invalid argument number!");

    XTensor * a = matrixArgs->GetItem(0);
    XTensor * b = matrixArgs->GetItem(1);
    XTensor * c = matrixArgs->GetItem(2);
    DTYPE alpha = *(DTYPE*)(matrixArgs->GetItem(3));
    DTYPE beta = *(DTYPE*)(matrixArgs->GetItem(4));
    int x1 = indexArgs->GetItem(0);
    int y1 = indexArgs->GetItem(1);
    int x2 = indexArgs->GetItem(2);
    int y2 = indexArgs->GetItem(3);

#ifdef FAST_MATRIX
    int am = a->dimSize[1];
source/tensor/core/arithmetic/Multiply.cpp

@@ -143,6 +143,23 @@ void _MultiplyMe(XTensor * a, const XTensor * b, DTYPE alpha, int leadingDim)
    _Multiply(a, b, a, alpha, leadingDim);
}

+/*
+element-wise product of two tensors (do it on site)
+keep the result in the input tensor a and return nothing
+a(i) = a(i)*b(i) + \alpha * a(i)
+where i is the index of the item
+
+>> a - tensor a (where we keep the result)
+>> b - tensor b
+>> alpha - the coefficient
+>> leadingDim - the dimension along which we perform broadcasting
+*/
+void MultiplyMe(XTensor& a, const XTensor& b, DTYPE alpha, int leadingDim)
+{
+    _Multiply(&a, &b, &a, alpha, leadingDim);
+}
+
/*
return a dimension if the multiplication is performed as MultiplyDim (in more details in MultiplyDim.h)
>> a - a tensor
source/tensor/core/arithmetic/Multiply.cu

@@ -122,7 +122,7 @@ where i is the item index
*/
void _CudaMultiply(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, int leadingDim)
{
    int leadingDimRDI = a->order - leadingDim - 1;
    CheckNTErrors((a->unitNum <= c->unitNum && b->unitNum <= c->unitNum),
                  "Unmatched tensors in multiplication!");
    CheckNTErrors((a->order == b->order && a->order == c->order), "Unmatched tensors!");
source/tensor/core/arithmetic/Multiply.h

@@ -40,6 +40,7 @@ a(i) = a(i)*b(i) + \alpha * a(i)
where i is the index of the element
*/
void _MultiplyMe(XTensor * a, const XTensor * b, DTYPE alpha = 0.0, int leadingDim = 0);
+void MultiplyMe(XTensor & a, const XTensor & b, DTYPE alpha = 0.0, int leadingDim = 0);

/*
element-wise product of two tensors (return an XTensor structure)
source/tensor/core/arithmetic/MultiplyDim.cpp

@@ -139,6 +139,24 @@ void _MultiplyDimMe(XTensor * a, const XTensor * b, int n, DTYPE alpha)
}

+/*
+tensor multiplication (do it on site)
+keep the result in the input tensor a and return nothing
+a = a * b + \alpha * a
+where the size of b is equal to the n-th dimension of a,
+i.e., a is multiplied with b by broadcasting
+
+>> a - a tensor
+>> b - another tensor whose size is equal to that of dimension n of a
+>> n - the dimension index
+>> alpha - the scaling factor
+*/
+void MultiplyDimMe(XTensor& a, const XTensor& b, int n, DTYPE alpha)
+{
+    _MultiplyDim(&a, &b, &a, n, alpha);
+}
+
/*
tensor multiplication (return an XTensor structure and make tensor connections)
make a new tensor to keep the result and return it
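Multiplying by broadcast over dimension n means every slice of a along n is scaled element-wise by b. The effect on a small 2 x 3 example, on raw arrays rather than XTensor (alpha = 0 case):

    #include <cstdio>

    int main() {
        // a is 2 x 3; b has the size of dimension 1 (three entries),
        // so MultiplyDim-style broadcasting scales each column of a by b
        float a[2][3] = {{1, 2, 3}, {4, 5, 6}};
        float b[3] = {10, 100, 1000};

        for (int i = 0; i < 2; i++)
            for (int j = 0; j < 3; j++)
                a[i][j] *= b[j];      // a = a * b with b broadcast over rows

        std::printf("%g %g %g\n", a[1][0], a[1][1], a[1][2]);  // 40 500 6000
        return 0;
    }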
source/tensor/core/arithmetic/MultiplyDim.h

@@ -33,6 +33,7 @@ void _MultiplyDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYP
/* tensor multiplication a = a * b + \alpha * c where the size of b is equal to the n-th dimension of a,
   i.e., a is multiplied with b by broadcasting. we keep the result in the input tensor a and return nothing */
void _MultiplyDimMe(XTensor * a, const XTensor * b, int n, DTYPE alpha = 0.0);
+void MultiplyDimMe(XTensor & a, const XTensor & b, int n, DTYPE alpha = 0.0);

/* tensor multiplication c = a * b where the size of b is equal to the n-th dimension of a,
   i.e., a is multiplied with b by broadcasting. We make a new tensor c to keep the result and return it */
source/tensor/core/arithmetic/Negate.cpp (view file @ 771643c6)
...
@@ -60,6 +60,16 @@ void _NegateMe(XTensor * a)
}

/*
set every entry to its minus value (do it on site)
keep the result in the input tensor a and return nothing
>> a - the tensor we are processing
*/
void NegateMe(XTensor & a)
{
    _Negate(&a, &a);
}

/*
set every entry to its minus value (return an XTensor structure)
make a new tensor to keep the result and return it
>> a - input tensor we are processing
...
source/tensor/core/arithmetic/Negate.h (view file @ 771643c6)
...
@@ -34,6 +34,7 @@ set every entry to its minus value (do it on site)
keep the result in the input tensor a and return nothing
*/
void _NegateMe(XTensor * a);
void NegateMe(XTensor & a);

/*
set every entry to its minus value (return an XTensor structure)
...
source/tensor/core/arithmetic/Sign.cpp (view file @ 771643c6)
...
@@ -66,6 +66,16 @@ void _SignMe(XTensor * a)
}

/*
set every entry to its sign value (do it on site)
keep the result in the input tensor a and return nothing
>> a - the tensor we are processing
*/
void SignMe(XTensor & a)
{
    _Sign(&a, &a);
}

/*
set every entry to its sign value (return an XTensor structure)
make a new tensor to keep the result and return it
>> a - input tensor we are processing
...
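Both single-tensor wrappers follow the same pattern; a short sketch (tensor setup as in the earlier examples):

    XTensor t;
    InitTensor1D(&t, 4, X_FLOAT, -1);
    t.SetDataRand(-1.0F, 1.0F);

    NegateMe(t);   /* t(i) = -t(i)                        */
    SignMe(t);     /* t(i) = -1, 0 or +1 by sign of t(i)  */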
source/tensor/core/arithmetic/Sign.h (view file @ 771643c6)
...
@@ -36,6 +36,12 @@ keep the result in the input tensor a and return nothing
void _SignMe(XTensor * a);

/*
set every entry to its sign value (do it on site)
keep the result in the input tensor a and return nothing
*/
void SignMe(XTensor & a);

/*
set every entry to its sign value (return an XTensor structure)
make a new tensor to keep the result and return it
*/
...
source/tensor/core/arithmetic/Sub.cpp (view file @ 771643c6)
...
@@ -126,6 +126,19 @@ void _SubMe(XTensor * a, const XTensor * b, DTYPE beta)
{
    _Sub(a, b, a, beta);
}

/*
tensor subtraction a = a - b * \beta (do it on site)
keep the result in the tensor a and return nothing
>> a - a tensor
>> b - another tensor
>> beta - the scaling factor
*/
void SubMe(XTensor & a, const XTensor & b, DTYPE beta)
{
    _Sub(&a, &b, &a, beta);
}

/*
return a dimension if the subtraction is performed as SubDim (in more details in SubDim.h)
...
source/tensor/core/arithmetic/Sub.h (view file @ 771643c6)
...
@@ -35,6 +35,7 @@ tensor subtraction a = a - b * \beta
keep the result in the input tensor a and return nothing
*/
void _SubMe(XTensor * a, const XTensor * b, DTYPE beta = (DTYPE)1.0);
void SubMe(XTensor & a, const XTensor & b, DTYPE beta = (DTYPE)1.0);

/*
tensor subtraction c = a - b * \beta
...
source/tensor/core/arithmetic/SubDim.cpp (view file @ 771643c6)
...
@@ -46,79 +46,79 @@ void _SubDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYPE bet
{
    n = MODX(n, a->order);

    CheckNTErrors(a && b && c, "Empty tensor input!");
    CheckNTErrors(a->unitNum == c->unitNum, "Unmatched tensors in subtraction!");
    CheckNTErrors(a->dataType == b->dataType && a->dataType == c->dataType,
                  "Unmatched data types in subtraction!");
    CheckNTErrors(a->order == c->order, "The input tensors do not have the same order in subtraction!");
    CheckNTErrors(!a->isSparse && !b->isSparse && !c->isSparse, "Dense tensors are required!");
    CheckNTErrors(a->dimSize[n] == b->unitNum, "Wrong tensor size!");

    CheckDev(a->devID, b->devID);

    if (beta == 0) {
        _CopyValues(a, c);
        return;
    }

    if (XTensor::IsSameShaped(a, b)) {
        _Sub(a, b, c, beta);
        return;
    }

    if (a->devID >= 0 || b->devID >= 0 || c->devID >= 0) {
#ifdef USE_CUDA
        _CudaSubDim(a, b, c, n, beta);
#else
        ShowNTErrors("Please specify USE_CUDA and recompile the code!");
#endif
    }
    else {
        int stride = 1;
        int blockSize = a->dimSize[n];
        int blockNum = 1;

        for (int i = a->order - 1; i >= 0; i--) {
            if (i > n)
                stride *= a->dimSize[i];
            else if (i < n)
                blockNum *= a->dimSize[i];
        }

        if (a->dataType == DEFAULT_DTYPE) {
            int num = a->unitNum;
            if (stride > 1) {
                for (int i = 0, j = 0; i < num; i += stride, j++) {
                    DTYPE * ap = (DTYPE*)a->data + i;
                    DTYPE   bv = *((DTYPE*)b->data + j % blockSize) * beta;
                    DTYPE * cp = (DTYPE*)c->data + i;
                    for (int k = 0; k < stride; k++)
                        cp[k] = ap[k] - bv;
                }
            }
            else if (stride == 1) {
                DTYPE * bp = (DTYPE*)b->data;
                for (int i = 0; i < num; i += blockSize) {
                    DTYPE * ap = (DTYPE*)a->data + i;
                    DTYPE * cp = (DTYPE*)c->data + i;
                    if (beta == 1.0F) {
                        for (int j = 0; j < blockSize; j++)
                            cp[j] = ap[j] - bp[j];
                    }
                    else {
                        for (int j = 0; j < blockSize; j++)
                            cp[j] = ap[j] - bp[j] * beta;
                    }
                }
            }
            else {
                ShowNTErrors("Something is wrong!");
            }
        }
        else {
            ShowNTErrors("TODO!");
        }
    }
}
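The stride/blockNum factorization above is worth a standalone illustration; a small self-contained sketch with hypothetical dimension sizes showing how elements pair up with b:

    /* for dimSize = {4, 5, 6} and n = 1:
       stride    = 6  (product of dimensions after n)
       blockSize = 5  (dimSize[n], which must equal b->unitNum)
       blockNum  = 4  (product of dimensions before n)
       element (i, j, k) sits at offset (i * blockSize + j) * stride + k
       and has b[j] subtracted from it */
    int dimSize[3] = {4, 5, 6};
    int order = 3, n = 1;
    int stride = 1, blockNum = 1, blockSize = dimSize[n];
    for (int i = order - 1; i >= 0; i--) {
        if (i > n)
            stride *= dimSize[i];
        else if (i < n)
            blockNum *= dimSize[i];
    }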
/*
...
@@ -136,7 +136,7 @@ i.e., a is subtracted with b by broadcasting
*/
void _SubDim(XTensor * a, const XTensor * b, int n, DTYPE beta)
{
    _SubDim(a, b, a, n, beta);
}

/*
...
@@ -155,20 +155,20 @@ i.e., a is subtracted with b by broadcasting
*/
XTensor SubDim(const XTensor & a, const XTensor & b, int n, DTYPE beta)
{
    XTensor c(&a);
    c.SetTMPFlag();

    n = MODX(n, a.order);

    /* call _SubDim function */
    _SubDim(&a, &b, &c, n, beta);

    /* tensor connections */
    XLink::MakeLink(&a, &b, &c, MATH_SUBDIM);
    XLink::AddParamToHeadInt(&c, n);
    XLink::AddParamToHead(&c, beta);

    return c;
}

/*
...
source/tensor/core/arithmetic/SubDim.cu (view file @ 771643c6)
...
@@ -39,25 +39,25 @@ where a is a tensor and b is a row vector
*/
template <class T, bool betaFired>
__global__
void KernelSubWithRow(T * a, T * b, T * c, int rowNum, int colNum, T beta)
{
    __shared__ T bv[MAX_CUDA_THREAD_NUM_PER_BLOCK];

    int col = blockDim.x * blockIdx.x + threadIdx.x;
    int row = blockDim.y * blockIdx.y + threadIdx.y;

    if (col >= colNum || row >= rowNum)
        return;

    if (threadIdx.y == 0)
        bv[threadIdx.x] = b[col];

    __syncthreads();

    int offset = colNum * row + col;
    if (betaFired)
        c[offset] = a[offset] - bv[threadIdx.x] * beta;
    else
        c[offset] = a[offset] - bv[threadIdx.x];
}

/*
...
@@ -75,30 +75,30 @@ where a is a tensor and b is a column vector
*/
template <class T, bool betaFired>
__global__
void KernelSubWithCol(T * a, T * b, T * c, int rowNum, int colNum, int blockSize, int blockNum, T beta)
{
    __shared__ T bv[MAX_CUDA_THREAD_NUM_PER_BLOCK];

    int colIndex = blockDim.x * blockIdx.x + threadIdx.x;
    int row = blockDim.y * blockIdx.y + threadIdx.y;

    int col = colIndex % colNum;
    int block = colIndex / colNum;

    if (row >= rowNum || block >= blockNum)
        return;

    if (threadIdx.x == 0)
        bv[threadIdx.y] = b[row];

    __syncthreads();

    int offset = block * blockSize + row * colNum + col;
    if (betaFired)
        c[offset] = a[offset] - bv[threadIdx.y] * beta;
    else
        c[offset] = a[offset] - bv[threadIdx.y];
}

/*
...
@@ -116,63 +116,63 @@ i.e., a is subtracted with b by broadcasting
*/
void _CudaSubDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYPE beta)
{
    CheckNTErrors(a && b && c, "Empty tensor input!");
    CheckNTErrors(a->unitNum == c->unitNum, "Unmatched tensors in subtraction!");
    CheckNTErrors(a->dataType == b->dataType && a->dataType == c->dataType,
                  "Unmatched data types in subtraction!");
    CheckNTErrors(a->order == c->order, "The input tensors do not have the same order in subtraction!");
    CheckNTErrors(!a->isSparse && !b->isSparse && !c->isSparse, "Dense tensors are required!");
    CheckNTErrors(a->dimSize[n] == b->unitNum, "Wrong tensor size!");

    int stride = 1;
    int blockSize = a->dimSize[n];
    int blockNum = 1;

    for (int i = a->order - 1; i >= 0; i--) {
        if (i > n)
            stride *= a->dimSize[i];
        else if (i < n)
            blockNum *= a->dimSize[i];
    }

    int cudaGrids[3];
    int cudaBlocks[3];

    int devIDBackup = 0;
    ProtectCudaDev(a->devID, devIDBackup);

    if (a->dataType == DEFAULT_DTYPE) {
        if (stride > 1) {
            GDevs.GetCudaThread2D(a->devID, stride * blockNum, blockSize, MAX_INT, cudaGrids, cudaBlocks);
            if (beta == (DTYPE)1.0F)
                KernelSubWithCol<DTYPE, false> <<<dim3(cudaGrids[0], cudaGrids[1]), dim3(cudaBlocks[0], cudaBlocks[1])>>>
                                ((DTYPE*)a->data, (DTYPE*)b->data, (DTYPE*)c->data,
                                 blockSize, stride, blockSize * stride, blockNum, beta);
            else
                KernelSubWithCol<DTYPE, true> <<<dim3(cudaGrids[0], cudaGrids[1]), dim3(cudaBlocks[0], cudaBlocks[1])>>>
                                ((DTYPE*)a->data, (DTYPE*)b->data, (DTYPE*)c->data,
                                 blockSize, stride, blockSize * stride, blockNum, beta);
        }
        else if (stride == 1) {
            GDevs.GetCudaThread2D(a->devID, blockSize, blockNum, MAX_INT, cudaGrids, cudaBlocks);
            if (beta == (DTYPE)1.0F)
                KernelSubWithRow<DTYPE, false> <<<dim3(cudaGrids[0], cudaGrids[1]), dim3(cudaBlocks[0], cudaBlocks[1])>>>
                                ((DTYPE*)a->data, (DTYPE*)b->data, (DTYPE*)c->data,
                                 blockNum, blockSize, beta);
            else
                KernelSubWithRow<DTYPE, true> <<<dim3(cudaGrids[0], cudaGrids[1]), dim3(cudaBlocks[0], cudaBlocks[1])>>>
                                ((DTYPE*)a->data, (DTYPE*)b->data, (DTYPE*)c->data,
                                 blockNum, blockSize, beta);
        }
        else {
            ShowNTErrors("Something is wrong!");
        }
    }
    else {
        ShowNTErrors("TODO!");
    }

    BacktoCudaDev(a->devID, devIDBackup);
}

#endif
...
source/tensor/core/arithmetic/Sum.cpp (view file @ 771643c6)
...
@@ -132,6 +132,19 @@ void _SumMe(XTensor * a, const XTensor * b, DTYPE beta)
    _Sum(a, b, a, beta);
}

/*
tensor summation a = a + b * \beta (do it on site)
keep the result in the tensor a and return nothing
>> a - a tensor
>> b - another tensor
>> beta - the scaling factor
*/
void SumMe(XTensor & a, const XTensor & b, DTYPE beta)
{
    _Sum(&a, &b, &a, beta);
}

/*
return a dimension if the sum is performed as SumDim (in more details in SumDim.h)
>> a - a tensor
...
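Together with SubMe this gives the usual in-place update idiom; a sketch (setup helpers as before, the learning rate is my own choice):

    XTensor p, g;
    InitTensor1D(&p, 8, X_FLOAT, -1);
    InitTensor1D(&g, 8, X_FLOAT, -1);
    p.SetDataRand(-1.0F, 1.0F);
    g.SetDataRand(-1.0F, 1.0F);

    SubMe(p, g, 0.01F);   /* p = p - g * 0.01, a gradient-descent style step */
    SumMe(p, g, 0.01F);   /* p = p + g * 0.01, which undoes it               */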
source/tensor/core/arithmetic/Sum.h (view file @ 771643c6)
...
@@ -34,6 +34,7 @@ tensor summation a = a + b * \beta
keep the result in the input tensor a and return nothing
*/
void _SumMe(XTensor * a, const XTensor * b, DTYPE beta = (DTYPE)1.0);
void SumMe(XTensor & a, const XTensor & b, DTYPE beta = (DTYPE)1.0);

/*
tensor summation c = a + b * \beta
...
source/tensor/core/arithmetic/XTensorBLAS.cpp (view file @ 771643c6)
...
@@ -48,12 +48,12 @@ void _MatrixMULCPU(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
    CheckNTErrors((c->dataType == DEFAULT_DTYPE), "TODO!");

#if defined(USE_BLAS)
    int an = a->dimSize[0];
    int am = a->dimSize[1];
    int bn = b->dimSize[0];
    int bm = b->dimSize[1];
    int cn = c->dimSize[0];
    int cm = c->dimSize[1];

    if (transposedA == X_NOTRANS && transposedB == X_NOTRANS)
        GEMM(CblasRowMajor, CblasNoTrans, CblasNoTrans, cn, cm, am, alpha,
             (DTYPE*)a->data, am, (DTYPE*)b->data, bm, beta, (DTYPE*)c->data, cm);
...
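For the row-major call above, the GEMM arguments map onto C = alpha * op(A) * op(B) + beta * C as M = cn, N = cm, K = am, with leading dimensions am, bm and cm; a worked instance with my own sizes:

    /* a: 2 x 3, b: 3 x 4, c: 2 x 4 (X_NOTRANS, X_NOTRANS)
       GEMM(CblasRowMajor, CblasNoTrans, CblasNoTrans,
            2,                    // M = cn, rows of c
            4,                    // N = cm, columns of c
            3,                    // K = am, columns of a = rows of b
            alpha,
            (DTYPE*)a->data, 3,   // lda = am
            (DTYPE*)b->data, 4,   // ldb = bm
            beta,
            (DTYPE*)c->data, 4);  // ldc = cm                            */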
source/tensor/core/math/Binary.cpp (view file @ 771643c6)
...
@@ -165,7 +165,7 @@ SIMPLE_BINARY_FUNCTION(Shift, _Shift, MATH_SHIFT)
SIMPLE_BINARY_FUNCTION_VOID(Shift, _Shift, MATH_SHIFT)

_SIMPLE_BINARY_FUNCTION_INT(_Mod, _CudaMod, mod)
SIMPLE_BINARY_FUNCTION_ME_INT(ModMe, _Mod)
SIMPLE_BINARY_FUNCTION_INT(Mod, _Mod)

#else
...
source/tensor/core/math/Binary.h (view file @ 771643c6)
...
@@ -37,9 +37,16 @@ void _Scale(const XTensor * a, XTensor * b, float scale);
scale up tensor entries (on site)
b = a * scale
*/
void _ScaleMe(XTensor * a, int scale);
void _ScaleMe(XTensor * a, float scale);

/*
scale up tensor entries (on site)
b = a * scale
*/
void ScaleMe(XTensor & a, int scale);
void ScaleMe(XTensor & a, float scale);

/*
scale up tensor entries
b = a * scale
...
@@ -64,8 +71,15 @@ void _Descale(const XTensor * a, XTensor * b, float scale);
descale tensor entries (on site)
b = a / scale
*/
void _DescaleMe(XTensor * a, int scale);
void _DescaleMe(XTensor * a, float scale);

/*
descale tensor entries (on site)
b = a / scale
*/
void DescaleMe(XTensor & a, int scale);
void DescaleMe(XTensor & a, float scale);

/*
descale tensor entries
...
@@ -91,8 +105,15 @@ void _Shift(const XTensor * a, XTensor * b, float shift);
shift tensor entries (on site)
b = a + shift
*/
void _ShiftMe(XTensor * a, int shift);
void _ShiftMe(XTensor * a, float shift);

/*
shift tensor entries (on site)
b = a + shift
*/
void ShiftMe(XTensor & a, int shift);
void ShiftMe(XTensor & a, float shift);

/*
shift tensor entries
...
@@ -118,7 +139,13 @@ void _Mod(const XTensor * a, XTensor * b, int base);
mod tensor entries (on site)
b = a % base
*/
void _ModMe(XTensor * a, int base);

/*
mod tensor entries (on site)
b = a % base
*/
void ModMe(XTensor & a, int base);

/*
mod tensor entries
...
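A sketch of the new reference overloads in sequence (setup helpers as before; ModMe is analogous but expects integer data):

    XTensor t;
    InitTensor1D(&t, 6, X_FLOAT, -1);
    t.SetDataRand(0.0F, 1.0F);

    ScaleMe(t, 2.0F);     /* t = t * 2 */
    ShiftMe(t, -1.0F);    /* t = t - 1 */
    DescaleMe(t, 2.0F);   /* t = t / 2 */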
source/tensor/core/math/Clip.cpp (view file @ 771643c6)
...
@@ -36,26 +36,26 @@ set every entry to its clip value
void _Clip(const XTensor * a, XTensor * b, DTYPE lower, DTYPE upper)
{
#ifdef USE_CUDA
    /* run it on GPUs */
    if (a->devID >= 0) {
        _CudaClip(a, b, lower, upper);
        return;
    }
#endif

    CheckNTErrors((XTensor::IsSameShaped(a, b)), "Input tensors should have the same type!");
    CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!");

    DTYPE * d = (DTYPE*)a->data;
    DTYPE * db = (DTYPE*)b->data;
    for (int i = 0; i < a->unitNum; i++) {
        if (d[i] > upper)
            db[i] = upper;
        else if (d[i] < lower)
            db[i] = lower;
        else
            db[i] = d[i];
    }
}

/*
...
@@ -67,7 +67,19 @@ keep the result in the input tensor a and return nothing
*/
void _ClipMe(XTensor * a, DTYPE lower, DTYPE upper)
{
    _Clip(a, a, lower, upper);
}

/*
set every entry to its clip value (do it on site)
keep the result in the input tensor a and return nothing
>> a - the tensor we are processing
>> lower - the lower border
>> upper - the upper border
*/
void ClipMe(XTensor & a, DTYPE lower, DTYPE upper)
{
    _Clip(&a, &a, lower, upper);
}

/*
...
@@ -80,18 +92,18 @@ make a new tensor to keep the result and return it
*/
XTensor Clip(const XTensor & a, DTYPE lower, DTYPE upper)
{
    XTensor b(&a);
    b.SetTMPFlag();

    /* call _Clip function */
    _Clip(&a, &b, lower, upper);

    /* tensor connections */
    XLink::MakeLink(&a, NULL, &b, MATH_CLIP);
    XLink::AddParamToHead(&b, lower);
    XLink::AddParamToHead(&b, upper);

    return b;
}

void Clip(const XTensor & a, XTensor & b, DTYPE lower, DTYPE upper)
...
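A typical use is clamping gradients in place before an update; a sketch (setup helpers as before, the bounds are my own):

    XTensor grad;
    InitTensor1D(&grad, 16, X_FLOAT, -1);
    grad.SetDataRand(-10.0F, 10.0F);

    ClipMe(grad, -5.0F, 5.0F);                  /* destructive, overwrites grad */
    XTensor clipped = Clip(grad, -1.0F, 1.0F);  /* non-destructive alternative  */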
source/tensor/core/math/Clip.cu (view file @ 771643c6)
...
@@ -36,18 +36,18 @@ set each entry to its clip value (CUDA Kernel)
>> size - size of the data array
*/
__global__
void KernelClip(DTYPE * a, DTYPE * b, DTYPE lower, DTYPE upper, int size)
{
    int i = blockDim.x * blockIdx.x + threadIdx.x;

    if (i < size) {
        if (a[i] > upper)
            b[i] = upper;
        else if (a[i] < lower)
            b[i] = lower;
        else
            b[i] = a[i];
    }
}

/*
...
@@ -62,7 +62,7 @@ This is for float16 computation
__global__
void KernelClip(__half * a, __half * b, DTYPE lower, DTYPE upper, int size)
{
    return;
}

/*
...
@@ -74,31 +74,31 @@ set each entry to its clip value
*/
void _CudaClip(const XTensor * a, XTensor * b, DTYPE lower, DTYPE upper)
{
    CheckNTErrors((XTensor::IsSameShaped(a, b)), "Input tensors should have the same type!");
    CheckNTErrors((a->isSparse == false), "TODO!");

    int gridSize[3];
    int blockSize[3];

    GDevs.GetCudaThread(a->devID, a->unitNum, gridSize, blockSize);

    dim3 blocks(gridSize[0]);
    dim3 threads(blockSize[0]);

    int devIDBackup;
    ProtectCudaDev(a->devID, devIDBackup);

    if (a->dataType == DEFAULT_DTYPE) {
        KernelClip<<<blocks, threads>>>((DTYPE*)a->data, (DTYPE*)b->data, lower, upper, a->unitNum);
    }
    else if (a->dataType == X_FLOAT16) {
        KernelClip<<<blocks, threads>>>((__half*)a->data, (__half*)b->data, lower, upper, a->unitNum);
    }
    else {
        ShowNTErrors("TODO!");
    }

    BacktoCudaDev(a->devID, devIDBackup);
}

/*
...
source/tensor/core/math/Clip.h (view file @ 771643c6)
...
@@ -33,6 +33,10 @@ void _Clip(const XTensor * a, XTensor * b, DTYPE lower, DTYPE upper);
keep the result in the input tensor a and return nothing */
void _ClipMe(XTensor * a, DTYPE lower, DTYPE upper);

/* set every entry to its clip value (do it on site)
   keep the result in the input tensor a and return nothing */
void ClipMe(XTensor & a, DTYPE lower, DTYPE upper);

/* set every entry to its clip value (return an XTensor structure)
   make a new tensor to keep the result and return it */
XTensor Clip(const XTensor & a, DTYPE lower, DTYPE upper);
...
source/tensor/core/math/Compare.h (view file @ 771643c6)
...
@@ -32,6 +32,9 @@ void _Equal(const XTensor * a, XTensor * b, DTYPE value);
/* check whether every entry is equal to the given value (do it on site) */
void _EqualMe(XTensor * a, DTYPE value);

/* check whether every entry is equal to the given value (do it on site) */
void EqualMe(XTensor & a, DTYPE value);

/* check whether every entry is equal to the given value (return an XTensor structure) */
XTensor Equal(const XTensor & a, DTYPE value);
...
@@ -41,6 +44,9 @@ void _NotEqual(const XTensor * a, XTensor * b, DTYPE value);
/* check whether every entry is not equal to the given value (do it on site) */
void _NotEqualMe(XTensor * a, DTYPE value);

/* check whether every entry is not equal to the given value (do it on site) */
void NotEqualMe(XTensor & a, DTYPE value);

/* check whether every entry is not equal to the given value (return an XTensor structure) */
XTensor NotEqual(const XTensor & a, DTYPE value);
...
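These on-site forms overwrite the tensor with a 0/1 mask; a short sketch (setup helpers as before):

    XTensor t;
    InitTensor1D(&t, 8, X_FLOAT, -1);
    t.SetDataRand(-1.0F, 1.0F);

    NotEqualMe(t, 0.0F);   /* t(i) = (t(i) != 0) ? 1 : 0 */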
source/tensor/core/math/Normalize.cpp (view file @ 771643c6)
...
@@ -44,7 +44,7 @@ where a and b are the scalar and bias respectively, and \epsilon is the adjustme
*/
void _Normalize(const XTensor * input, XTensor * output, int dim,
                const XTensor * mean, const XTensor * var,
                const XTensor * a, const XTensor * b, DTYPE epsilon)
{
    int dimRDI = input->order - dim - 1;
    CheckNTErrors((XTensor::IsSameShaped(input, output)), "Unmatched input tensors!");
    CheckNTErrors((XTensor::IsSameShaped(a, b)), "Unmatched input tensors");
    CheckNTErrors((XTensor::IsSameShaped(mean, var)), "Unmatched input tensors");
...
@@ -113,6 +113,27 @@ void _NormalizeMe(XTensor * input, int dim, const XTensor * mean, const XTensor
{
    _Normalize(input, input, dim, mean, var, a, b, epsilon);
}

/*
normalize the data with normal distribution (do it on site)
keep the result in the input tensor and return nothing
For an input x, x = a * (x - mean) / sqrt(variance + \epsilon) + b
where a and b are the scalar and bias respectively, and \epsilon is the adjustment parameter.
>> input - the input tensor
>> dim - dimension along which we generate the mean and variance
>> mean - the mean of the input
>> var - the variance of the input
>> a - the scalar
>> b - the bias
>> epsilon - a parameter
*/
void NormalizeMe(XTensor & input, int dim, const XTensor & mean, const XTensor & var,
                 const XTensor & a, const XTensor & b, DTYPE epsilon)
{
    _Normalize(&input, &input, dim, &mean, &var, &a, &b, epsilon);
}

/*
normalize the data with normal distribution (return an XTensor structure)
make a new tensor to keep the result and return it
...
source/tensor/core/math/Normalize.cu (view file @ 771643c6)
...
@@ -95,8 +95,8 @@ void _CudaNormalize(const XTensor * input, XTensor * output, int dim,
{
    CheckNTErrors((input->dataType == DEFAULT_DTYPE), "TODO!");

    int dimRDI = input->order - dim - 1;
    int stride = 1;
    int strideNum = input->dimSizeRDI[dimRDI];
    int blockNum = 1;
    for (int i = 0; i < input->order; i++) {
...
source/tensor/core/math/Normalize.h (view file @ 771643c6)
...
@@ -42,6 +42,14 @@ where a and b are the scalar and bias respectively, and \epsilon is the adjustme
void _NormalizeMe(XTensor * input, int dim, const XTensor * mean, const XTensor * var,
                  const XTensor * a, const XTensor * b, DTYPE epsilon);

/*
normalize the data with normal distribution (do it on site)
keep the result in the input tensor and return nothing
For an input x, x = a * (x - mean) / sqrt(variance + \epsilon) + b
where a and b are the scalar and bias respectively, and \epsilon is the adjustment parameter.
*/
void NormalizeMe(XTensor & input, int dim, const XTensor & mean, const XTensor & var,
                 const XTensor & a, const XTensor & b, DTYPE epsilon);

/*
normalize the data with normal distribution (return an XTensor structure)
make a new tensor to keep the result and return it
For an input x, y = a * (x - mean) / sqrt(variance + \epsilon) + b
...
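A layer-norm style sketch of NormalizeMe, assuming ReduceMean and ReduceVariance from the reduce package (signatures as I understand them in this repository) and random stand-ins for the learned scale and bias:

    XTensor x, a, b;
    InitTensor2D(&x, 4, 8, X_FLOAT, -1);
    InitTensor1D(&a, 8, X_FLOAT, -1);   /* scale, learned in practice */
    InitTensor1D(&b, 8, X_FLOAT, -1);   /* bias, learned in practice  */
    x.SetDataRand(-1.0F, 1.0F);
    a.SetDataRand(0.0F, 1.0F);
    b.SetDataRand(0.0F, 1.0F);

    XTensor mean = ReduceMean(x, 1);             /* shape (4) */
    XTensor var = ReduceVariance(x, 1, mean);    /* shape (4) */

    /* x = a * (x - mean) / sqrt(var + 1e-6) + b, in place */
    NormalizeMe(x, 1, mean, var, a, b, 1e-6F);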
source/tensor/core/math/Power.cpp (view file @ 771643c6)
...
@@ -81,6 +81,17 @@ void _PowerMe(XTensor * a, DTYPE p)
}

/*
get the power(a, p) (do it on site)
keep the result in the input tensor a and return nothing
>> a - the tensor
>> p - parameter
*/
void PowerMe(XTensor & a, DTYPE p)
{
    _Power(&a, &a, p);
}

/*
get the power(a, p) (return an XTensor structure)
make a new tensor to keep the result and return it
>> a - input tensor
...
source/tensor/core/math/Power.h (view file @ 771643c6)
...
@@ -36,6 +36,12 @@ keep the result in the input tensor a and return nothing
void _PowerMe(XTensor * a, DTYPE p);

/*
get the power(x, y) (do it on site)
keep the result in the input tensor a and return nothing
*/
void PowerMe(XTensor & a, DTYPE p);

/*
get the power(x, y) (return an XTensor structure)
make a new tensor to keep the result and return it
*/
...
source/tensor/core/math/ScaleAndShift.cpp (view file @ 771643c6)
...
@@ -92,6 +92,21 @@ void _ScaleAndShiftMe(XTensor * a, DTYPE scale, DTYPE shift)
}

/*
scale and shift all tensor entries (do it on site)
keep the result in the input tensor a and return nothing
a = a * scale + shift
>> a - the input/output tensor
>> scale - the scale factor
>> shift - the shift factor
*/
void ScaleAndShiftMe(XTensor & a, DTYPE scale, DTYPE shift)
{
    _ScaleAndShift(&a, &a, scale, shift);
}

/*
scale and shift all tensor entries (return an XTensor structure)
make a new tensor to keep the result and return it
...
source/tensor/core/math/ScaleAndShift.h (view file @ 771643c6)
...
@@ -45,6 +45,13 @@ void _ScaleAndShiftMe(XTensor * a, DTYPE scale, DTYPE shift = 0);
/*
scale and shift all tensor entries (do it on site)
keep the result in the input tensor a and return nothing
a = a * scale + shift
*/
void ScaleAndShiftMe(XTensor & a, DTYPE scale, DTYPE shift = 0);

/*
scale and shift all tensor entries
make a new tensor to keep the result and return it
b = a * scale + shift
*/
...
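ScaleAndShiftMe composes naturally with the other on-site ops; a sketch mapping values from [0, 1] to [-1, 1] and squaring them (setup helpers as before):

    XTensor t;
    InitTensor1D(&t, 8, X_FLOAT, -1);
    t.SetDataRand(0.0F, 1.0F);

    ScaleAndShiftMe(t, 2.0F, -1.0F);   /* t = t * 2 - 1 */
    PowerMe(t, 2.0F);                  /* t = t^2       */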
source/tensor/core/math/Unary.cpp (view file @ 771643c6)
...
@@ -34,7 +34,7 @@ DTYPE square(DTYPE x)
DTYPE round(DTYPE r)
{
    return (r > 0.0) ? (DTYPE)floor(r + 0.5) : (DTYPE)ceil(r - 0.5);
}

DTYPE isnonzero(DTYPE r)
...
source/tensor/core/math/Unary.cu (view file @ 771643c6)
...
@@ -38,7 +38,7 @@ DTYPE cudasquare(DTYPE x)
__device__
DTYPE cudaround(DTYPE r)
{
    return (r > 0.0) ? (DTYPE)floor(r + 0.5) : (DTYPE)ceil(r - 0.5);
}

__device__
...
source/tensor/core/math/Unary.h (view file @ 771643c6)
...
@@ -31,6 +31,9 @@ void _Absolute(const XTensor * a, XTensor * b);
/* set every entry to its absolute value (do it on site)
keep the result in the input tensor a and return nothing */
void _AbsoluteMe(XTensor * a);

/* set every entry to its absolute value (do it on site)
keep the result in the input tensor a and return nothing */
void AbsoluteMe(XTensor & a);

/* set every entry to its absolute value (return an XTensor structure)
make a new tensor to keep the result and return it */
XTensor Absolute(const XTensor & a);
...
@@ -42,6 +45,9 @@ void _Ceil(const XTensor * a, XTensor * b);
/* set every entry to its ceil value (do it on site)
keep the result in the input tensor a and return nothing */
void _CeilMe(XTensor * a);

/* set every entry to its ceil value (do it on site)
keep the result in the input tensor a and return nothing */
void CeilMe(XTensor & a);

/* set every entry to its ceil value (return an XTensor structure)
make a new tensor to keep the result and return it */
XTensor Ceil(const XTensor & a);
...
@@ -53,6 +59,9 @@ void _Exp(const XTensor * a, XTensor * b);
/* set every entry to its exponent value (do it on site)
keep the result in the input tensor a and return nothing */
void _ExpMe(XTensor * a);

/* set every entry to its exponent value (do it on site)
keep the result in the input tensor a and return nothing */
void ExpMe(XTensor & a);

/* set every entry to its exponent value (return an XTensor structure)
make a new tensor to keep the result and return it */
XTensor Exp(const XTensor & a);
...
@@ -64,6 +73,9 @@ void _Floor(const XTensor * a, XTensor * b);
/* set every entry to its floor value (do it on site)
keep the result in the input tensor a and return nothing */
void _FloorMe(XTensor * a);

/* set every entry to its floor value (do it on site)
keep the result in the input tensor a and return nothing */
void FloorMe(XTensor & a);

/* set every entry to its floor value (return an XTensor structure)
make a new tensor to keep the result and return it */
XTensor Floor(const XTensor & a);
...
@@ -75,6 +87,9 @@ void _IsNonZero(const XTensor *a, XTensor *b);
/* if source entry is non-zero, set target entry to be one, otherwise zero (do it on site)
keep the result in the input tensor a and return nothing */
void _IsNonZeroMe(XTensor * a);

/* if source entry is non-zero, set target entry to be one, otherwise zero (do it on site)
keep the result in the input tensor a and return nothing */
void IsNonZeroMe(XTensor & a);

/* if source entry is non-zero, set target entry to be one, otherwise zero (return an XTensor structure)
make a new tensor to keep the result and return it */
XTensor IsNonZero(const XTensor & a);
...
@@ -86,6 +101,9 @@ void _IsZero(const XTensor *a, XTensor *b);
/* if source entry is zero, set target entry to be one, otherwise zero (do it on site)
keep the result in the input tensor a and return nothing */
void _IsZeroMe(XTensor * a);

/* if source entry is zero, set target entry to be one, otherwise zero (do it on site)
keep the result in the input tensor a and return nothing */
void IsZeroMe(XTensor & a);

/* if source entry is zero, set target entry to be one, otherwise zero (return an XTensor structure)
make a new tensor to keep the result and return it */
XTensor IsZero(const XTensor & a);
...
@@ -97,6 +115,9 @@ void _Log(const XTensor * a, XTensor * b);
/* set every entry to its logarithm value (do it on site)
keep the result in the input tensor a and return nothing */
void _LogMe(XTensor * a);

/* set every entry to its logarithm value (do it on site)
keep the result in the input tensor a and return nothing */
void LogMe(XTensor & a);

/* set every entry to its logarithm value (return an XTensor structure)
make a new tensor to keep the result and return it */
XTensor Log(const XTensor & a);
...
@@ -108,6 +129,9 @@ void _Round(const XTensor * a, XTensor * b);
/* set every entry to its round value (do it on site)
keep the result in the input tensor a and return nothing */
void _RoundMe(XTensor * a);

/* set every entry to its round value (do it on site)
keep the result in the input tensor a and return nothing */
void RoundMe(XTensor & a);

/* set every entry to its round value (return an XTensor structure)
make a new tensor to keep the result and return it */
XTensor Round(const XTensor & a);
...
@@ -119,6 +143,9 @@ void _Sqrt(const XTensor * a, XTensor * b);
/* set every entry to its sqrt value (do it on site)
keep the result in the input tensor a and return nothing */
void _SqrtMe(XTensor * a);

/* set every entry to its sqrt value (do it on site)
keep the result in the input tensor a and return nothing */
void SqrtMe(XTensor & a);

/* set every entry to its sqrt value (return an XTensor structure)
make a new tensor to keep the result and return it */
XTensor Sqrt(const XTensor & a);
...
@@ -130,6 +157,9 @@ void _Square(const XTensor * a, XTensor * b);
/* set every entry to its square value (do it on site)
keep the result in the input tensor a and return nothing */
void _SquareMe(XTensor * a);

/* set every entry to its square value (do it on site)
keep the result in the input tensor a and return nothing */
void SquareMe(XTensor & a);

/* set every entry to its square value (return an XTensor structure)
make a new tensor to keep the result and return it */
XTensor Square(const XTensor & a);
...
@@ -142,6 +172,9 @@ void _Sin(const XTensor * a, XTensor * b);
/* set every entry to its sine value (do it on site)
keep the result in the input tensor a and return nothing */
void _SinMe(XTensor * a);

/* set every entry to its sine value (do it on site)
keep the result in the input tensor a and return nothing */
void SinMe(XTensor & a);

/* set every entry to its sine value (return an XTensor structure)
make a new tensor to keep the result and return it */
XTensor Sin(const XTensor & a);
...
@@ -153,6 +186,9 @@ void _Cos(const XTensor * a, XTensor * b);
/* set every entry to its cosine value (do it on site)
keep the result in the input tensor a and return nothing */
void _CosMe(XTensor * a);

/* set every entry to its cosine value (do it on site)
keep the result in the input tensor a and return nothing */
void CosMe(XTensor & a);

/* set every entry to its cosine value (return an XTensor structure)
make a new tensor to keep the result and return it */
XTensor Cos(const XTensor & a);
...
@@ -164,6 +200,9 @@ void _Tan(const XTensor * a, XTensor * b);
/* set every entry to its tangent value (do it on site)
keep the result in the input tensor a and return nothing */
void _TanMe(XTensor * a);

/* set every entry to its tangent value (do it on site)
keep the result in the input tensor a and return nothing */
void TanMe(XTensor & a);

/* set every entry to its tangent value (return an XTensor structure)
make a new tensor to keep the result and return it */
XTensor Tan(const XTensor & a);
...
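All of these wrappers share the one-argument reference form, so in-place pipelines read naturally; a sketch (setup helpers as before):

    XTensor t;
    InitTensor1D(&t, 8, X_FLOAT, -1);
    t.SetDataRand(-4.0F, 4.0F);

    AbsoluteMe(t);   /* t = |t|     */
    SqrtMe(t);       /* t = sqrt(t) */
    ExpMe(t);        /* t = exp(t)  */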
source/tensor/core/reduce/ReduceMax.cpp (view file @ 771643c6)
...
@@ -41,8 +41,8 @@ void _ReduceMax(const XTensor * input, XTensor * output, int dim)
    CheckNTErrors((input->order == output->order + 1), "Incorrect tensor sizes!");
    CheckNTErrors((input->order > dim && dim >= 0), "Illegal dimension to reduce!");
    CheckNTErrors((input->dataType == output->dataType), "Unmatched data types!");

    int dimRDI = input->order - dim - 1;
    CheckNTErrors(dimRDI >= 0, "Wrong dimension!");

    for (int i = 0; i < input->order; i++) {
...
@@ -104,7 +104,7 @@ make a new tensor to keep the result and return it
XTensor ReduceMax(const XTensor & input, int dim)
{
    CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!");

    int order = input.order - 1;
    int * dimSize = new int[order];
    for (int i = 0; i < order; i++) {
...
source/tensor/core/reduce/ReduceMax.cu (view file @ 771643c6)
...
@@ -504,7 +504,7 @@ void _CudaReduceMax(const XTensor * input, XTensor * output, int dim)
    CheckNTErrors(input->order > dim && dim >= 0, "Illegal dimension to reduce!");
    CheckNTErrors(input->dataType == output->dataType, "Unmatched data types!");

    int dimRDI = input->order - dim - 1;
    for (int i = 0; i < input->order; i++) {
        if (i < dimRDI) {
            CheckNTErrors(input->dimSizeRDI[i] == output->dimSizeRDI[i], "Unmatched tensors!");
...
source/tensor/core/reduce/ReduceMean.cpp (view file @ 771643c6)
...
@@ -39,7 +39,7 @@ void _ReduceMean(const XTensor * input, XTensor * output, int dim)
{
    CheckNTErrors((input->order > dim), "Illegal dimension specified!");

    int dimRDI = input->order - dim - 1;
    int num = input->dimSizeRDI[dimRDI];

    _ReduceSum(input, output, dim);
...
@@ -59,7 +59,7 @@ For a 1-dimensional data array a, mean = (1/n) * sum_i input_i
XTensor ReduceMean(const XTensor & input, int dim)
{
    CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!");

    int order = input.order - 1;
    int * dimSize = new int[order];
    for (int i = 0; i < order; i++) {
...
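Reduction removes the chosen dimension (output order is input order minus one); a sketch on a 2 x 4 matrix (setup helpers as before):

    XTensor x;
    InitTensor2D(&x, 2, 4, X_FLOAT, -1);
    x.SetDataRand(0.0F, 1.0F);

    XTensor rowMean = ReduceMean(x, 1);   /* shape (2): mean over columns */
    XTensor colMax = ReduceMax(x, 0);     /* shape (4): max over rows     */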
source/tensor/core/reduce/ReduceSum.cpp
查看文件 @
771643c6
...
@@ -50,7 +50,7 @@ void _ReduceSum(const XTensor * input, XTensor * output, int dim, const XTensor
    CheckNTErrors((input->dataType == output->dataType), "Unmatched data types!");
    CheckNTErrors((shift == NULL || XTensor::IsSameShaped(output, shift)), "Incorrect shift tensor size!");
    int dimRDI = input->order - dim - 1;
    CheckNTErrors(dimRDI >= 0, "Wrong dimension!");
    for(int i = 0; i < input->order; i++){
...
@@ -215,7 +215,7 @@ sum = \sum_i exp((a_i - shift)^power) if isExp == true
XTensor ReduceSum(const XTensor &input, int dim, const XTensor &shift, DTYPE power, bool isExp)
{
    CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!");
    int order = input.order - 1;
    int * dimSize = new int[order];
    for(int i = 0; i < order; i++){
...
@@ -294,7 +294,7 @@ sum = \sum_i exp((a_i)^power) if isExp == true
XTensor ReduceSum(const XTensor &input, int dim, DTYPE power, bool isExp)
{
    CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!");
    int order = input.order - 1;
    int * dimSize = new int[order];
    for(int i = 0; i < order; i++){
...
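Collecting the formulas quoted in the hunk headers, the reduction computed by ReduceSum is:

sum = \sum_i (a_i - shift)^power          if isExp == false
sum = \sum_i exp((a_i - shift)^power)     if isExp == true

The second overload is the shift-free special case, i.e. shift treated as 0.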
source/tensor/core/reduce/ReduceSum.cu
...
@@ -341,7 +341,7 @@ void KernelReduceSumFast(DTYPE * input, DTYPE * output,
    if (tid < blockDim.x / 32)
        value = data[tid];
    else
        value = 0;
    value = shflDownReduceSum(value);
    if (tid == 0 && blockIdx.x < reducedStrideNum) {
...
@@ -692,7 +692,7 @@ void _CudaReduceSum(const XTensor * input, XTensor * output, int dim, const XTen
    CheckNTErrors(input->dataType == output->dataType, "Unmatched data types!");
    CheckNTErrors(shift == NULL || output->unitNum == shift->unitNum, "Incorrect shift tensor size!");
    int dimRDI = input->order - dim - 1;
    for(int i = 0; i < input->order; i++){
        if(i < dimRDI){
            CheckNTErrors(input->dimSizeRDI[i] == output->dimSizeRDI[i], "Unmatched tensors!");
...
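shflDownReduceSum itself is not part of this diff; as a hedged illustration only, the standard warp-wide sum it presumably resembles uses CUDA's shuffle-down intrinsic:

/* a generic warp-level reduction sketch (assumption: shflDownReduceSum
   follows this pattern); after the loop, lane 0 holds the warp's sum */
__device__ float WarpReduceSumSketch(float value)
{
    for (int offset = 16; offset > 0; offset >>= 1)
        value += __shfl_down_sync(0xffffffff, value, offset);
    return value;
}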
source/tensor/core/reduce/ReduceSumSquared.cpp
...
@@ -55,7 +55,7 @@ For a 1-dimensional data array a, sum = \sum_i (a_i - shift)^2
XTensor ReduceSumSquared(const XTensor &input, int dim, const XTensor &shift)
{
    CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!");
    int order = input.order - 1;
    int * dimSize = new int[order];
    for(int i = 0; i < order; i++){
...
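As the hunk header states, this routine computes

sum = \sum_i (a_i - shift)^2

which is the power = 2, isExp = false case of the general ReduceSum above, and it presumably dispatches to it.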
source/tensor/core/reduce/ReduceVariance.cpp
...
@@ -38,7 +38,7 @@ For a 1-dimensional data array a, variance = 1/n * \sum_i (a_i - mean)^2
*/
void _ReduceVariance(const XTensor * input, XTensor * output, int dim, const XTensor * mean)
{
    int dimRDI = input->order - dim - 1;
    int num = input->dimSizeRDI[dimRDI];
    _ReduceSum(input, output, dim, mean, 2.0F);
    _ScaleAndShiftMe(output, (DTYPE)1 / num, 0);
...
@@ -58,7 +58,7 @@ For a 1-dimensional data array a, variance = 1/n * \sum_i (a_i - mean)^2
XTensor ReduceVariance(const XTensor &input, int dim, const XTensor &mean)
{
    CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!");
    int order = input.order - 1;
    int * dimSize = new int[order];
    for(int i = 0; i < order; i++){
...
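The two calls visible in _ReduceVariance implement the header formula directly: a power-2 reduction around the mean, followed by a 1/n scale:

variance = (1/n) * \sum_i (a_i - mean)^2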
source/tensor/core/shape/ConcatenateSolely.cpp
...
@@ -85,7 +85,7 @@ void _ConcatenateSolely(const TensorList * smalls, XTensor * big, int dim)
        }
    }
    else {
        StrList * sourceArrays = new StrList(smalls->count);
        int * blockSizes = new int[smalls->count];
        for (int i = 0; i < smalls->count; i++) {
            XTensor * tensor = (XTensor*)smalls->GetItem(i);
...
source/tensor/core/shape/Permute.h
...
@@ -41,6 +41,13 @@ a = permuted(a)
*/
void _PermuteMe(XTensor * a, int * dimPermute);

/*
permute the tensor dimensions (do it on site).
keep the result in the input tensor and return nothing.
a = permuted(a)
*/
void PermuteMe(XTensor &a, int * dimPermute);

/*
make a tensor with permuted dimensions (return an XTensor structure).
make a new tensor to keep the result and return it.
...
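A minimal usage sketch of the new reference-style variant next to the pointer form it sits beside (tensor sizes are illustrative):

int dimSize[3] = {4, 5, 6};
XTensor * a = NewTensor(3, dimSize);
int perm[3] = {2, 0, 1};

_PermuteMe(a, perm);    /* pointer form, unchanged by this commit */
PermuteMe(*a, perm);    /* reference form added by this commit */

delete a;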
source/tensor/core/shape/Reshape.cpp
...
@@ -43,9 +43,9 @@ XTensor Reshape(XTensor &s, int order, int * dimSize)
    t.Reshape(order, dimSize);

    /* tensor connections */
    XLink::MakeLink(&s, NULL, &t, SHAPE_RESHAPE);

    return t;
}

void Reshape(XTensor &s, XTensor &t, int order, int * dimSize)
...
source/tensor/core/shape/Squeeze.cpp
...
@@ -89,6 +89,20 @@ void _SqueezeMe(XTensor * source, int leadingDim)
}

/*
squeeze the tensor along the specified dimension (do it on site)
keep the result in the input tensor a and return nothing
>> source - the input tensor
>> leadingDim - the dimension that we would squeeze
                if leadingDim = -1, squeeze all dimensions that are 1
                else, squeeze the specified dimension
*/
void SqueezeMe(XTensor &source, int leadingDim)
{
    _Squeeze(&source, &source, leadingDim);
}

/*
squeeze the tensor along the specified dimension (return an XTensor structure)
make a new tensor to keep the result and return it
...
source/tensor/core/shape/Squeeze.h
...
@@ -33,6 +33,10 @@ void _Squeeze(XTensor * source, XTensor * target, int leadingDim = -1);
   keep the result in the input tensor a and return nothing */
void _SqueezeMe(XTensor * source, int leadingDim = -1);

/* squeeze the tensor along the specified dimension (do it on site)
   keep the result in the input tensor a and return nothing */
void SqueezeMe(XTensor &source, int leadingDim = -1);

/* squeeze the tensor along the specified dimension (return an XTensor structure)
   make a new tensor to keep the result and return it */
XTensor Squeeze(XTensor &source, int leadingDim = -1);
...
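A minimal usage sketch of the in-place variants declared above (the shape is illustrative):

int dimSize[3] = {2, 1, 3};
XTensor * s = NewTensor(3, dimSize);

/* reference form added by this commit; with the default leadingDim = -1
   every size-1 dimension is removed: (2, 1, 3) -> (2, 3) */
SqueezeMe(*s);          /* equivalent to _SqueezeMe(s) */

delete s;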
source/tensor/core/sort/Sort.cpp
...
@@ -45,7 +45,7 @@ void _Sort(const XTensor * a, XTensor * b, XTensor * index, int dim)
    CheckNTErrors((a->order == index->order), "Unmatched input tensors!");
    CheckNTErrors((index->dataType == X_INT), "Wrong data type!");
    int dimRDI = a->order - dim - 1;
    /* make the index tensor */
    index->SetAscendingOrder(dim);
...
@@ -67,7 +67,7 @@ void _Sort(const XTensor * a, XTensor * b, XTensor * index, int dim)
        blockNum *= a->dimSizeRDI[i];
    int blockSize = stride * strideNum;
    _CopyValues(a, b);
    for (int k = 0; k < blockNum; k++) {
        for (int i = 0; i < stride; i++) {
            void * dataB = (char*)b->data + (k * blockSize + i) * b->unitSize;
...
@@ -98,6 +98,21 @@ void _SortMe(XTensor * a, XTensor * index, int dim)
}

/*
sort the tensor along a given dimension (do it on site)
keep the result in the input tensor a and return nothing
>> a - input tensor
>> index - index of the items in the resulting tensor
>> dim - the dimension along which the sorting is performed
*/
void SortMe(XTensor &a, XTensor &index, int dim)
{
    _Sort(&a, &a, &index, dim);
}

/*
sort the tensor along a given dimension (return an XTensor structure)
make a new tensor to keep the result and return it
...
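A minimal usage sketch of the new SortMe wrapper; per the checks in _Sort, the index tensor must have the same order as a and the X_INT data type (the values are illustrative):

int dimSize[2] = {3, 2};
XTensor * a = NewTensor(2, dimSize);
XTensor * index = NewTensor(2, dimSize, X_INT);
DTYPE data[3][2] = { {2.0F, 0.0F}, {1.0F, 5.0F}, {3.0F, -1.0F} };
a->SetData(data, 6);

/* sorts a in place along dimension 0; the original position of each
   item is recorded in index */
SortMe(*a, *index, 0);

delete a;
delete index;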
source/tensor/core/sort/Sort.cu
...
@@ -217,7 +217,7 @@ void _CudaSortBig(const XTensor * a, XTensor * b, XTensor * indexA, XTensor * in
    CheckNTErrors((a->order > dim && dim >= 0), "Incorrect dimension specified!");
    CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!");
    int dimRDI = a->order - dim - 1;
    if (k < 0 || k > b->dimSizeRDI[dimRDI])
        k = b->dimSizeRDI[dimRDI];
...
source/tensor/core/sort/Sort.h
...
@@ -35,6 +35,12 @@ keep the result in the input tensor a and return nothing
*/
void _SortMe(XTensor * a, XTensor * index, int dim);

/*
sort the data along a given dimension (do it on site)
keep the result in the input tensor a and return nothing
*/
void SortMe(XTensor &a, XTensor &index, int dim);

/*
sort the data along a given dimension (return an XTensor structure)
make a new tensor to keep the result and return it
...
source/tensor/core/sort/TopK.cu
...
@@ -238,9 +238,9 @@ void KernelTopK(T * input, int stride, int strideNum, int blockNum, int k, T min
    CudaXHeap<MIN_HEAP, T> heapFinal(k, k, heapData + k * threadIdx.y * blockDim.x);

    /*
    merge the result over the workers.
    This can be improved by parallel merging
    */
    if (blockDim.x > 1) {
        for (int p = 1; p < blockDim.x && p < strideNum; p++) {
            CudaHeapNode<T> * hd = heapData + k * (threadIdx.y * blockDim.x + p);
...
@@ -770,22 +770,22 @@ void KernelTopKRadixSelect(unsigned int * input, int stride, int strideNum,
    /*
    if (idx == 0)
    {
        unsigned int* uintOutput = new unsigned int;
        int* tmpIndex = new int;
        //*******************something worng***************************
        cudaMalloc((void **)&uintOutput, sizeof(unsigned int) * k);
        cudaMalloc((void **)&tmpIndex, sizeof(unsigned int) * k);
        //*************************************************************

        collectNumberOld(input, limit, k, desire, uintOutput, tmpIndex, stride, strideNum);
        int blockIndex = idy / stride;
        int offsetInBlock = idy % stride;

        for (int i = stride * k * blockIndex + offsetInBlock, j = 0; j < k; j++, i += stride)
        {
            //for(int i = )
            output[i] = deconvert(uintOutput[j]);
            index[i] = tmpIndex[j];
        }
    }
    __syncthreads();
    */
...
source/tensor/core/utilities/SetAscendingOrder.cu
...
@@ -67,8 +67,8 @@ void CudaSetAscendingOrder(XTensor * a, int dim)
{
    CheckNTErrors((a->dataType == X_INT), "TODO!");

    int dimRDI = a->order - dim - 1;
    int stride = 1;
    int strideNum = a->dimSizeRDI[dimRDI];
    for(int i = 0; i < dimRDI; i++)
        stride *= a->dimSizeRDI[i];
...
source/tensor/core/utilities/XMatrixSegment.cpp
...
@@ -56,7 +56,7 @@ void RunParallel2D(XPRunner * parallelRunner, void * job,
    va_list ap;
    va_start(ap, argNum);
    for (int i = 0; i < argNum; i++) {
        XTensor * p = va_arg(ap, XTensor*);
        jobArgList->Add(p);
    }
    va_end(ap);
...
@@ -77,19 +77,19 @@ void RunParallel2D(XPRunner * parallelRunner, void * job,
       2. other arguments
    */
    for (int i = 0; i < jobNum; i++) {
        IntList * indexArgs = new IntList(4);
        TensorList * blockArgs = new TensorList(argNum);
        int * blockIndex = indexList + i * 4;

        indexArgs->Add(blockIndex[0]);
        indexArgs->Add(blockIndex[1]);
        indexArgs->Add(blockIndex[2]);
        indexArgs->Add(blockIndex[3]);

        for (int j = 0; j < argNum; j++)
            blockArgs->Add(jobArgList->GetItem(j));

        args->Add((XTensor*)indexArgs);
        args->Add((XTensor*)blockArgs);

        jobs->Add((XTensor*)job);
...
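For each job, the loop above packs one IntList of four block indices plus one TensorList of the shared tensor arguments into args. A hypothetical job body (the name and the unpacking convention are assumptions; the hunk only shows the packing side) would read them back like this:

/* hypothetical consumer of the per-job package built by RunParallel2D */
void MyJob2D(TensorList * args)
{
    IntList * indexArgs = (IntList*)args->GetItem(0);        /* 4 ints */
    TensorList * blockArgs = (TensorList*)args->GetItem(1);  /* tensors */

    int x1 = indexArgs->GetItem(0);
    int y1 = indexArgs->GetItem(1);
    int x2 = indexArgs->GetItem(2);
    int y2 = indexArgs->GetItem(3);

    /* ... process the block delimited by the four indices on each
       tensor in blockArgs ... */
}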
source/tensor/test/TAbsolute.cpp
...
@@ -30,84 +30,84 @@ Set every entry to its absolute value.
*/
bool TestAbsolute1()
{
    /* a tensor of size (3, 2) */
    int order = 2;
    int * dimSize = new int[order];
    dimSize[0] = 3;
    dimSize[1] = 2;

    int unitNum = 1;
    for (int i = 0; i < order; i++)
        unitNum *= dimSize[i];

    DTYPE aData[3][2] = { {1.0F, -2.0F},
                          {0.5F, -4.0F},
                          {0.0F, 6.0F} };
    DTYPE answer[3][2] = { {1.0F, 2.0F},
                           {0.5F, 4.0F},
                           {0.0F, 6.0F} };

    /* CPU test */
    bool cpuTest = true;

    /* create tensors */
    XTensor * a = NewTensor(order, dimSize);
    XTensor * b = NewTensor(order, dimSize);
    XTensor * aMe = NewTensor(order, dimSize);
    XTensor bUser;

    /* initialize variables */
    a->SetData(aData, unitNum);
    aMe->SetData(aData, unitNum);

    /* call Absolute function */
    _Absolute(a, b);
    _AbsoluteMe(aMe);
    bUser = Absolute(*a);

    /* check results */
    cpuTest = b->CheckData(answer, unitNum, 1e-4F) &&
              aMe->CheckData(answer, unitNum, 1e-4F) &&
              bUser.CheckData(answer, unitNum, 1e-4F);

#ifdef USE_CUDA
    /* GPU test */
    bool gpuTest = true;

    /* create tensor */
    XTensor * aGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
    XTensor * bGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
    XTensor * aMeGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
    XTensor bUserGPU;

    /* Initialize variables */
    aGPU->SetData(aData, unitNum);
    aMeGPU->SetData(aData, unitNum);

    /* call Absolute function */
    _Absolute(aGPU, bGPU);
    _AbsoluteMe(aMeGPU);
    bUserGPU = Absolute(*aGPU);

    /* check results */
    gpuTest = bGPU->CheckData(answer, unitNum, 1e-4F) &&
              aMeGPU->CheckData(answer, unitNum, 1e-4F) &&
              bUserGPU.CheckData(answer, unitNum, 1e-4F);

    /* destroy variables */
    delete a;
    delete b;
    delete aMe;
    delete aGPU;
    delete bGPU;
    delete aMeGPU;
    delete[] dimSize;

    return cpuTest && gpuTest;
#else
    /* destroy variables */
    delete a;
    delete b;
    delete aMe;
    delete[] dimSize;

    return cpuTest;
#endif // USE_CUDA
}
...
@@ -119,33 +119,33 @@ TODO!!
/* test for Absolute Function */
bool TestAbsolute()
{
    XPRINT(0, stdout, "[TEST Absolute] set every entry to its absolute value\n");
    bool returnFlag = true, caseFlag = true;

    /* case 1 test */
    caseFlag = TestAbsolute1();
    if (!caseFlag) {
        returnFlag = false;
        XPRINT(0, stdout, ">> case 1 failed!\n");
    }
    else
        XPRINT(0, stdout, ">> case 1 passed!\n");

    /* other cases test */
    /*
    TODO!!
    */

    if (returnFlag) {
        XPRINT(0, stdout, ">> All Passed!\n");
    }
    else
        XPRINT(0, stdout, ">> Failed!\n");

    XPRINT(0, stdout, "\n");

    return returnFlag;
}

} // namespace nts(NiuTrans.Tensor)
source/tensor/test/TClip.cpp
...
@@ -31,88 +31,88 @@ Set every entry to its clip value.
*/
bool TestClip1()
{
    /* a tensor of size (3, 2) */
    int aOrder = 2;
    int * aDimSize = new int[aOrder];
    aDimSize[0] = 3;
    aDimSize[1] = 2;

    int aUnitNum = 1;
    for (int i = 0; i < aOrder; i++)
        aUnitNum *= aDimSize[i];

    DTYPE aData[3][2] = { {1.0F, -2.0F},
                          {0.0F, 4.0F},
                          {5.0F, -6.0F} };
    DTYPE answer[3][2] = { {1.0F, -1.0F},
                           {0.0F, 1.0F},
                           {1.0F, -1.0F} };

    /* CPU test */
    bool cpuTest = true;

    /* create tensors */
    XTensor * a = NewTensor(aOrder, aDimSize);
    XTensor * b = NewTensor(aOrder, aDimSize);
    XTensor * aMe = NewTensor(aOrder, aDimSize);
    XTensor bUser;

    /* initialize variables */
    a->SetData(aData, aUnitNum);
    aMe->SetData(aData, aUnitNum);

    /* call Clip function */
    _Clip(a, b, -1.0, 1.0);
    _ClipMe(aMe, -1.0, 1.0);
    bUser = Clip(*a, -1.0, 1.0);

    /* check results */
    cpuTest = b->CheckData(answer, aUnitNum, 1e-4F) &&
              aMe->CheckData(answer, aUnitNum, 1e-4F) &&
              bUser.CheckData(answer, aUnitNum, 1e-4F);

#ifdef USE_CUDA
    /* GPU test */
    bool gpuTest = true;

    /* create tensor */
    XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
    XTensor * bGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
    XTensor * aMeGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
    XTensor bUserGPU;

    /* Initialize variables */
    aGPU->SetData(aData, aUnitNum);
    aMeGPU->SetData(aData, aUnitNum);

    /* call Clip function */
    _Clip(aGPU, bGPU, -1.0, 1.0);
    _ClipMe(aMeGPU, -1.0, 1.0);
    bUserGPU = Clip(*aGPU, -1.0, 1.0);

    /* check results */
    gpuTest = bGPU->CheckData(answer, aUnitNum, 1e-4F) &&
              aMeGPU->CheckData(answer, aUnitNum, 1e-4F) &&
              bUserGPU.CheckData(answer, aUnitNum, 1e-4F);

    /* destroy variables */
    delete a;
    delete b;
    delete aMe;
    delete aGPU;
    delete bGPU;
    delete aMeGPU;
    delete[] aDimSize;

    return cpuTest && gpuTest;
#else
    /* destroy variables */
    delete a;
    delete b;
    delete aMe;
    delete[] aDimSize;

    return cpuTest;
#endif // USE_CUDA
}
...
@@ -124,33 +124,33 @@ TODO!!
/* test for Clip Function */
bool TestClip()
{
    XPRINT(0, stdout, "[TEST Clip] set every entry to its clip value\n");
    bool returnFlag = true, caseFlag = true;

    /* case 1 test */
    caseFlag = TestClip1();
    if (!caseFlag) {
        returnFlag = false;
        XPRINT(0, stdout, ">> case 1 failed!\n");
    }
    else
        XPRINT(0, stdout, ">> case 1 passed!\n");

    /* other cases test */
    /*
    TODO!!
    */

    if (returnFlag) {
        XPRINT(0, stdout, ">> All Passed!\n");
    }
    else
        XPRINT(0, stdout, ">> Failed!\n");

    XPRINT(0, stdout, "\n");

    return returnFlag;
}

} // namespace nts(NiuTrans.Tensor)
source/tensor/test/TCompare.cpp
...
@@ -31,88 +31,88 @@ Comapre whether every entry is equal to the specified value.
*/
bool TestCompare1()
{
    /* a tensor of size (3, 2) */
    int aOrder = 2;
    int * aDimSize = new int[aOrder];
    aDimSize[0] = 3;
    aDimSize[1] = 2;

    int aUnitNum = 1;
    for (int i = 0; i < aOrder; i++)
        aUnitNum *= aDimSize[i];

    DTYPE aData[3][2] = { {1.0F, -2.0F},
                          {0.0F, 4.0F},
                          {5.0F, 1.0F} };
    DTYPE answer[3][2] = { {1.0F, 0.0F},
                           {0.0F, 0.0F},
                           {0.0F, 1.0F} };

    /* CPU test */
    bool cpuTest = true;

    /* create tensors */
    XTensor * a = NewTensor(aOrder, aDimSize);
    XTensor * b = NewTensor(aOrder, aDimSize);
    XTensor * aMe = NewTensor(aOrder, aDimSize);
    XTensor bUser;

    /* initialize variables */
    a->SetData(aData, aUnitNum);
    aMe->SetData(aData, aUnitNum);

    /* call Equal function */
    _Equal(a, b, 1.0);
    _EqualMe(aMe, 1.0);
    bUser = Equal(*a, 1.0);

    /* check results */
    cpuTest = b->CheckData(answer, aUnitNum, 1e-4F) &&
              aMe->CheckData(answer, aUnitNum, 1e-4F) &&
              bUser.CheckData(answer, aUnitNum, 1e-4F);

#ifdef USE_CUDA
    /* GPU test */
    bool gpuTest = true;

    /* create tensor */
    XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
    XTensor * bGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
    XTensor * aMeGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
    XTensor bUserGPU;

    /* Initialize variables */
    aGPU->SetData(aData, aUnitNum);
    aMeGPU->SetData(aData, aUnitNum);

    /* call Equal function */
    _Equal(aGPU, bGPU, 1.0);
    _EqualMe(aMeGPU, 1.0);
    bUserGPU = Equal(*aGPU, 1.0);

    /* check results */
    gpuTest = bGPU->CheckData(answer, aUnitNum, 1e-4F) &&
              aMeGPU->CheckData(answer, aUnitNum, 1e-4F) &&
              bUserGPU.CheckData(answer, aUnitNum, 1e-4F);

    /* destroy variables */
    delete a;
    delete b;
    delete aMe;
    delete aGPU;
    delete bGPU;
    delete aMeGPU;
    delete[] aDimSize;

    return cpuTest && gpuTest;
#else
    /* destroy variables */
    delete a;
    delete b;
    delete aMe;
    delete[] aDimSize;

    return cpuTest;
#endif // USE_CUDA
}
...
@@ -124,33 +124,33 @@ TODO!!
/* test for Compare Function */
bool TestCompare()
{
    XPRINT(0, stdout, "[TEST Compare] compare every entry with specified value\n");
    bool returnFlag = true, caseFlag = true;

    /* case 1 test */
    caseFlag = TestCompare1();
    if (!caseFlag) {
        returnFlag = false;
        XPRINT(0, stdout, ">> case 1 failed!\n");
    }
    else
        XPRINT(0, stdout, ">> case 1 passed!\n");

    /* other cases test */
    /*
    TODO!!
    */

    if (returnFlag) {
        XPRINT(0, stdout, ">> All Passed!\n");
    }
    else
        XPRINT(0, stdout, ">> Failed!\n");

    XPRINT(0, stdout, "\n");

    return returnFlag;
}

} // namespace nts(NiuTrans.Tensor)
source/tensor/test/TConcatenate.cpp
...
@@ -29,7 +29,7 @@ In this case, 2 * (2, 1) -> (2, 2), dim=1.
*/
bool TestConcatenate1()
{
    /* create list */
    TensorList * sList = new TensorList();

    /* a source tensor of size (2, 1) */
...
@@ -83,7 +83,7 @@ bool TestConcatenate1()
    s2->SetData(sData2, sUnitNum2);
    t->SetZeroAll();

    /* add tensors to list */
    sList->Add(s1);
    sList->Add(s2);
...
@@ -99,29 +99,29 @@ bool TestConcatenate1()
    bool gpuTest = true;

    /* create tensor */
    XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
    XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
    XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
    XTensor tUserGPU;

    /* Initialize variables */
    sGPU1->SetData(sData1, sUnitNum1);
    sGPU2->SetData(sData2, sUnitNum2);
    tGPU->SetZeroAll();

    /* clear list */
    sList->Clear();

    /* add tensors to list*/
    sList->Add(sGPU1);
    sList->Add(sGPU2);

    /* call Concatenate function */
    _Concatenate(sList, tGPU, 1);
    tUserGPU = Concatenate(*sList, 1);

    /* check results */
    gpuTest = tGPU->CheckData(answer, tUnitNum) && tUserGPU.CheckData(answer, tUnitNum);

    /* destroy variables */
    delete sList;
...
@@ -135,7 +135,7 @@ bool TestConcatenate1()
    delete[] sDimSize2;
    delete[] tDimSize;

    return cpuTest && gpuTest;
#else
    /* destroy variables */
    delete sList;
...
@@ -156,7 +156,7 @@ In this case, 2 * (2, 1) -> (4, 1), dim=0.
*/
bool TestConcatenate2()
{
    /* create list */
    TensorList * sList = new TensorList();

    /* a source tensor of size (2, 1) */
...
@@ -212,7 +212,7 @@ bool TestConcatenate2()
    s2->SetData(sData2, sUnitNum2);
    t->SetZeroAll();

    /* add tensors to list */
    sList->Add(s1);
    sList->Add(s2);
...
@@ -224,35 +224,35 @@ bool TestConcatenate2()
    cpuTest = t->CheckData(answer, tUnitNum) && tUser.CheckData(answer, tUnitNum);

#ifdef USE_CUDA
    /* GPU test */
    bool gpuTest = true;

    /* create tensor */
    XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
    XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
    XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
    XTensor tUserGPU;

    /* Initialize variables */
    sGPU1->SetData(sData1, sUnitNum1);
    sGPU2->SetData(sData2, sUnitNum2);
    tGPU->SetZeroAll();

    /* clear list */
    sList->Clear();

    /* add tensors to list*/
    sList->Add(sGPU1);
    sList->Add(sGPU2);

    /* call Concatenate function */
    _Concatenate(sList, tGPU, 0);
    tUserGPU = Concatenate(*sList, 0);

    /* check results */
    gpuTest = tGPU->CheckData(answer, tUnitNum) && tUserGPU.CheckData(answer, tUnitNum);

    /* destroy variables */
    delete sList;
    delete s1;
    delete s2;
...
@@ -264,7 +264,7 @@ bool TestConcatenate2()
    delete[] sDimSize2;
    delete[] tDimSize;

    return cpuTest && gpuTest;
#else
    /* destroy variables */
    delete sList;
...
@@ -285,7 +285,7 @@ In this case, (2, 1) + (2, 2) -> (2, 3), dim=1.
*/
bool TestConcatenate3()
{
    /* create list */
    TensorList * sList = new TensorList();

    /* a source tensor of size (2, 1) */
...
@@ -339,7 +339,7 @@ bool TestConcatenate3()
    s2->SetData(sData2, sUnitNum2);
    t->SetZeroAll();

    /* add tensors to list */
    sList->Add(s1);
    sList->Add(s2);
...
@@ -351,35 +351,35 @@ bool TestConcatenate3()
    cpuTest = t->CheckData(answer, tUnitNum) && tUser.CheckData(answer, tUnitNum);

#ifdef USE_CUDA
    /* GPU test */
    bool gpuTest = true;

    /* create tensor */
    XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
    XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
    XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
    XTensor tUserGPU;

    /* Initialize variables */
    sGPU1->SetData(sData1, sUnitNum1);
    sGPU2->SetData(sData2, sUnitNum2);
    tGPU->SetZeroAll();

    /* clear list */
    sList->Clear();

    /* add tensors to list*/
    sList->Add(sGPU1);
    sList->Add(sGPU2);

    /* call Concatenate function */
    _Concatenate(sList, tGPU, 1);
    tUserGPU = Concatenate(*sList, 1);

    /* check results */
    gpuTest = tGPU->CheckData(answer, tUnitNum) && tUserGPU.CheckData(answer, tUnitNum);

    /* destroy variables */
    delete sList;
    delete s1;
    delete s2;
...
@@ -391,7 +391,7 @@ bool TestConcatenate3()
    delete[] sDimSize2;
    delete[] tDimSize;

    return cpuTest && gpuTest;
#else
    /* destroy variables */
    delete sList;
...
@@ -402,7 +402,7 @@ bool TestConcatenate3()
    delete[] sDimSize2;
    delete[] tDimSize;

    return cpuTest;
#endif // USE_CUDA
}
...
@@ -471,28 +471,28 @@ bool TestConcatenate4()
    cpuTest = t->CheckData(answer, tUnitNum) && tUser.CheckData(answer, tUnitNum);

#ifdef USE_CUDA
    /* GPU test */
    bool gpuTest = true;

    /* create tensor */
    XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
    XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
    XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
    XTensor tUserGPU;

    /* Initialize variables */
    sGPU1->SetData(sData1, sUnitNum1);
    sGPU2->SetData(sData2, sUnitNum2);
    tGPU->SetZeroAll();

    /* call Concatenate function */
    _Concatenate(sGPU1, sGPU2, tGPU, 1);
    tUserGPU = Concatenate(*sGPU1, *sGPU2, 1);

    /* check results */
    gpuTest = tGPU->CheckData(answer, tUnitNum) && tUserGPU.CheckData(answer, tUnitNum);

    /* destroy variables */
    delete s1;
    delete s2;
    delete t;
...
@@ -503,7 +503,7 @@ bool TestConcatenate4()
    //delete[] sDimSize2;
    //delete[] tDimSize;

    return cpuTest && gpuTest;
#else
    /* destroy variables */
    delete s1;
...
@@ -513,7 +513,7 @@ bool TestConcatenate4()
    delete[] sDimSize2;
    delete[] tDimSize;

    return cpuTest;
#endif // USE_CUDA
}
...
source/tensor/test/TConcatenateSolely.cpp
...
@@ -30,7 +30,7 @@ In this case, 2 * (2, 1) -> (2, 2), dim=1.
*/
bool TestConcatenateSolely1()
{
    /* create list */
    TensorList * sList = new TensorList();

    /* a source tensor of size (2, 1) */
...
@@ -83,44 +83,44 @@ bool TestConcatenateSolely1()
    s2->SetData(sData2, sUnitNum2);
    t->SetZeroAll();

    /* add tensors to list */
    sList->Add(s1);
    sList->Add(s2);

    /* call ConcatenateSolely function */
    _ConcatenateSolely(sList, t, 1);

    /* check results */
    cpuTest = t->CheckData(answer, tUnitNum);

#ifdef USE_CUDA
    /* GPU test */
    bool gpuTest = true;

    /* create tensor */
    XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
    XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
    XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);

    /* Initialize variables */
    sGPU1->SetData(sData1, sUnitNum1);
    sGPU2->SetData(sData2, sUnitNum2);
    tGPU->SetZeroAll();

    /* clear list */
    sList->Clear();

    /* add tensors to list*/
    sList->Add(sGPU1);
    sList->Add(sGPU2);

    /* call ConcatenateSolely function */
    _ConcatenateSolely(sList, tGPU, 1);

    /* check results */
    gpuTest = tGPU->CheckData(answer, tUnitNum);

    /* destroy variables */
    delete sList;
    delete s1;
    delete s2;
...
@@ -132,7 +132,7 @@ bool TestConcatenateSolely1()
    delete[] sDimSize2;
    delete[] tDimSize;

    return cpuTest && gpuTest;
#else
    /* destroy variables */
    delete sList;
...
@@ -143,7 +143,7 @@ bool TestConcatenateSolely1()
    delete[] sDimSize2;
    delete[] tDimSize;

    return cpuTest;
#endif // USE_CUDA
}
...
@@ -153,7 +153,7 @@ In this case, 2 * (2, 1) -> (4, 1), dim=0.
*/
bool TestConcatenateSolely2()
{
    /* create list */
    TensorList * sList = new TensorList();

    /* a source tensor of size (2, 1) */
...
@@ -208,7 +208,7 @@ bool TestConcatenateSolely2()
    s2->SetData(sData2, sUnitNum2);
    t->SetZeroAll();

    /* add tensors to list */
    sList->Add(s1);
    sList->Add(s2);
...
@@ -219,33 +219,33 @@ bool TestConcatenateSolely2()
    cpuTest = t->CheckData(answer, tUnitNum);

#ifdef USE_CUDA
    /* GPU test */
    bool gpuTest = true;

    /* create tensor */
    XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
    XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
    XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);

    /* Initialize variables */
    sGPU1->SetData(sData1, sUnitNum1);
    sGPU2->SetData(sData2, sUnitNum2);
    tGPU->SetZeroAll();

    /* clear list */
    sList->Clear();

    /* add tensors to list*/
    sList->Add(sGPU1);
    sList->Add(sGPU2);

    /* call concatenatesolely function */
    _ConcatenateSolely(sList, tGPU, 0);

    /* check results */
    gpuTest = tGPU->CheckData(answer, tUnitNum);

    /* destroy variables */
    delete sList;
    delete s1;
    delete s2;
...
@@ -257,7 +257,7 @@ bool TestConcatenateSolely2()
    delete[] sDimSize2;
    delete[] tDimSize;

    return cpuTest && gpuTest;
#else
    /* destroy variables */
    delete sList;
...
@@ -268,7 +268,7 @@ bool TestConcatenateSolely2()
    delete[] sDimSize2;
    delete[] tDimSize;

    return cpuTest;
#endif // USE_CUDA
}
...
@@ -278,7 +278,7 @@ In this case, (2, 1) + (2, 2) -> (2, 3), dim=1.
*/
bool TestConcatenateSolely3()
{
    /* create list */
    TensorList * sList = new TensorList();

    /* a source tensor of size (2, 1) */
...
@@ -331,44 +331,44 @@ bool TestConcatenateSolely3()
    s2->SetData(sData2, sUnitNum2);
    t->SetZeroAll();

    /* add tensors to list */
    sList->Add(s1);
    sList->Add(s2);

    /* call ConcatenateSolely function */
    _ConcatenateSolely(sList, t, 1);

    /* check results */
    cpuTest = t->CheckData(answer, tUnitNum);

#ifdef USE_CUDA
    /* GPU test */
    bool gpuTest = true;

    /* create tensor */
    XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
    XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
    XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);

    /* Initialize variables */
    sGPU1->SetData(sData1, sUnitNum1);
    sGPU2->SetData(sData2, sUnitNum2);
    tGPU->SetZeroAll();

    /* clear list */
    sList->Clear();

    /* add tensors to list*/
    sList->Add(sGPU1);
    sList->Add(sGPU2);

    /* call ConcatenateSolely function */
    _ConcatenateSolely(sList, tGPU, 1);

    /* check results */
    gpuTest = tGPU->CheckData(answer, tUnitNum);

    /* destroy variables */
    delete sList;
    delete s1;
    delete s2;
...
@@ -380,7 +380,7 @@ bool TestConcatenateSolely3()
    delete[] sDimSize2;
    delete[] tDimSize;

    return cpuTest && gpuTest;
#else
    /* destroy variables */
    delete sList;
...
@@ -391,7 +391,7 @@ bool TestConcatenateSolely3()
    delete[] sDimSize2;
    delete[] tDimSize;

    return cpuTest;
#endif // USE_CUDA
}
...
source/tensor/test/TConvertDataType.cpp
...
@@ -31,72 +31,72 @@ In this case, the flaot32 data type is converted to int32 data type.
...
@@ -31,72 +31,72 @@ In this case, the flaot32 data type is converted to int32 data type.
*/
bool TestConvertDataType1()
{
    /* a tensor of size (3, 2) */
    int aOrder = 2;
    int * aDimSize = new int[aOrder];
    aDimSize[0] = 3;
    aDimSize[1] = 2;

    int aUnitNum = 1;
    for (int i = 0; i < aOrder; i++)
        aUnitNum *= aDimSize[i];

    DTYPE aData[3][2] = { {1.0F, 2.0F},
                          {0.5F, 4.0F},
                          {5.0F, 6.0F} };
    int answer[3][2] = { {1, 2},
                         {0, 4},
                         {5, 6} };

    /* CPU test */
    bool cpuTest = true;

    /* create tensors */
    XTensor * a = NewTensor(aOrder, aDimSize);
    XTensor * b = NewTensor(aOrder, aDimSize, X_INT);

    /* initialize variables */
    a->SetData(aData, aUnitNum);
    b->SetZeroAll();

    /* call ConvertDataType function */
    _ConvertDataType(a, b);

    /* check results */
    cpuTest = b->CheckData(answer, aUnitNum);

#ifdef USE_CUDA
    /* GPU test */
    bool gpuTest = true;

    /* create tensor */
    XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
    XTensor * bGPU = NewTensor(aOrder, aDimSize, X_INT, 1.0F, 0);

    /* Initialize variables */
    aGPU->SetData(aData, aUnitNum);

    /* call ConvertDataType function */
    _ConvertDataType(aGPU, bGPU);

    /* check results */
    gpuTest = bGPU->CheckData(answer, aUnitNum);

    /* destroy variables */
    delete a;
    delete b;
    delete aGPU;
    delete bGPU;
    delete[] aDimSize;

    return cpuTest && gpuTest;
#else
    /* destroy variables */
    delete a;
    delete b;
    delete[] aDimSize;

    return cpuTest;
#endif // USE_CUDA
}
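As the answer array above shows, the float-to-int conversion checked here truncates toward zero (0.5F becomes 0) rather than rounding. A minimal standalone sketch of the same expectation in plain C++, independent of the XTensor API (array names here are illustrative only):

#include <cassert>

int main()
{
    /* mirrors the flattened aData/answer pairs above; the C++ cast truncates toward zero */
    float src[6] = { 1.0F, 2.0F, 0.5F, 4.0F, 5.0F, 6.0F };
    int expected[6] = { 1, 2, 0, 4, 5, 6 };
    for (int i = 0; i < 6; i++)
        assert(static_cast<int>(src[i]) == expected[i]);
    return 0;
}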
...
@@ -106,72 +106,72 @@ In this case, the int32 data type is converted to float32 data type.
*/
bool TestConvertDataType2()
{
    /* a tensor of size (3, 2) */
    int aOrder = 2;
    int * aDimSize = new int[aOrder];
    aDimSize[0] = 3;
    aDimSize[1] = 2;

    int aUnitNum = 1;
    for (int i = 0; i < aOrder; i++)
        aUnitNum *= aDimSize[i];

    int aData[3][2] = { {1, 2},
                        {0, 4},
                        {5, 6} };
    DTYPE answer[3][2] = { {1.0F, 2.0F},
                           {0.0F, 4.0F},
                           {5.0F, 6.0F} };

    /* CPU test */
    bool cpuTest = true;

    /* create tensors */
    XTensor * a = NewTensor(aOrder, aDimSize, X_INT);
    XTensor * b = NewTensor(aOrder, aDimSize);

    /* initialize variables */
    a->SetData(aData, aUnitNum);
    b->SetZeroAll();

    /* call ConvertDataType function */
    _ConvertDataType(a, b);

    /* check results */
    cpuTest = b->CheckData(answer, aUnitNum, 1e-4F);

#ifdef USE_CUDA
    /* GPU test */
    bool gpuTest = true;

    /* create tensor */
    XTensor * aGPU = NewTensor(aOrder, aDimSize, X_INT, 1.0F, 0);
    XTensor * bGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);

    /* Initialize variables */
    aGPU->SetData(aData, aUnitNum);

    /* call ConvertDataType function */
    _ConvertDataType(aGPU, bGPU);

    /* check results */
    gpuTest = bGPU->CheckData(answer, aUnitNum, 1e-4F);

    /* destroy variables */
    delete a;
    delete b;
    delete aGPU;
    delete bGPU;
    delete[] aDimSize;

    return cpuTest && gpuTest;
#else
    /* destroy variables */
    delete a;
    delete b;
    delete[] aDimSize;

    return cpuTest;
#endif // USE_CUDA
}
...
@@ -298,53 +298,53 @@ TODO!!
/* test for ConvertDataType Function */
bool TestConvertDataType()
{
    XPRINT(0, stdout, "[TEST ConvertDataType] convert data type\n");
    bool returnFlag = true, caseFlag = true;

    /* case 1 test */
    caseFlag = TestConvertDataType1();
    if (!caseFlag) {
        returnFlag = false;
        XPRINT(0, stdout, ">> case 1 failed!\n");
    }
    else
        XPRINT(0, stdout, ">> case 1 passed!\n");

    /* case 2 test */
    caseFlag = TestConvertDataType2();
    if (!caseFlag) {
        returnFlag = false;
        XPRINT(0, stdout, ">> case 2 failed!\n");
    }
    else
        XPRINT(0, stdout, ">> case 2 passed!\n");

    /* case 3 test */
    caseFlag = TestConvertDataType3();
    if (!caseFlag) {
        returnFlag = false;
        XPRINT(0, stdout, ">> case 3 failed!\n");
    }
    else
        XPRINT(0, stdout, ">> case 3 passed!\n");

    /* other cases test */
    /*
    TODO!!
    */

    if (returnFlag) {
        XPRINT(0, stdout, ">> All Passed!\n");
    }
    else
        XPRINT(0, stdout, ">> Failed!\n");

    XPRINT(0, stdout, "\n");

    return returnFlag;
}

} // namespace nts(NiuTrans.Tensor)
source/tensor/test/TCos.cpp
...
@@ -30,84 +30,84 @@ Set every entry to its cosine value.
*/
bool TestCos1()
{
    /* a tensor of size (3, 2) */
    int order = 2;
    int * dimSize = new int[order];
    dimSize[0] = 3;
    dimSize[1] = 2;

    int unitNum = 1;
    for (int i = 0; i < order; i++)
        unitNum *= dimSize[i];

    DTYPE aData[3][2] = { {1.0F, 2.0F},
                          {-1.0F, -2.0F},
                          {0.0F, 0.5F} };
    DTYPE answer[3][2] = { {0.5403F, -0.4161F},
                           {0.5403F, -0.4161F},
                           {1.0F, 0.8776F} };

    /* CPU test */
    bool cpuTest = true;

    /* create tensors */
    XTensor * a = NewTensor(order, dimSize);
    XTensor * b = NewTensor(order, dimSize);
    XTensor * aMe = NewTensor(order, dimSize);
    XTensor bUser;

    /* initialize variables */
    a->SetData(aData, unitNum);
    aMe->SetData(aData, unitNum);

    /* call Cos function */
    _Cos(a, b);
    _CosMe(aMe);
    bUser = Cos(*a);

    /* check results */
    cpuTest = b->CheckData(answer, unitNum, 1e-4F) &&
              aMe->CheckData(answer, unitNum, 1e-4F) &&
              bUser.CheckData(answer, unitNum, 1e-4F);

#ifdef USE_CUDA
    /* GPU test */
    bool gpuTest = true;

    /* create tensor */
    XTensor * aGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
    XTensor * bGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
    XTensor * aMeGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
    XTensor bUserGPU;

    /* Initialize variables */
    aGPU->SetData(aData, unitNum);
    aMeGPU->SetData(aData, unitNum);

    /* call Cos function */
    _Cos(aGPU, bGPU);
    _CosMe(aMeGPU);
    bUserGPU = Cos(*aGPU);

    /* check results */
    gpuTest = bGPU->CheckData(answer, unitNum, 1e-4F) &&
              aMeGPU->CheckData(answer, unitNum, 1e-4F) &&
              bUserGPU.CheckData(answer, unitNum, 1e-4F);

    /* destroy variables */
    delete a;
    delete b;
    delete aMe;
    delete aGPU;
    delete bGPU;
    delete aMeGPU;
    delete[] dimSize;

    return cpuTest && gpuTest;
#else
    /* destroy variables */
    delete a;
    delete b;
    delete aMe;
    delete[] dimSize;

    return cpuTest;
#endif // USE_CUDA
}
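The expected values are cos(x) rounded to four decimal places, which is why the comparisons use a 1e-4 tolerance. A self-contained check of the answer array against the standard library (illustrative only, not part of the test suite):

#include <cassert>
#include <cmath>

int main()
{
    /* same inputs and four-decimal answers as aData/answer above */
    float x[6] = { 1.0F, 2.0F, -1.0F, -2.0F, 0.0F, 0.5F };
    float expected[6] = { 0.5403F, -0.4161F, 0.5403F, -0.4161F, 1.0F, 0.8776F };
    for (int i = 0; i < 6; i++)
        assert(std::fabs(std::cos(x[i]) - expected[i]) < 1e-4F);
    return 0;
}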
...
@@ -119,33 +119,33 @@ TODO!!
/* test for Cos Function */
bool TestCos()
{
    XPRINT(0, stdout, "[TEST Cos] set every entry to its cosine value\n");
    bool returnFlag = true, caseFlag = true;

    /* case 1 test */
    caseFlag = TestCos1();
    if (!caseFlag) {
        returnFlag = false;
        XPRINT(0, stdout, ">> case 1 failed!\n");
    }
    else
        XPRINT(0, stdout, ">> case 1 passed!\n");

    /* other cases test */
    /*
    TODO!!
    */

    if (returnFlag) {
        XPRINT(0, stdout, ">> All Passed!\n");
    }
    else
        XPRINT(0, stdout, ">> Failed!\n");

    XPRINT(0, stdout, "\n");

    return returnFlag;
}

} // namespace nts(NiuTrans.Tensor)
source/tensor/test/TDiv.cpp
...
@@ -30,97 +30,97 @@ In this case, (2, 2) (2, 2) -> (2, 2), leadingDim=0, alpha=0.
*/
bool TestDiv1()
{
    /* a source tensor of size (2, 2) */
    int sOrder1 = 2;
    int * sDimSize1 = new int[sOrder1];
    sDimSize1[0] = 2;
    sDimSize1[1] = 2;

    int sUnitNum1 = 1;
    for (int i = 0; i < sOrder1; i++)
        sUnitNum1 *= sDimSize1[i];

    /* a source tensor of size (2, 2) */
    int sOrder2 = 2;
    int * sDimSize2 = new int[sOrder2];
    sDimSize2[0] = 2;
    sDimSize2[1] = 2;

    int sUnitNum2 = 1;
    for (int i = 0; i < sOrder2; i++)
        sUnitNum2 *= sDimSize2[i];

    /* a target tensor of size (2, 2) */
    int tOrder = 2;
    int * tDimSize = new int[tOrder];
    tDimSize[0] = 2;
    tDimSize[1] = 2;

    int tUnitNum = 1;
    for (int i = 0; i < tOrder; i++)
        tUnitNum *= tDimSize[i];

    DTYPE sData1[2][2] = { {0.0F, 1.0F},
                           {2.0F, 3.0F} };
    DTYPE sData2[2][2] = { {1.0F, 1.0F},
                           {4.0F, 9.0F} };
    DTYPE answer[2][2] = { {0.0F, 1.0F},
                           {0.5F, 0.3333F} };

    /* CPU test */
    bool cpuTest = true;

    /* create tensors */
    XTensor * s1 = NewTensor(sOrder1, sDimSize1);
    XTensor * s2 = NewTensor(sOrder2, sDimSize2);
    XTensor * t = NewTensor(tOrder, tDimSize);
    XTensor * tMe = NewTensor(tOrder, tDimSize);
    XTensor tUser;

    /* initialize variables */
    s1->SetData(sData1, sUnitNum1);
    tMe->SetData(sData1, sUnitNum1);
    s2->SetData(sData2, sUnitNum2);
    t->SetZeroAll();

    /* call Div function */
    _Div(s1, s2, t, 0, 0);
    _DivMe(tMe, s2, 0, 0);
    tUser = Div(*s1, *s2, 0);

    /* check results */
    cpuTest = t->CheckData(answer, tUnitNum, 1e-4F) &&
              tMe->CheckData(answer, tUnitNum, 1e-4F) &&
              tUser.CheckData(answer, tUnitNum, 1e-4F);

#ifdef USE_CUDA
    /* GPU test */
    bool gpuTest = true;

    /* create tensor */
    XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
    XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
    XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
    XTensor * tMeGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
    XTensor tUserGPU;

    /* Initialize variables */
    sGPU1->SetData(sData1, sUnitNum1);
    tMeGPU->SetData(sData1, sUnitNum1);
    sGPU2->SetData(sData2, sUnitNum2);
    tGPU->SetZeroAll();

    /* call Div function */
    _Div(sGPU1, sGPU2, tGPU, 0, 0);
    _DivMe(tMeGPU, sGPU2, 0, 0);
    tUserGPU = Div(*sGPU1, *sGPU2, 0);

    /* check results */
    gpuTest = tGPU->CheckData(answer, tUnitNum, 1e-4F) &&
              tMeGPU->CheckData(answer, tUnitNum, 1e-4F) &&
              tUserGPU.CheckData(answer, tUnitNum, 1e-4F);

    /* destroy variables */
    delete s1;
    delete s2;
    delete t;
...
@@ -133,7 +133,7 @@ bool TestDiv1()
    delete[] sDimSize2;
    delete[] tDimSize;

    return cpuTest && gpuTest;
#else
    /* destroy variables */
    delete s1;
...
@@ -144,7 +144,7 @@ bool TestDiv1()
    delete[] sDimSize2;
    delete[] tDimSize;

    return cpuTest;
#endif // USE_CUDA
}
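The answer array is the element-wise quotient of sData1 by sData2 (e.g. 2/4 = 0.5 and 3/9 ≈ 0.3333, hence the 1e-4 tolerance). A standalone sketch of that arithmetic, independent of XTensor (array names are illustrative):

#include <cassert>
#include <cmath>

int main()
{
    /* element-wise quotient of the flattened sData1/sData2 arrays above */
    float s1[4] = { 0.0F, 1.0F, 2.0F, 3.0F };
    float s2[4] = { 1.0F, 1.0F, 4.0F, 9.0F };
    float expected[4] = { 0.0F, 1.0F, 0.5F, 0.3333F };
    for (int i = 0; i < 4; i++)
        assert(std::fabs(s1[i] / s2[i] - expected[i]) < 1e-4F);
    return 0;
}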
...
@@ -156,33 +156,33 @@ TODO!!
/* test for Div Function */
bool TestDiv()
{
    XPRINT(0, stdout, "[TEST Div] element-wise division of two tensors\n");
    bool returnFlag = true, caseFlag = true;

    /* case 1 test */
    caseFlag = TestDiv1();
    if (!caseFlag) {
        returnFlag = false;
        XPRINT(0, stdout, ">> case 1 failed!\n");
    }
    else
        XPRINT(0, stdout, ">> case 1 passed!\n");

    /* other cases test */
    /*
    TODO!!
    */

    if (returnFlag) {
        XPRINT(0, stdout, ">> All Passed!\n");
    }
    else
        XPRINT(0, stdout, ">> Failed!\n");

    XPRINT(0, stdout, "\n");

    return returnFlag;
}

} // namespace nts(NiuTrans.Tensor)
source/tensor/test/TDivDim.cpp
...
@@ -127,8 +127,8 @@ bool TestDivDim1()
#else
    /* destroy variables */
    delete a;
    delete b;
    delete c;
    delete cMe;
    delete[] aDimSize;
    delete[] bDimSize;
...
@@ -241,8 +241,8 @@ bool TestDivDim2()
#else
    /* destroy variables */
    delete a;
    delete b;
    delete c;
    delete cMe;
    delete[] aDimSize;
    delete[] bDimSize;
...
source/tensor/test/TExp.cpp
...
@@ -30,88 +30,88 @@ Set every entry to its exponent value.
*/
bool TestExp1()
{
    /* a tensor of size (3, 2) */
    int order = 2;
    int * dimSize = new int[order];
    dimSize[0] = 3;
    dimSize[1] = 2;

    int unitNum = 1;
    for (int i = 0; i < order; i++)
        unitNum *= dimSize[i];

    DTYPE aData[3][2] = { {1.0F, 2.0F},
                          {-1.0F, -2.0F},
                          {0.0F, 0.5F} };
    DTYPE answer[3][2] = { {2.7183F, 7.3891F},
                           {0.3679F, 0.1353F},
                           {1.0F, 1.6487F} };

    /* CPU test */
    bool cpuTest = true;

    /* create tensors */
    XTensor * a = NewTensor(order, dimSize);
    XTensor * b = NewTensor(order, dimSize);
    XTensor * aMe = NewTensor(order, dimSize);
    XTensor bUser;

    /* initialize variables */
    a->SetData(aData, unitNum);
    aMe->SetData(aData, unitNum);

    /* call Exp function */
    _Exp(a, b);
    _ExpMe(aMe);
    bUser = Exp(*a);

    /* check results */
    cpuTest = b->CheckData(answer, unitNum, 1e-4F) &&
              aMe->CheckData(answer, unitNum, 1e-4F) &&
              bUser.CheckData(answer, unitNum, 1e-4F);

#ifdef USE_CUDA
    /* GPU test */
    bool gpuTest = true;

    /* create tensor */
    XTensor * aGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
    XTensor * bGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
    XTensor * aMeGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
    XTensor bUserGPU;

    /* Initialize variables */
    aGPU->SetData(aData, unitNum);
    aMeGPU->SetData(aData, unitNum);

    /* call Exp function */
    _Exp(aGPU, bGPU);
    _ExpMe(aMeGPU);
    bUserGPU = Exp(*aGPU);

    /* check results */
    gpuTest = bGPU->CheckData(answer, unitNum, 1e-4F) &&
              aMeGPU->CheckData(answer, unitNum, 1e-4F) &&
              bUserGPU.CheckData(answer, unitNum, 1e-4F);

    /* destroy variables */
    delete a;
    delete b;
    delete aMe;
    delete aGPU;
    delete bGPU;
    delete aMeGPU;
    delete[] dimSize;

    return cpuTest && gpuTest;
#else
    /* destroy variables */
    delete a;
    delete b;
    delete aMe;
    delete[] dimSize;

    return cpuTest;
#endif // USE_CUDA
}
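Likewise, the expected values here are exp(x) rounded to four decimal places. A matching standalone check (illustrative only, not part of the test suite):

#include <cassert>
#include <cmath>

int main()
{
    /* same inputs and four-decimal answers as aData/answer above */
    float x[6] = { 1.0F, 2.0F, -1.0F, -2.0F, 0.0F, 0.5F };
    float expected[6] = { 2.7183F, 7.3891F, 0.3679F, 0.1353F, 1.0F, 1.6487F };
    for (int i = 0; i < 6; i++)
        assert(std::fabs(std::exp(x[i]) - expected[i]) < 1e-4F);
    return 0;
}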
...
@@ -123,33 +123,33 @@ TODO!!
/* test for Exp Function */
bool TestExp()
{
    XPRINT(0, stdout, "[TEST Exp] set every entry to its exponent value\n");
    bool returnFlag = true, caseFlag = true;

    /* case 1 test */
    caseFlag = TestExp1();
    if (!caseFlag) {
        returnFlag = false;
        XPRINT(0, stdout, ">> case 1 failed!\n");
    }
    else
        XPRINT(0, stdout, ">> case 1 passed!\n");

    /* other cases test */
    /*
    TODO!!
    */

    if (returnFlag) {
        XPRINT(0, stdout, ">> All Passed!\n");
    }
    else
        XPRINT(0, stdout, ">> Failed!\n");

    XPRINT(0, stdout, "\n");

    return returnFlag;
}

} // namespace nts(NiuTrans.Tensor)
The diffs of the following files are collapsed in this view:

source/tensor/test/THardTanH.cpp
source/tensor/test/TIdentity.cpp
source/tensor/test/TLog.cpp
source/tensor/test/TLogSoftmax.cpp
source/tensor/test/TMerge.cpp
source/tensor/test/TMultiply.cpp
source/tensor/test/TNegate.cpp
source/tensor/test/TNormalize.cpp
source/tensor/test/TPower.cpp
source/tensor/test/TRectify.cpp
source/tensor/test/TRound.cpp
source/tensor/test/TSigmoid.cpp
source/tensor/test/TSign.cpp
source/tensor/test/TSin.cpp
source/tensor/test/TSoftmax.cpp
source/tensor/test/TSplit.cpp
source/tensor/test/TSub.cpp
source/tensor/test/TSubDim.cpp
source/tensor/test/TSum.cpp
source/tensor/test/TSumDim.cpp
source/tensor/test/TTan.cpp
source/tensor/test/TTranspose.cpp
source/tensor/test/Test.cpp