魏冰浩 / Tensor.LowPrecision · Commit fe868e5c

authored 5 years ago by linye

update

parent 6a3d713a

Showing 11 changed files with 1091 additions and 51 deletions.
Changed files:

    source/tensor/core/arithmetic/MultiplyDim.cu    +1    -1
    source/tensor/core/arithmetic/Negate.cu         +0    -20
    source/tensor/core/arithmetic/Negate.cuh        +2    -5
    source/tensor/core/arithmetic/XTensorBLAS.cu    +1    -1
    source/tensor/function/LogSoftmax.cu            +8    -8
    source/tensor/test/TClip.cpp                    +277  -0
    source/tensor/test/TLogSoftmax.cpp              +213  -15
    source/tensor/test/TMultiplyDim.cpp             +219  -0
    source/tensor/test/TNegate.cpp                  +92   -0
    source/tensor/test/TScaleAndShift.cpp           +277  -0
    source/tensor/test/Test.cpp                     +1    -1
source/tensor/core/arithmetic/MultiplyDim.cu (view file @ fe868e5c)
...
...
@@ -169,7 +169,7 @@ void _CudaMultiplyDim(const XTensor * a, const XTensor * b, XTensor * c, int n,
             ShowNTErrors("Something is wrong!");
         }
     }
-    if (a->dataType == X_FLOAT16) {
+    else if (a->dataType == X_FLOAT16) {
         unsigned short temp = FloatToFloat16(alpha);
         half alpha1 = *((half *)&temp);
         if (stride > 1) {
...
...
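Two things are going on in this hunk: the fix itself makes the float16 path an else-if, so it is mutually exclusive with the float path instead of also running after it, and the context lines show the idiom used to turn the host-side float alpha into a half scalar by bit-reinterpretation. A minimal standalone sketch of that conversion, using CUDA's cuda_fp16.h helpers in place of NiuTrans.Tensor's own FloatToFloat16 (an editor's illustration, not repo code; compile with nvcc):

#include <cuda_fp16.h>
#include <cstdio>
#include <cstring>

int main()
{
    float alpha = 1.5F;
    __half alphaHalf = __float2half(alpha);    /* float -> half, like FloatToFloat16 */
    unsigned short bits;
    memcpy(&bits, &alphaHalf, sizeof(bits));   /* the same 16 bits the code aliases via *((half *)&temp) */
    printf("alpha = %g, half bits = 0x%04x, back to float = %g\n",
           alpha, bits, __half2float(alphaHalf));
    return 0;
}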
source/tensor/core/arithmetic/Negate.cu (view file @ fe868e5c)
...
...
@@ -43,26 +43,6 @@ void KernelNegate(T * a, T * b, int size)
         b[i] = -a[i];
 }
 
-///*
-//set each entry to its negtive value (CUDA Kernel)
-//This is for float16 computation
-//>> a - pointer to the input data array
-//>> b - pointer to the output data array
-//>> size - size of the data array
-//*/
-//__global__
-//void KernelNegate(__half * a, __half * b, int size)
-//{
-//    int i = blockDim.x * blockIdx.x + threadIdx.x;
-//
-//#if __CUDA_ARCH__ >= 530 || !defined(__CUDA_ARCH__)
-//    if (i < size)
-//        b[i] = __hsub(__float2half(0), a[i]);
-//#else
-//    if (i < size)
-//        b[i] = __float2half(-__half2float(a[i]));
-//#endif
-//}
 
 /*
 set each entry to its negtive value
...
...
source/tensor/core/arithmetic/Negate.cuh (view file @ fe868e5c)
...
...
@@ -29,12 +29,9 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
 #ifdef USE_CUDA
 
 /* set each entry to its negtive value (CUDA Kernel) */
+template <class T>
 __global__
-void KernelNegate(DTYPE * a, DTYPE * b, int size);
-
-/* set each entry to its negtive value (CUDA Kernel) with float16 data type*/
-__global__
-void KernelNegate(__half * a, __half * b, int size);
+void KernelNegate(T * a, T * b, int size);
 
 /* set each entry to its negtive value */
 void _CudaNegate(const XTensor * a, XTensor * b);
...
...
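The header change replaces two per-type kernel declarations with one template, matching the Negate.cu change above that deletes the commented-out __half overload. A minimal, self-contained CUDA sketch of the same one-template-instead-of-overloads pattern (illustrative names and host code, not the NiuTrans.Tensor build; compile with nvcc, and with -arch=sm_53 or newer if instantiated with __half):

#include <cuda_runtime.h>
#include <cstdio>

template <class T>
__global__
void KernelNegate(T * a, T * b, int size)
{
    int i = blockDim.x * blockIdx.x + threadIdx.x;
    if (i < size)
        b[i] = -a[i];    /* unary minus is defined for float, int, and (on sm_53+) __half */
}

int main()
{
    const int n = 6;
    float hostA[n] = {1.0F, -2.0F, -3.0F, 4.0F, 5.0F, -6.0F}, hostB[n];
    float *devA, *devB;
    cudaMalloc(&devA, n * sizeof(float));
    cudaMalloc(&devB, n * sizeof(float));
    cudaMemcpy(devA, hostA, n * sizeof(float), cudaMemcpyHostToDevice);
    KernelNegate<float><<<1, n>>>(devA, devB, n);
    cudaMemcpy(hostB, devB, n * sizeof(float), cudaMemcpyDeviceToHost);
    for (int i = 0; i < n; i++)
        printf("%g ", hostB[i]);    /* -1 2 3 -4 -5 6 */
    printf("\n");
    cudaFree(devA);
    cudaFree(devB);
    return 0;
}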
source/tensor/core/arithmetic/XTensorBLAS.cu (view file @ fe868e5c)
...
...
@@ -17,7 +17,7 @@
 /*
 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
-* $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-06 float16 added
+* $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-06 float16/int8 added
 */
 
 #include "../../XUtility.h"
...
...
source/tensor/function/LogSoftmax.cu (view file @ fe868e5c)
...
...
@@ -83,7 +83,7 @@ void KernelLogSoftmaxComputeByRow(T * x, T * max, T * sum, T * y, int rowNum, in
         int key = i * colNum + j;
         if (dataType == X_FLOAT) {
-            DTYPE r = log((DTYPE)exp(x[key] - inputMax[threadIdx.x]) / (DTYPE)inputSum[threadIdx.x]);
+            DTYPE r = log((DTYPE)exp((DTYPE)(x[key] - inputMax[threadIdx.x])) / (DTYPE)inputSum[threadIdx.x]);
             if (isnan(r))
                 r = LOGPROB_MIN;
...
...
@@ -137,7 +137,7 @@ void KernelLogSoftmaxComputeByCol(T * x, T * max, T * sum, T * y, int rowNum, in
     if (i < rowNum && j < colNum) {
         int key = i * colNum + j;
         if (dataType == X_FLOAT) {
-            DTYPE r = log((DTYPE)exp(x[key] - inputMax[threadIdx.y]) / (DTYPE)inputSum[threadIdx.y]);
+            DTYPE r = log((DTYPE)exp((DTYPE)(x[key] - inputMax[threadIdx.y])) / (DTYPE)inputSum[threadIdx.y]);
             if (isnan(r))
                 r = LOGPROB_MIN;
...
...
@@ -247,10 +247,10 @@ void KernelExpLoss(T * dedy, T * dedx, T * y, int size, LOSS_FUNCTION_NAME lossN
     if (i < size) {
         /* dE/dx_j = exp(y_j) */
         if (lossName == CROSSENTROPY)
-            dedx[i] = exp(y[i]);
+            dedx[i] = exp(((DTYPE)y[i]));
         /* dE/dx_j = exp(y_j) */
         else if (lossName == SQUAREDERROR)
-            dedx[i] = exp(y[i]);
+            dedx[i] = exp(((DTYPE)y[i]));
         else if (lossName == ONEHOTERROR)
             dedx[i] = 0;
         else
...
...
@@ -283,13 +283,13 @@ void KernelLogSoftmaxBackwardDEDS(T * dedy, T * dedx, T * gold, T * y, T * x,
         DTYPE r = 0;
         /* dE/ds_j = exp(y_j) */
         if (lossName == CROSSENTROPY)
-            r = -(DTYPE)gold[i] + (DTYPE)exp(y[i]);
+            r = -(DTYPE)gold[i] + (DTYPE)exp(((DTYPE)y[i]));
         /* dE/ds_j = exp(y_j) */
         else if (lossName == SQUAREDERROR)
-            r = -(DTYPE)gold[i] + (DTYPE)exp(y[i]);
+            r = -(DTYPE)gold[i] + (DTYPE)exp(((DTYPE)y[i]));
         else if (lossName == ONEHOTERROR) {
             if ((DTYPE)gold[i] == 1.0)
-                r = -(DTYPE)gold[i] + (DTYPE)exp(y[i]);
+                r = -(DTYPE)gold[i] + (DTYPE)exp(((DTYPE)y[i]));
             else
                 r = 0;
         }
...
...
@@ -366,7 +366,7 @@ void KernelLogSoftmaxBackwardDEDSSparseByRow(T * dedy, T * dedx, void * gold, T
         else if (lossName == ONEHOTERROR) {
             int offset = colNum * ni + mi;
             if (value == 1.0F)
-                dedx[offset] += (-value + exp(y[offset]));
+                dedx[offset] += (-value + exp(((DTYPE)y[offset])));
             //dedx[offset] += -value * 0.005;
         }
     }
...
...
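All eight LogSoftmax.cu changes are the same cast: once these kernels are templated over T, an expression like exp(y[i]) no longer resolves cleanly when T is __half, because the standard exp/log overloads take float or double. Promoting the operand to DTYPE (float) first keeps one code path valid for both instantiations and does the math in single precision. A reduced, runnable illustration of the patched expression shape (KernelEntry is an assumed name for this sketch, not NiuTrans code; compile with nvcc):

#include <cuda_fp16.h>
#include <cstdio>

typedef float DTYPE;    /* the default float type, as in NiuTrans.Tensor */

/* same expression shape as the patched line in KernelLogSoftmaxComputeByRow */
template <class T>
__global__
void KernelEntry(T * x, T rowMax, T rowSum, DTYPE * r)
{
    *r = log((DTYPE)exp((DTYPE)(*x - rowMax)) / (DTYPE)rowSum);
}

int main()
{
    /* row {0, 1, 2}: max = 2, sum of exp(x - max) = e^-2 + e^-1 + 1 */
    float x = 0.0F, rowMax = 2.0F, rowSum = 1.50321F;
    float *devX, *devR, r;
    cudaMalloc(&devX, sizeof(float));
    cudaMalloc(&devR, sizeof(float));
    cudaMemcpy(devX, &x, sizeof(float), cudaMemcpyHostToDevice);
    KernelEntry<float><<<1, 1>>>(devX, rowMax, rowSum, devR);
    cudaMemcpy(&r, devR, sizeof(float), cudaMemcpyDeviceToHost);
    printf("%.4f\n", r);    /* ~ -2.4076, matching yAnswer in the tests below */
    cudaFree(devX);
    cudaFree(devR);
    return 0;
}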
source/tensor/test/TClip.cpp (view file @ fe868e5c)
...
...
@@ -17,11 +17,13 @@
 /*
 * $Created by: Lin Ye (email: linye2015@outlook.com) 2018-08-03
+* $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-12 float16/int/int8 added
 */
 
 #include "../XTensor.h"
 #include "../core/math/Clip.h"
 #include "TClip.h"
+#include "../core/getandset/ConvertDataType.h"
 
 namespace nts { // namespace nts(NiuTrans.Tensor)
...
...
@@ -116,6 +118,251 @@ bool TestClip1()
#endif // USE_CUDA
}

/*
case 2: float16 test Clip function.
Set every entry to its clip value.
*/
bool TestClip2()
{
    /* a tensor of size (3, 2) */
    int aOrder = 2;
    int * aDimSize = new int[aOrder];
    aDimSize[0] = 3;
    aDimSize[1] = 2;

    int aUnitNum = 1;
    for (int i = 0; i < aOrder; i++)
        aUnitNum *= aDimSize[i];

    DTYPE aData[3][2] = { {1.0F, -2.0F},
                          {0.0F, 4.0F},
                          {5.0F, -6.0F} };
    DTYPE answer[3][2] = { {1.0F, -1.0F},
                           {0.0F, 1.0F},
                           {1.0F, -1.0F} };

    /* CPU test */
    bool cpuTest = true;

#ifdef USE_CUDA
    /* GPU test */
    bool gpuTest = true;

    /* create tensor */
    XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
    XTensor * bGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
    XTensor * aMeGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
    XTensor bUserGPU;

    /* create float16 tensor */
    XTensor aHalfGPU;
    XTensor bHalfGPU;
    XTensor aMeHalfGPU;
    XTensor bUserHalfGPU;

    /* Initialize variables */
    aGPU->SetData(aData, aUnitNum);
    aMeGPU->SetData(aData, aUnitNum);

    /* convert data type from float to float16 */
    aHalfGPU = ConvertDataType(*aGPU, X_FLOAT16);
    aMeHalfGPU = ConvertDataType(*aMeGPU, X_FLOAT16);
    bHalfGPU = ConvertDataType(*bGPU, X_FLOAT16);

    /* call clip function */
    _Clip(&aHalfGPU, &bHalfGPU, -1.0, 1.0);
    _ClipMe(&aMeHalfGPU, -1.0, 1.0);
    bUserHalfGPU = Clip(aHalfGPU, -1.0, 1.0);

    /* convert data type from float16 to float */
    _ConvertDataType(&bHalfGPU, bGPU);
    _ConvertDataType(&aMeHalfGPU, aMeGPU);
    bUserGPU = ConvertDataType(bUserHalfGPU, X_FLOAT);

    /* check results */
    gpuTest = bGPU->CheckData(answer, aUnitNum, 1e-4F) &&
              aMeGPU->CheckData(answer, aUnitNum, 1e-4F) &&
              bUserGPU.CheckData(answer, aUnitNum, 1e-4F);

    /* destroy variables */
    delete aGPU;
    delete bGPU;
    delete aMeGPU;
    delete[] aDimSize;

    return cpuTest && gpuTest;
#else
    /* destroy variables */
    delete[] aDimSize;

    return cpuTest;
#endif // USE_CUDA
}

/*
case 3: int32 test Clip function.
Set every entry to its clip value.
*/
bool TestClip3()
{
    /* a tensor of size (3, 2) */
    int aOrder = 2;
    int * aDimSize = new int[aOrder];
    aDimSize[0] = 3;
    aDimSize[1] = 2;

    int aUnitNum = 1;
    for (int i = 0; i < aOrder; i++)
        aUnitNum *= aDimSize[i];

    DTYPE aData[3][2] = { {1.0F, -2.0F},
                          {0.0F, 4.0F},
                          {5.0F, -6.0F} };
    DTYPE answer[3][2] = { {1.0F, -1.0F},
                           {0.0F, 1.0F},
                           {1.0F, -1.0F} };

    /* CPU test */
    bool cpuTest = true;

#ifdef USE_CUDA
    /* GPU test */
    bool gpuTest = true;

    /* create tensor */
    XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
    XTensor * bGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
    XTensor * aMeGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
    XTensor bUserGPU;

    /* create int32 tensor */
    XTensor aInt32GPU;
    XTensor bInt32GPU;
    XTensor aMeInt32GPU;
    XTensor bUserInt32GPU;

    /* Initialize variables */
    aGPU->SetData(aData, aUnitNum);
    aMeGPU->SetData(aData, aUnitNum);

    /* convert data type from float to int32 */
    aInt32GPU = ConvertDataType(*aGPU, X_INT);
    aMeInt32GPU = ConvertDataType(*aMeGPU, X_INT);
    bInt32GPU = ConvertDataType(*bGPU, X_INT);

    /* call clip function */
    _Clip(&aInt32GPU, &bInt32GPU, -1.0, 1.0);
    _ClipMe(&aMeInt32GPU, -1.0, 1.0);
    bUserInt32GPU = Clip(aInt32GPU, -1.0, 1.0);

    /* convert data type from int32 to float */
    _ConvertDataType(&bInt32GPU, bGPU);
    _ConvertDataType(&aMeInt32GPU, aMeGPU);
    bUserGPU = ConvertDataType(bUserInt32GPU, X_FLOAT);

    /* check results */
    gpuTest = bGPU->CheckData(answer, aUnitNum, 1e-4F) &&
              aMeGPU->CheckData(answer, aUnitNum, 1e-4F) &&
              bUserGPU.CheckData(answer, aUnitNum, 1e-4F);

    /* destroy variables */
    delete aGPU;
    delete bGPU;
    delete aMeGPU;
    delete[] aDimSize;

    return cpuTest && gpuTest;
#else
    /* destroy variables */
    delete[] aDimSize;

    return cpuTest;
#endif // USE_CUDA
}

/*
case 4: int8 test Clip function.
Set every entry to its clip value.
*/
bool TestClip4()
{
    /* a tensor of size (3, 2) */
    int aOrder = 2;
    int * aDimSize = new int[aOrder];
    aDimSize[0] = 3;
    aDimSize[1] = 2;

    int aUnitNum = 1;
    for (int i = 0; i < aOrder; i++)
        aUnitNum *= aDimSize[i];

    DTYPE aData[3][2] = { {1.0F, -2.0F},
                          {0.0F, 4.0F},
                          {5.0F, -6.0F} };
    DTYPE answer[3][2] = { {1.0F, -1.0F},
                           {0.0F, 1.0F},
                           {1.0F, -1.0F} };

    /* CPU test */
    bool cpuTest = true;

#ifdef USE_CUDA
    /* GPU test */
    bool gpuTest = true;

    /* create tensor */
    XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
    XTensor * bGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
    XTensor * aMeGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
    XTensor bUserGPU;

    /* create int8 tensor */
    XTensor aInt8GPU;
    XTensor bInt8GPU;
    XTensor aMeInt8GPU;
    XTensor bUserInt8GPU;

    /* Initialize variables */
    aGPU->SetData(aData, aUnitNum);
    aMeGPU->SetData(aData, aUnitNum);

    /* convert data type from float to int8 */
    aInt8GPU = ConvertDataType(*aGPU, X_INT8);
    aMeInt8GPU = ConvertDataType(*aMeGPU, X_INT8);
    bInt8GPU = ConvertDataType(*bGPU, X_INT8);

    /* call clip function */
    _Clip(&aInt8GPU, &bInt8GPU, -1.0, 1.0);
    _ClipMe(&aMeInt8GPU, -1.0, 1.0);
    bUserInt8GPU = Clip(aInt8GPU, -1.0, 1.0);

    /* convert data type from int8 to float */
    _ConvertDataType(&bInt8GPU, bGPU);
    _ConvertDataType(&aMeInt8GPU, aMeGPU);
    bUserGPU = ConvertDataType(bUserInt8GPU, X_FLOAT);

    /* check results */
    gpuTest = bGPU->CheckData(answer, aUnitNum, 1e-4F) &&
              aMeGPU->CheckData(answer, aUnitNum, 1e-4F) &&
              bUserGPU.CheckData(answer, aUnitNum, 1e-4F);

    /* destroy variables */
    delete aGPU;
    delete bGPU;
    delete aMeGPU;
    delete[] aDimSize;

    return cpuTest && gpuTest;
#else
    /* destroy variables */
    delete[] aDimSize;

    return cpuTest;
#endif // USE_CUDA
}
/* other cases */
/*
TODO!!
...
...
@@ -137,6 +384,36 @@ bool TestClip()
    else
        XPRINT(0, stdout, ">> case 1 passed!\n");

    /* case 2 test */
    caseFlag = TestClip2();
    if (!caseFlag) {
        returnFlag = false;
        XPRINT(0, stdout, ">> case 2 failed!\n");
    }
    else
        XPRINT(0, stdout, ">> case 2 passed!\n");

    /* case 3 test */
    caseFlag = TestClip3();
    if (!caseFlag) {
        returnFlag = false;
        XPRINT(0, stdout, ">> case 3 failed!\n");
    }
    else
        XPRINT(0, stdout, ">> case 3 passed!\n");

    /* case 4 test */
    caseFlag = TestClip4();
    if (!caseFlag) {
        returnFlag = false;
        XPRINT(0, stdout, ">> case 4 failed!\n");
    }
    else
        XPRINT(0, stdout, ">> case 4 passed!\n");

    /* other cases test */
    /*
    TODO!!
...
...
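Every new test case in this commit follows the same round-trip recipe: build float32 tensors, convert to the low-precision type, run the operator there, convert back, and compare against the float32 answer within a tolerance. A host-only sketch of that recipe for the clip case (plain arrays stand in for XTensor, checkData mimics CheckData, and the low-precision step is approximated in float; names here are illustrative, not repo API):

#include <algorithm>
#include <cmath>
#include <cstdio>

static bool checkData(const float * got, const float * want, int n, float tol)
{
    for (int i = 0; i < n; i++)
        if (std::fabs(got[i] - want[i]) > tol)
            return false;
    return true;
}

int main()
{
    const int n = 6;
    float a[n]      = {1.0F, -2.0F, 0.0F, 4.0F, 5.0F, -6.0F};   /* aData */
    float answer[n] = {1.0F, -1.0F, 0.0F, 1.0F, 1.0F, -1.0F};   /* expected */
    float b[n];
    for (int i = 0; i < n; i++)                                 /* stand-in for _Clip */
        b[i] = std::min(std::max(a[i], -1.0F), 1.0F);
    printf(checkData(b, answer, n, 1e-4F) ? ">> case passed!\n" : ">> case failed!\n");
    return 0;
}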
source/tensor/test/TLogSoftmax.cpp (view file @ fe868e5c)
...
...
@@ -17,7 +17,7 @@
 /*
 * $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-02
-* $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-06 float16 added
+* $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-12 float16 added
 */
 
 #include "../XUtility.h"
...
...
@@ -208,7 +208,7 @@ bool TestLogSoftmax2()
 #endif // USE_CUDA
 }
 
-/*
+/*
 case 3: test LogSoftmaxBackward function.
 dE/dx = dE/dy * dy/dx
 log softmax: y_i = log(e^{x_i} / \sum_{k} e^{x_k})
...
@@ -250,12 +250,12 @@ bool TestLogSoftmax3()
/* call LogSoftmax function */
_LogSoftmax
(
x
,
y
,
1
);
/* call LogSoftmaxBackward function */
_LogSoftmaxBackward
(
g
,
y
,
x
,
dedy
,
dedx
,
NULL
,
1
,
SQUAREDERROR
);
/* check result */
cpuTest
=
y
->
CheckData
(
yAnswer
,
unitNum
,
1e-4
F
)
cpuTest
=
y
->
CheckData
(
yAnswer
,
unitNum
,
1e-4
F
)
&&
dedx
->
CheckData
(
dedxAnswer
,
unitNum
,
1e-4
F
);
#ifdef USE_CUDA
...
...
@@ -281,10 +281,10 @@ bool TestLogSoftmax3()
     /* call LogSoftmaxBackward function */
     _LogSoftmaxBackward(gGPU, yGPU, xGPU, dedyGPU, dedxGPU, NULL, 1, SQUAREDERROR);
 
     /* check result */
-    gpuTest = yGPU->CheckData(yAnswer, unitNum, 1e-4F)
-              && dedxGPU->CheckData(dedxAnswer, unitNum, 1e-3F);
+    gpuTest = yGPU->CheckData(yAnswer, unitNum, 1e-4F)
+              && dedxGPU->CheckData(dedxAnswer, unitNum, 1e-3F);
 
     /* destroy variables */
     delete x;
...
...
@@ -313,11 +313,6 @@ bool TestLogSoftmax3()
 #endif // USE_CUDA
 }
 
-/* other cases */
-/*
-TODO!!
-*/
-
 /*
 case 4: float16 test LogSoftmax function.
 LogSoftmax function: y = log(e^x / \sum_{i} e^{x_i})
...
...
@@ -370,14 +365,193 @@ bool TestLogSoftmax4()
    /* convert data type from float16 to float */
    _ConvertDataType(&yHalfGPU, yGPU);
-   yUserGPU = ConvertDataType(yHalfGPU, X_FLOAT);
+   yUserGPU = ConvertDataType(yUserHalfGPU, X_FLOAT);

    /* check result */
    gpuTest = yGPU->CheckData(answer, unitNum, 1e-2F) &&
              yUserGPU.CheckData(answer, unitNum, 1e-2F);

    /* destroy variables */
    delete xGPU;
    delete yGPU;
    delete[] dimSize;

    return cpuTest && gpuTest;
#else
    /* destroy variables */
    delete[] dimSize;

    return cpuTest;
#endif // USE_CUDA
}

/*
case 5: float16 test LogSoftmaxBackward function.
dE/dx = dE/dy * dy/dx
log softmax: y_i = log(e^{x_i} / \sum_{k} e^{x_k})
In this case, LossName=CROSSENTROPY.
*/
bool TestLogSoftmax5()
{
    /* a tensor of size (1, 3) */
    int order = 2;
    int * dimSize = new int[order];
    dimSize[0] = 1;
    dimSize[1] = 3;

    int unitNum = 1;
    for (int i = 0; i < order; i++)
        unitNum *= dimSize[i];

    DTYPE xData[1][3] = {0.0F, 1.0F, 2.0F};
    DTYPE gData[1][3] = {0.5F, 0.8F, 1.5F};
    DTYPE yAnswer[1][3] = {-2.4076F, -1.4076F, -0.4076F};
    DTYPE dedxAnswer[1][3] = {-0.4100F, -0.5553F, -0.8348F};

    /* CPU test */
    bool cpuTest = true;

#ifdef USE_CUDA
    /* GPU test */
    bool gpuTest = true;

    /* create tensors */
    XTensor * xGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
    XTensor * yGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
    XTensor * gGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
    XTensor * dedyGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
    XTensor * dedxGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);

    /* create float16 tensors */
    XTensor xHalfGPU;
    XTensor yHalfGPU;
    XTensor gHalfGPU;
    XTensor dedyHalfGPU;
    XTensor dedxHalfGPU;

    /* initialize variables */
    xGPU->SetData(xData, unitNum);
    gGPU->SetData(gData, unitNum);
    yGPU->SetZeroAll();
    dedxGPU->SetZeroAll();
    dedyGPU->SetZeroAll();

    /* convert data type from float to float16 */
    xHalfGPU = ConvertDataType(*xGPU, X_FLOAT16);
    yHalfGPU = ConvertDataType(*yGPU, X_FLOAT16);
    gHalfGPU = ConvertDataType(*gGPU, X_FLOAT16);
    dedyHalfGPU = ConvertDataType(*dedyGPU, X_FLOAT16);
    dedxHalfGPU = ConvertDataType(*dedxGPU, X_FLOAT16);

    /* call logsoftmax function */
    _LogSoftmax(&xHalfGPU, &yHalfGPU, 1);

    /* call logsoftmaxbackward function */
    _LogSoftmaxBackward(&gHalfGPU, &yHalfGPU, &xHalfGPU, &dedyHalfGPU, &dedxHalfGPU, NULL, 1, CROSSENTROPY);

    /* convert data type from float16 to float */
    _ConvertDataType(&yHalfGPU, yGPU);
    _ConvertDataType(&dedxHalfGPU, dedxGPU);

    /* check result */
    gpuTest = yGPU->CheckData(yAnswer, unitNum, 1e-2F) &&
              dedxGPU->CheckData(dedxAnswer, unitNum, 1e-2F);

    /* destroy variables */
    delete xGPU;
    delete yGPU;
    delete gGPU;
    delete dedxGPU;
    delete dedyGPU;
    delete[] dimSize;

    return cpuTest && gpuTest;
#else
    /* destroy variables */
    delete[] dimSize;

    return cpuTest;
#endif // USE_CUDA
}

/*
case 6: float16 test LogSoftmaxBackward function.
dE/dx = dE/dy * dy/dx
log softmax: y_i = log(e^{x_i} / \sum_{k} e^{x_k})
In this case, LossName=SQUAREDERROR
*/
bool TestLogSoftmax6()
{
    /* a tensor of size (1, 3) */
    int order = 2;
    int * dimSize = new int[order];
    dimSize[0] = 1;
    dimSize[1] = 3;

    int unitNum = 1;
    for (int i = 0; i < order; i++)
        unitNum *= dimSize[i];

    DTYPE xData[1][3] = {0.0F, 1.0F, 2.0F};
    DTYPE gData[1][3] = {0.5F, 0.8F, 1.5F};
    DTYPE yAnswer[1][3] = {-2.4076F, -1.4076F, -0.4076F};
    DTYPE dedxAnswer[1][3] = {-0.4100F, -0.5553F, -0.8348F};

    /* CPU test */
    bool cpuTest = true;

#ifdef USE_CUDA
    /* GPU test */
    bool gpuTest = true;

    /* create tensors */
    XTensor * xGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
    XTensor * yGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
    XTensor * gGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
    XTensor * dedyGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
    XTensor * dedxGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);

    /* create float16 tensors */
    XTensor xHalfGPU;
    XTensor yHalfGPU;
    XTensor gHalfGPU;
    XTensor dedyHalfGPU;
    XTensor dedxHalfGPU;

    /* initialize variables */
    xGPU->SetData(xData, unitNum);
    gGPU->SetData(gData, unitNum);
    yGPU->SetZeroAll();
    dedxGPU->SetZeroAll();
    dedyGPU->SetZeroAll();

    /* convert data type from float to float16 */
    xHalfGPU = ConvertDataType(*xGPU, X_FLOAT16);
    yHalfGPU = ConvertDataType(*yGPU, X_FLOAT16);
    gHalfGPU = ConvertDataType(*gGPU, X_FLOAT16);
    dedyHalfGPU = ConvertDataType(*dedyGPU, X_FLOAT16);
    dedxHalfGPU = ConvertDataType(*dedxGPU, X_FLOAT16);

    /* call logsoftmax function */
    _LogSoftmax(&xHalfGPU, &yHalfGPU, 1);

    /* call logsoftmaxbackward function */
    _LogSoftmaxBackward(&gHalfGPU, &yHalfGPU, &xHalfGPU, &dedyHalfGPU, &dedxHalfGPU, NULL, 1, SQUAREDERROR);

    /* convert data type from float16 to float */
    _ConvertDataType(&yHalfGPU, yGPU);
    _ConvertDataType(&dedxHalfGPU, dedxGPU);

    /* check result */
-   gpuTest = yGPU->CheckData(answer, unitNum, 1e-1F) &&
-             yUserGPU.CheckData(answer, unitNum, 1e-1F);
+   gpuTest = yGPU->CheckData(yAnswer, unitNum, 1e-2F) &&
+             dedxGPU->CheckData(dedxAnswer, unitNum, 1e-2F);

    /* destroy variables */
    delete xGPU;
    delete yGPU;
    delete gGPU;
    delete dedxGPU;
    delete dedyGPU;
    delete[] dimSize;

    return cpuTest && gpuTest;
...
...
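The expected values in TestLogSoftmax5/6 can be checked by hand: for xData = {0, 1, 2}, y_i = x_i - log(e^0 + e^1 + e^2), which is about x_i - 2.4076, and for CROSSENTROPY dE/dx_i = -gold_i + e^{y_i}. A quick host-side verification in plain C++ (no NiuTrans types):

#include <cmath>
#include <cstdio>

int main()
{
    float x[3] = {0.0F, 1.0F, 2.0F};        /* xData */
    float g[3] = {0.5F, 0.8F, 1.5F};        /* gData (gold) */
    float sum = std::exp(x[0]) + std::exp(x[1]) + std::exp(x[2]);
    for (int i = 0; i < 3; i++) {
        float y = x[i] - std::log(sum);     /* -2.4076, -1.4076, -0.4076 */
        float dedx = -g[i] + std::exp(y);   /* -0.4100, -0.5553, -0.8348 */
        printf("y = %.4f, dedx = %.4f\n", y, dedx);
    }
    return 0;
}

The float16 cases compare with a 1e-2F tolerance instead of the 1e-4F used for float32, which matches half precision's roughly three decimal digits of significand.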
@@ -389,6 +563,10 @@ bool TestLogSoftmax4()
 #endif // USE_CUDA
 }
 
+/* other cases */
+/*
+TODO!!
+*/
 
 /* test for LogSoftmax Function */
 bool TestLogSoftmax()
...
...
@@ -436,6 +614,26 @@ bool TestLogSoftmax()
    else
        XPRINT(0, stdout, ">> case 4 passed!\n");

    /* case 5 test */
    caseFlag = TestLogSoftmax5();
    if (!caseFlag) {
        returnFlag = false;
        XPRINT(0, stdout, ">> case 5 failed!\n");
    }
    else
        XPRINT(0, stdout, ">> case 5 passed!\n");

    /* case 6 test */
    caseFlag = TestLogSoftmax6();
    if (!caseFlag) {
        returnFlag = false;
        XPRINT(0, stdout, ">> case 6 failed!\n");
    }
    else
        XPRINT(0, stdout, ">> case 6 passed!\n");

    /* other cases test */
    /*
    TODO!!
...
...
source/tensor/test/TMultiplyDim.cpp (view file @ fe868e5c)
...
...
@@ -17,11 +17,13 @@
 /*
 * $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-30
+* $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-12 float16/int/int8 added
 */
 
 #include "TMultiplyDim.h"
 #include "../core/arithmetic/MultiplyDim.h"
 #include "../XTensor.h"
+#include "../core/getandset/ConvertDataType.h"
 
 namespace nts { // namespace nts(NiuTrans.Tensor)
 /*
...
...
@@ -248,6 +250,205 @@ bool TestMultiplyDim2()
#endif // USE_CUDA
}

/*
case 3: float16 tensor multiplication c = a * b + \alpha * c
where the size of b is equal to the n-th dimension of a,
i.e., a is multiplied with b by broadcasting.
In this case, (2, 4) * (2) = (2, 4), n = 0.
*/
bool TestMultiplyDim3()
{
    /* a tensor of size (2, 4) */
    int aOrder = 2;
    int * aDimSize = new int[aOrder];
    aDimSize[0] = 2;
    aDimSize[1] = 4;

    int aUnitNum = 1;
    for (int i = 0; i < aOrder; i++)
        aUnitNum *= aDimSize[i];

    /* a tensor of size (2) */
    int bOrder = 1;
    int * bDimSize = new int[bOrder];
    bDimSize[0] = 2;

    int bUnitNum = 1;
    for (int i = 0; i < bOrder; i++)
        bUnitNum *= bDimSize[i];

    DTYPE aData[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
                          {4.0F, 5.0F, 6.0F, 7.0F} };
    DTYPE bData[2] = {1.0F, -1.0F};
    DTYPE answer[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
                           {-4.0F, -5.0F, -6.0F, -7.0F} };

    /* CPU test */
    bool cpuTest = true;

#ifdef USE_CUDA
    /* GPU test */
    bool gpuTest = true;

    /* create tensor */
    XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
    XTensor * bGPU = NewTensor(bOrder, bDimSize, X_FLOAT, 1.0F, 0);
    XTensor * cGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
    XTensor * cMeGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
    XTensor cUserGPU;

    /* create float16 tensor */
    XTensor aHalfGPU;
    XTensor bHalfGPU;
    XTensor cHalfGPU;
    XTensor cMeHalfGPU;
    XTensor cUserHalfGPU;

    /* Initialize variables */
    aGPU->SetData(aData, aUnitNum);
    cMeGPU->SetData(aData, aUnitNum);
    bGPU->SetData(bData, bUnitNum);
    cGPU->SetZeroAll();

    /* convert data type from float to float16 */
    aHalfGPU = ConvertDataType(*aGPU, X_FLOAT16);
    bHalfGPU = ConvertDataType(*bGPU, X_FLOAT16);
    cHalfGPU = ConvertDataType(*cGPU, X_FLOAT16);
    cMeHalfGPU = ConvertDataType(*cMeGPU, X_FLOAT16);

    /* call multiplydim function */
    _MultiplyDim(&aHalfGPU, &bHalfGPU, &cHalfGPU, 0);
    _MultiplyDimMe(&cMeHalfGPU, &bHalfGPU, 0);
    cUserHalfGPU = MultiplyDim(aHalfGPU, bHalfGPU, 0);

    /* convert data type from float16 to float */
    _ConvertDataType(&cHalfGPU, cGPU);
    _ConvertDataType(&cMeHalfGPU, cMeGPU);
    cUserGPU = ConvertDataType(cUserHalfGPU, X_FLOAT);

    /* check results */
    gpuTest = cGPU->CheckData(answer, aUnitNum) &&
              cMeGPU->CheckData(answer, aUnitNum) &&
              cUserGPU.CheckData(answer, aUnitNum);

    /* destroy variables */
    delete aGPU;
    delete bGPU;
    delete cGPU;
    delete cMeGPU;
    delete[] aDimSize;
    delete[] bDimSize;

    return cpuTest && gpuTest;
#else
    /* destroy variables */
    delete[] aDimSize;
    delete[] bDimSize;

    return cpuTest;
#endif // USE_CUDA
}

/*
case 4: float16 tensor multiplication c = a * b + \alpha * c
where the size of b is equal to the n-th dimension of a,
i.e., a is multiplied with b by broadcasting.
In this case, (2, 4) * (4) = (2, 4), n = 1.
*/
bool TestMultiplyDim4()
{
    /* a tensor of size (2, 4) */
    int aOrder = 2;
    int * aDimSize = new int[aOrder];
    aDimSize[0] = 2;
    aDimSize[1] = 4;

    int aUnitNum = 1;
    for (int i = 0; i < aOrder; i++)
        aUnitNum *= aDimSize[i];

    /* a tensor of size (4) */
    int bOrder = 1;
    int * bDimSize = new int[bOrder];
    bDimSize[0] = 4;

    int bUnitNum = 1;
    for (int i = 0; i < bOrder; i++)
        bUnitNum *= bDimSize[i];

    DTYPE aData[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
                          {4.0F, 5.0F, 6.0F, 7.0F} };
    DTYPE bData[4] = {1.0F, -1.0F, 1.0F, -1.0F};
    DTYPE answer[2][4] = { {0.0F, -1.0F, 2.0F, -3.0F},
                           {4.0F, -5.0F, 6.0F, -7.0F} };

    /* CPU test */
    bool cpuTest = true;

#ifdef USE_CUDA
    /* GPU test */
    bool gpuTest = true;

    /* create tensor */
    XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
    XTensor * bGPU = NewTensor(bOrder, bDimSize, X_FLOAT, 1.0F, 0);
    XTensor * cGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
    XTensor * cMeGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
    XTensor cUserGPU;

    /* create float16 tensor */
    XTensor aHalfGPU;
    XTensor bHalfGPU;
    XTensor cHalfGPU;
    XTensor cMeHalfGPU;
    XTensor cUserHalfGPU;

    /* Initialize variables */
    aGPU->SetData(aData, aUnitNum);
    cMeGPU->SetData(aData, aUnitNum);
    bGPU->SetData(bData, bUnitNum);
    cGPU->SetZeroAll();

    /* convert data type from float to float16 */
    aHalfGPU = ConvertDataType(*aGPU, X_FLOAT16);
    bHalfGPU = ConvertDataType(*bGPU, X_FLOAT16);
    cHalfGPU = ConvertDataType(*cGPU, X_FLOAT16);
    cMeHalfGPU = ConvertDataType(*cMeGPU, X_FLOAT16);

    /* call multiplydim function */
    _MultiplyDim(&aHalfGPU, &bHalfGPU, &cHalfGPU, 1);
    _MultiplyDimMe(&cMeHalfGPU, &bHalfGPU, 1);
    cUserHalfGPU = MultiplyDim(aHalfGPU, bHalfGPU, 1);

    /* convert data type from float16 to float */
    _ConvertDataType(&cHalfGPU, cGPU);
    _ConvertDataType(&cMeHalfGPU, cMeGPU);
    cUserGPU = ConvertDataType(cUserHalfGPU, X_FLOAT);

    /* check results */
    gpuTest = cGPU->CheckData(answer, aUnitNum) &&
              cMeGPU->CheckData(answer, aUnitNum) &&
              cUserGPU.CheckData(answer, aUnitNum);

    /* destroy variables */
    delete aGPU;
    delete bGPU;
    delete cGPU;
    delete cMeGPU;
    delete[] aDimSize;
    delete[] bDimSize;

    return cpuTest && gpuTest;
#else
    /* destroy variables */
    delete[] aDimSize;
    delete[] bDimSize;

    return cpuTest;
#endif // USE_CUDA
}

/* test for MultiplyDim Function */
bool TestMultiplyDim()
{
...
...
@@ -272,6 +473,24 @@ bool TestMultiplyDim()
    else
        XPRINT(0, stdout, ">> case 2 passed!\n");

    /* case 3 test */
    caseFlag = TestMultiplyDim3();
    if (!caseFlag) {
        returnFlag = false;
        XPRINT(0, stdout, ">> case 3 failed!\n");
    }
    else
        XPRINT(0, stdout, ">> case 3 passed!\n");

    /* case 4 test */
    caseFlag = TestMultiplyDim4();
    if (!caseFlag) {
        returnFlag = false;
        XPRINT(0, stdout, ">> case 4 failed!\n");
    }
    else
        XPRINT(0, stdout, ">> case 4 passed!\n");

    /* other cases test */
    /*
    TODO!!
...
...
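What the two new cases exercise, concretely: multiplying a (2, 4) tensor by a length-2 vector along n = 0 scales each row, while multiplying by a length-4 vector along n = 1 scales each column. A plain host-C++ picture of that broadcasting, using the same data as the tests (illustrative only, not the CUDA kernel):

#include <cstdio>

int main()
{
    float a[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
                      {4.0F, 5.0F, 6.0F, 7.0F} };   /* aData */
    float b0[2] = {1.0F, -1.0F};                    /* case 3: n = 0 */
    float b1[4] = {1.0F, -1.0F, 1.0F, -1.0F};       /* case 4: n = 1 */
    for (int i = 0; i < 2; i++) {                   /* row-wise scale */
        for (int j = 0; j < 4; j++)
            printf("%6.1f", a[i][j] * b0[i]);
        printf("\n");
    }
    for (int i = 0; i < 2; i++) {                   /* column-wise scale */
        for (int j = 0; j < 4; j++)
            printf("%6.1f", a[i][j] * b1[j]);
        printf("\n");
    }
    return 0;
}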
source/tensor/test/TNegate.cpp (view file @ fe868e5c)
...
...
@@ -17,9 +17,11 @@
 /*
 * $Created by: Lin Ye (email: linye2015@outlook.com) 2018-06-14
+* $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-12 float16/int/int8 added
 */
 
 #include "TNegate.h"
+#include "../core/getandset/ConvertDataType.h"
 
 namespace nts { // namespace nts(NiuTrans.Tensor)
...
...
@@ -191,6 +193,86 @@ bool TestNegate2()
#endif // USE_CUDA
}

/* case 3: float16 set every entry to its minus value */
bool TestNegate3()
{
    /* a tensor of size (3, 2) */
    int aOrder = 2;
    int * aDimSize = new int[aOrder];
    aDimSize[0] = 3;
    aDimSize[1] = 2;

    int aUnitNum = 1;
    for (int i = 0; i < aOrder; i++)
        aUnitNum *= aDimSize[i];

    DTYPE aData[3][2] = { {1.0F, -2.0F},
                          {-3.0F, 4.0F},
                          {5.0F, -6.0F} };
    DTYPE answer[3][2] = { {-1.0F, 2.0F},
                           {3.0F, -4.0F},
                           {-5.0F, 6.0F} };

    /* CPU test */
    bool cpuTest = true;

#ifdef USE_CUDA
    /* GPU test */
    bool gpuTest = true;

    /* create tensor */
    XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
    XTensor * bGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
    XTensor * aMeGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
    XTensor bUserGPU;

    /* create float16 tensor */
    XTensor aHalfGPU;
    XTensor bHalfGPU;
    XTensor aMeHalfGPU;
    XTensor bUserHalfGPU;

    /* Initialize variables */
    aGPU->SetData(aData, aUnitNum);
    aMeGPU->SetData(aData, aUnitNum);

    /* convert data type from float to float16 */
    aHalfGPU = ConvertDataType(*aGPU, X_FLOAT16);
    aMeHalfGPU = ConvertDataType(*aMeGPU, X_FLOAT16);
    bHalfGPU = ConvertDataType(*bGPU, X_FLOAT16);

    /* call negate function */
    _Negate(&aHalfGPU, &bHalfGPU);
    _NegateMe(&aMeHalfGPU);
    bUserHalfGPU = Negate(aHalfGPU);

    /* convert data type from float16 to float */
    _ConvertDataType(&bHalfGPU, bGPU);
    _ConvertDataType(&aMeHalfGPU, aMeGPU);
    bUserGPU = ConvertDataType(bUserHalfGPU, X_FLOAT);

    /* check results */
    gpuTest = bGPU->CheckData(answer, aUnitNum, 1e-4F) &&
              aMeGPU->CheckData(answer, aUnitNum, 1e-4F) &&
              bUserGPU.CheckData(answer, aUnitNum, 1e-4F);

    /* destroy variables */
    delete aGPU;
    delete bGPU;
    delete aMeGPU;
    delete[] aDimSize;

    return cpuTest && gpuTest;
#else
    /* destroy variables */
    delete[] aDimSize;

    return cpuTest;
#endif // USE_CUDA
}

/* other cases */
/*
TODO!!
...
...
@@ -222,6 +304,16 @@ bool TestNegate()
    else
        XPRINT(0, stdout, ">> case 2 passed!\n");

    /* case 3 test */
    caseFlag = TestNegate3();
    if (!caseFlag) {
        returnFlag = false;
        XPRINT(0, stdout, ">> case 3 failed!\n");
    }
    else
        XPRINT(0, stdout, ">> case 3 passed!\n");

    /* other cases test */
    /*
    TODO!!
...
...
source/tensor/test/TScaleAndShift.cpp (view file @ fe868e5c)
...
...
@@ -17,9 +17,11 @@
 /*
 * $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-27
+* $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-12 float16/int/int8 added
 */
 
 #include "TScaleAndShift.h"
+#include "../core/getandset/ConvertDataType.h"
 
 namespace nts { // namespace nts(NiuTrans.Tensor)
...
...
@@ -113,6 +115,254 @@ bool TestScaleAndShift1()
#endif // USE_CUDA
}

/*
case 2: float16 scale and shift all tensor entries.
p = p * scale + shift
*/
bool TestScaleAndShift2()
{
    /* a input tensor of size (2, 4) */
    int sOrder = 2;
    int * sDimSize = new int[sOrder];
    sDimSize[0] = 2;
    sDimSize[1] = 4;

    int sUnitNum = 1;
    for (int i = 0; i < sOrder; i++)
        sUnitNum *= sDimSize[i];

    DTYPE sData[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
                          {4.0F, 5.0F, 6.0F, 7.0F} };
    DTYPE answer[2][4] = { {0.5F, 2.5F, 4.5F, 6.5F},
                           {8.5F, 10.5F, 12.5F, 14.5F} };

    DTYPE scaleFactor = 2.0F;
    DTYPE shiftFactor = 0.5F;

    /* CPU test */
    bool cpuTest = true;

#ifdef USE_CUDA
    /* GPU test */
    bool gpuTest = true;

    /* create tensors */
    XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
    XTensor * tGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
    XTensor * tMeGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
    XTensor tUserGPU;

    /* create float16 tensor */
    XTensor sHalfGPU;
    XTensor tHalfGPU;
    XTensor tMeHalfGPU;
    XTensor tUserHalfGPU;

    /* initialize variables */
    sGPU->SetData(sData, sUnitNum);
    tMeGPU->SetData(sData, sUnitNum);

    /* convert data type from float to float16 */
    sHalfGPU = ConvertDataType(*sGPU, X_FLOAT16);
    tMeHalfGPU = ConvertDataType(*tMeGPU, X_FLOAT16);
    tHalfGPU = ConvertDataType(*tGPU, X_FLOAT16);

    /* call scaleandshift function */
    _ScaleAndShift(&sHalfGPU, &tHalfGPU, scaleFactor, shiftFactor);
    _ScaleAndShiftMe(&tMeHalfGPU, scaleFactor, shiftFactor);
    tUserHalfGPU = ScaleAndShift(sHalfGPU, scaleFactor, shiftFactor);

    /* convert data type from float16 to float */
    _ConvertDataType(&tHalfGPU, tGPU);
    _ConvertDataType(&tMeHalfGPU, tMeGPU);
    tUserGPU = ConvertDataType(tUserHalfGPU, X_FLOAT);

    /* check results */
    gpuTest = tGPU->CheckData(answer, sUnitNum) &&
              tMeGPU->CheckData(answer, sUnitNum) &&
              tUserGPU.CheckData(answer, sUnitNum);

    /* destroy variables */
    delete sGPU;
    delete tGPU;
    delete tMeGPU;
    delete[] sDimSize;

    return cpuTest && gpuTest;
#else
    /* destroy variables */
    delete[] sDimSize;

    return cpuTest;
#endif // USE_CUDA
}

/*
case 3: int32 scale and shift all tensor entries.
p = p * scale + shift
*/
bool TestScaleAndShift3()
{
    /* a input tensor of size (2, 4) */
    int sOrder = 2;
    int * sDimSize = new int[sOrder];
    sDimSize[0] = 2;
    sDimSize[1] = 4;

    int sUnitNum = 1;
    for (int i = 0; i < sOrder; i++)
        sUnitNum *= sDimSize[i];

    DTYPE sData[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
                          {4.0F, 5.0F, 6.0F, 7.0F} };
    DTYPE answer[2][4] = { {1.0F, 3.0F, 5.0F, 7.0F},
                           {9.0F, 11.0F, 13.0F, 15.0F} };

    DTYPE scaleFactor = 2.0F;
    DTYPE shiftFactor = 1.8F;

    /* CPU test */
    bool cpuTest = true;

#ifdef USE_CUDA
    /* GPU test */
    bool gpuTest = true;

    /* create tensors */
    XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
    XTensor * tGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
    XTensor * tMeGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
    XTensor tUserGPU;

    /* create int32 tensor */
    XTensor sInt32GPU;
    XTensor tInt32GPU;
    XTensor tMeInt32GPU;
    XTensor tUserInt32GPU;

    /* initialize variables */
    sGPU->SetData(sData, sUnitNum);
    tMeGPU->SetData(sData, sUnitNum);

    /* convert data type from float to int32 */
    sInt32GPU = ConvertDataType(*sGPU, X_INT);
    tMeInt32GPU = ConvertDataType(*tMeGPU, X_INT);
    tInt32GPU = ConvertDataType(*tGPU, X_INT);

    /* call scaleandshift function */
    _ScaleAndShift(&sInt32GPU, &tInt32GPU, scaleFactor, shiftFactor);
    _ScaleAndShiftMe(&tMeInt32GPU, scaleFactor, shiftFactor);
    tUserInt32GPU = ScaleAndShift(sInt32GPU, scaleFactor, shiftFactor);

    /* convert data type from int32 to float */
    _ConvertDataType(&tInt32GPU, tGPU);
    _ConvertDataType(&tMeInt32GPU, tMeGPU);
    tUserGPU = ConvertDataType(tUserInt32GPU, X_FLOAT);

    /* check results */
    gpuTest = tGPU->CheckData(answer, sUnitNum) &&
              tMeGPU->CheckData(answer, sUnitNum) &&
              tUserGPU.CheckData(answer, sUnitNum);

    /* destroy variables */
    delete sGPU;
    delete tGPU;
    delete tMeGPU;
    delete[] sDimSize;

    return cpuTest && gpuTest;
#else
    /* destroy variables */
    delete[] sDimSize;

    return cpuTest;
#endif // USE_CUDA
}

/*
case 4: int8 scale and shift all tensor entries.
p = p * scale + shift
*/
bool TestScaleAndShift4()
{
    /* a input tensor of size (2, 4) */
    int sOrder = 2;
    int * sDimSize = new int[sOrder];
    sDimSize[0] = 2;
    sDimSize[1] = 4;

    int sUnitNum = 1;
    for (int i = 0; i < sOrder; i++)
        sUnitNum *= sDimSize[i];

    DTYPE sData[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
                          {4.0F, 5.0F, 6.0F, 7.0F} };
    DTYPE answer[2][4] = { {1.0F, 3.0F, 5.0F, 7.0F},
                           {9.0F, 11.0F, 13.0F, 15.0F} };

    DTYPE scaleFactor = 2.0F;
    DTYPE shiftFactor = 1.8F;

    /* CPU test */
    bool cpuTest = true;

#ifdef USE_CUDA
    /* GPU test */
    bool gpuTest = true;

    /* create tensors */
    XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
    XTensor * tGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
    XTensor * tMeGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
    XTensor tUserGPU;

    /* create int8 tensor */
    XTensor sInt8GPU;
    XTensor tInt8GPU;
    XTensor tMeInt8GPU;
    XTensor tUserInt8GPU;

    /* initialize variables */
    sGPU->SetData(sData, sUnitNum);
    tMeGPU->SetData(sData, sUnitNum);

    /* convert data type from float to int8 */
    sInt8GPU = ConvertDataType(*sGPU, X_INT8);
    tMeInt8GPU = ConvertDataType(*tMeGPU, X_INT8);
    tInt8GPU = ConvertDataType(*tGPU, X_INT8);

    /* call scaleandshift function */
    _ScaleAndShift(&sInt8GPU, &tInt8GPU, scaleFactor, shiftFactor);
    _ScaleAndShiftMe(&tMeInt8GPU, scaleFactor, shiftFactor);
    tUserInt8GPU = ScaleAndShift(sInt8GPU, scaleFactor, shiftFactor);

    /* convert data type from int8 to float */
    _ConvertDataType(&tInt8GPU, tGPU);
    _ConvertDataType(&tMeInt8GPU, tMeGPU);
    tUserGPU = ConvertDataType(tUserInt8GPU, X_FLOAT);

    /* check results */
    gpuTest = tGPU->CheckData(answer, sUnitNum) &&
              tMeGPU->CheckData(answer, sUnitNum) &&
              tUserGPU.CheckData(answer, sUnitNum);

    /* destroy variables */
    delete sGPU;
    delete tGPU;
    delete tMeGPU;
    delete[] sDimSize;

    return cpuTest && gpuTest;
#else
    /* destroy variables */
    delete[] sDimSize;

    return cpuTest;
#endif // USE_CUDA
}

/* other cases */
/*
TODO!!
...
...
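One detail worth noting in the int32/int8 cases above: scaleFactor is 2.0F but shiftFactor is 1.8F, while the expected answers have the form 2·x + 1. That is consistent with the result being truncated when it is stored back into an integer tensor (an assumption about the kernel's rounding behavior, but the only one these answers fit). Checked on one entry:

#include <cstdio>

int main()
{
    int p = 3;                                   /* one entry of sData */
    float scale = 2.0F, shift = 1.8F;
    int result = (int)(p * scale + shift);       /* 7.8 truncates to 7 = 2*3 + 1 */
    printf("%d\n", result);
    return 0;
}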
@@ -133,6 +383,33 @@ bool TestScaleAndShift()
    else
        XPRINT(0, stdout, ">> case 1 passed!\n");

    /* case 2 test */
    caseFlag = TestScaleAndShift2();
    if (!caseFlag) {
        returnFlag = false;
        XPRINT(0, stdout, ">> case 2 failed!\n");
    }
    else
        XPRINT(0, stdout, ">> case 2 passed!\n");

    /* case 3 test */
    caseFlag = TestScaleAndShift3();
    if (!caseFlag) {
        returnFlag = false;
        XPRINT(0, stdout, ">> case 3 failed!\n");
    }
    else
        XPRINT(0, stdout, ">> case 3 passed!\n");

    /* case 4 test */
    caseFlag = TestScaleAndShift4();
    if (!caseFlag) {
        returnFlag = false;
        XPRINT(0, stdout, ">> case 4 failed!\n");
    }
    else
        XPRINT(0, stdout, ">> case 4 passed!\n");

    /* other cases test */
    /*
    TODO!!
...
...
source/tensor/test/Test.cpp (view file @ fe868e5c)
...
...
@@ -70,7 +70,7 @@ bool Test()
     //wrong = !TestSplit() || wrong;
     //wrong = !TestSpread() || wrong;
     //wrong = !TestSub() || wrong;
-    wrong = !TestSum() || wrong;
+    //wrong = !TestSum() || wrong;
     //wrong = !TestSumByColumnTV() || wrong;
     //wrong = !TestSumByColumnVT() || wrong;
     //wrong = !TestSumDim() || wrong;
...
...