linye / Tensor.LowPrecision / Commits

Commit aa875fba, authored Jul 22, 2019 by linye (parent 9f14dc72)

    1. implement SetData by template
    2. update float16 datatype of SetData
Showing 14 changed files with 730 additions and 401 deletions.
source/network/Main.cpp                        +2    -2
source/network/XBackwardLoss.cpp               +2    -18
source/tensor/core/getandset/SetData.cpp       +55   -135
source/tensor/core/getandset/SetData.cu        +135  -173
source/tensor/core/getandset/SetData.cuh       +6    -10
source/tensor/core/getandset/SetData.h         +7    -17
source/tensor/function/DropoutWithIndex.cpp    +1    -1
source/tensor/function/Loss.cpp                +3    -3
source/tensor/test/TDropout.cpp                +6    -6
source/tensor/test/TReduceSum.cpp              +15   -15
source/tensor/test/TSetData.cpp                +483  -6
source/tensor/test/TSpread.cpp                 +2    -2
source/tensor/test/TSumDim.cpp                 +6    -6
source/tensor/test/Test.cpp                    +7    -7
source/network/Main.cpp
@@ -399,8 +399,8 @@ void xcTest()
     InitTensor2D(&t2, 2, 4, X_FLOAT, 0, NULL);

     XTensor tensor;

-    _SetDataFixedFloat(&t1, 1.0F);
-    _SetDataFixedFloat(&t2, 2.0F);
+    _SetDataFixed(&t1, 1.0F);
+    _SetDataFixed(&t2, 2.0F);

     tensor = t1 + t2;
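The pattern above recurs throughout this commit: the per-type setters (_SetDataFixedFloat, _SetDataFixedDouble, _SetDataFixedInt) are folded into a single template. A minimal sketch of the new call pattern, assuming the InitTensor2D/_SetDataFixed API shown in this diff:

    XTensor t1, t2;
    InitTensor2D(&t1, 2, 4, X_FLOAT, 0, NULL);
    InitTensor2D(&t2, 2, 4, X_FLOAT, 0, NULL);

    _SetDataFixed(&t1, 1.0F);   /* T is deduced as float from the literal */
    _SetDataFixed(&t2, 2.0F);   /* the same entry point serves X_INT and X_DOUBLE tensors */

    XTensor sum = t1 + t2;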
source/network/XBackwardLoss.cpp
@@ -52,15 +52,7 @@ void XLossGrad::MakeGrad(XTensor * node, bool isEfficient)
     XTensor * dedy = output->grad;

     if (income.tailNum == 1) {
-        if (dedy->dataType == X_FLOAT)
-            _SetDataFixedFloat(dedy, 1.0F);
-        else if (dedy->dataType == X_DOUBLE)
-            _SetDataFixedDouble(dedy, 1.0);
-        else if (dedy->dataType == X_INT)
-            _SetDataFixedInt(dedy, 1);
-        else
-            ShowNTErrors("TODO");
+        _SetDataFixed(dedy, 1.0F);
         return;
     }

@@ -144,15 +136,7 @@ void XLossGrad::Compute(XTensor * gold, XTensor * y,
                         LOSS_FUNCTION_NAME lossName)
 {
     if (gold == NULL) {
-        if (dedy->dataType == X_FLOAT)
-            _SetDataFixedFloat(dedy, 1.0F);
-        else if (dedy->dataType == X_DOUBLE)
-            _SetDataFixedDouble(dedy, 1.0);
-        else if (dedy->dataType == X_INT)
-            _SetDataFixedInt(dedy, 1);
-        else {
-            ShowNTErrors("TODO");
-        }
+        _SetDataFixed(dedy, 1.0F);
         return;
     }
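Why the eight-line type ladder can shrink to one call: the run-time check of the tensor's dataType moves inside the template, while the compile-time argument type T only determines how the constant is cast. A standalone sketch of the pattern (illustrative names, not project code):

    #include <cstdio>

    enum DataType { X_INT, X_FLOAT, X_DOUBLE };
    struct Tensor { DataType dataType; void * data; int unitNum; };

    /* mirrors the new _SetDataFixed: one entry point, run-time dispatch inside */
    template<class T>
    void SetFixed(Tensor * t, T value)
    {
        if (t->dataType == X_FLOAT) {
            float * d = (float*)t->data;
            for (int i = 0; i < t->unitNum; i++)
                d[i] = (float)value;
        }
        else if (t->dataType == X_INT) {
            int * d = (int*)t->data;
            for (int i = 0; i < t->unitNum; i++)
                d[i] = (int)value;
        }
        /* the X_DOUBLE branch is analogous */
    }

    int main()
    {
        float buf[4];
        Tensor t = { X_FLOAT, buf, 4 };
        SetFixed(&t, 1.0F);   /* one call site replaces the if/else ladder */
        printf("%g\n", buf[0]);
        return 0;
    }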
source/tensor/core/getandset/SetData.cpp
@@ -25,6 +25,7 @@
 #include "SetData.cuh"
 #include "../../XUtility.h"
 #include "../movement/CopyValues.h"
+#include "ConvertDataType.h"

 #if !defined( WIN32 ) && !defined( _WIN32 )
 #include "sys/time.h"
@@ -77,153 +78,78 @@ void _SetDataFanInOut(XTensor * tensor, DTYPE gain)
 }

 /*
-generate data items with a fixed value p
+generate data items with a fixed value
 >> tensor - the tensor whose data array would be initialized
->> p - pointer to the number for initializing the tensor
+>> value - the value for initializing the tensor
 */
-void _SetDataFixed(XTensor * tensor, void * valuePointer)
+template<class T>
+void _SetDataFixed(XTensor * tensor, T value)
 {
+#ifdef USE_CUDA
+    if (tensor->devID >= 0) {
+        _CudaSetDataFixed(tensor, value);
+        return;
+    }
+#endif
+
     int num = tensor->unitNum;
-    if(tensor->dataType == X_INT){
-        int p = *(int*)valuePointer;
-        if(tensor->devID < 0){
-            int * d = (int*)tensor->data;
-            if(num % 4 == 0){
-                for(int i = 0; i < num; i += 4){
-                    d[i] = p;
-                    d[i + 1] = p;
-                    d[i + 2] = p;
-                    d[i + 3] = p;
-                }
-            }
-            else{
-                for(int i = 0; i < num; i++)
-                    d[i] = p;
-            }
-        }
-        else{
-#ifdef USE_CUDA
-            _CudaSetDataFixedInt(tensor, p);
-#endif
-        }
-    }
-    else if(tensor->dataType == X_FLOAT){
-        float p = *(float*)valuePointer;
-        if(tensor->devID < 0){
-            float * d = (float*)tensor->data;
-            if(num % 4 == 0){
-                for(int i = 0; i < num; i += 4){
-                    d[i] = p;
-                    d[i + 1] = p;
-                    d[i + 2] = p;
-                    d[i + 3] = p;
-                }
-            }
-            else{
-                for(int i = 0; i < num; i++)
-                    d[i] = p;
-            }
-        }
-        else{
-#ifdef USE_CUDA
-            _CudaSetDataFixedFloat(tensor, p);
-#endif
-        }
-    }
-    else if(tensor->dataType == X_DOUBLE){
-        double p = *(double*)valuePointer;
-        if(tensor->devID < 0){
-            double * d = (double*)tensor->data;
-            if(num % 4 == 0){
-                for(int i = 0; i < num; i += 4){
-                    d[i] = p;
-                    d[i + 1] = p;
-                    d[i + 2] = p;
-                    d[i + 3] = p;
-                }
-            }
-            else{
-                for(int i = 0; i < num; i++)
-                    d[i] = p;
-            }
-        }
-        else{
-#ifdef USE_CUDA
-            _CudaSetDataFixedDouble(tensor, p);
-#endif
-        }
+    if (tensor->dataType == X_INT) {
+        int * d = (int*)tensor->data;
+        int v = (int)value;
+        if (num % 4 == 0) {
+            for (int i = 0; i < num; i += 4) {
+                d[i] = v;
+                d[i + 1] = v;
+                d[i + 2] = v;
+                d[i + 3] = v;
+            }
+        }
+        else {
+            for (int i = 0; i < num; i++)
+                d[i] = v;
+        }
+    }
+    else if (tensor->dataType == X_FLOAT) {
+        float * d = (float*)tensor->data;
+        float v = (float)value;
+        if (num % 4 == 0) {
+            for (int i = 0; i < num; i += 4) {
+                d[i] = v;
+                d[i + 1] = v;
+                d[i + 2] = v;
+                d[i + 3] = v;
+            }
+        }
+        else {
+            for (int i = 0; i < num; i++)
+                d[i] = v;
+        }
+    }
+    else if (tensor->dataType == X_DOUBLE) {
+        double * d = (double*)tensor->data;
+        double v = (double)value;
+        if (num % 4 == 0) {
+            for (int i = 0; i < num; i += 4) {
+                d[i] = v;
+                d[i + 1] = v;
+                d[i + 2] = v;
+                d[i + 3] = v;
+            }
+        }
+        else {
+            for (int i = 0; i < num; i++)
+                d[i] = v;
+        }
     }
     else
         ShowNTErrors("TODO");
 }

 /*
 generate data items with a fixed value p (in default type)
 >> tensor - the tensor whose data array would be initialized
 >> p - number in default type
 */
 void SetDataFixed(XTensor & tensor, DTYPE p)
 {
-    _SetDataFixed(&tensor, &p);
+    _SetDataFixed(&tensor, p);
 }

 /*
 generate data items with a fixed value p (in integer)
 >> tensor - the tensor whose data array would be initialized
 >> p - an integer
 */
 void SetDataFixedInt(XTensor & tensor, int p)
 {
     CheckNTErrors(tensor.dataType == X_INT, "An integer tensor is required!");
-    _SetDataFixed(&tensor, &p);
+    _SetDataFixed(&tensor, p);
 }

-/*
-generate data items with a fixed value p (in integer)
->> tensor - the tensor whose data array would be initialized
->> p - an int-valued number
-*/
-void _SetDataFixedInt(XTensor * tensor, int p)
-{
-    CheckNTErrors(tensor->dataType == X_INT, "the tensor must be in X_INT!");
-    if(p == 0)
-        tensor->SetZeroAll();
-    else
-        _SetDataFixed(tensor, &p);
-}
-
-/*
-generate data items with a fixed value p (in float)
->> tensor - the tensor whose data array would be initialized
->> p - a float-valued number
-*/
-void _SetDataFixedFloat(XTensor * tensor, float p)
-{
-    CheckNTErrors(tensor->dataType == X_FLOAT, "the tensor must be in X_FLOAT!");
-    if(p == 0)
-        tensor->SetZeroAll();
-    else
-        _SetDataFixed(tensor, &p);
-}
-
-/*
-generate data items with a fixed value p (in double)
->> tensor - the tensor whose data array would be initialized
->> p - a double-valued number
-*/
-void _SetDataFixedDouble(XTensor * tensor, double p)
-{
-    CheckNTErrors(tensor->dataType == X_DOUBLE, "the tensor must be in X_DOUBLE!");
-    if(p == 0)
-        tensor->SetZeroAll();
-    else
-        _SetDataFixed(tensor, &p);
-}
+template void _SetDataFixed<int>(XTensor*, int);
+template void _SetDataFixed<float>(XTensor*, float);
+template void _SetDataFixed<double>(XTensor*, double);

 /*
 set data items along with a given dimension (and keep the remaining items unchanged)
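The three "template void _SetDataFixed<...>" lines at the end of the hunk are explicit instantiations. The template is defined in SetData.cpp, so translation units that only see the declaration in SetData.h need the instantiations emitted here, or the calls fail at link time. A standalone sketch of the rule (illustrative names):

    /* lib.h */
    template<class T> void Fill(T * d, int n, T v);

    /* lib.cpp */
    template<class T> void Fill(T * d, int n, T v)
    {
        for (int i = 0; i < n; i++)
            d[i] = v;
    }
    /* without these lines, callers in other .cpp files hit undefined-symbol errors */
    template void Fill<int>(int*, int, int);
    template void Fill<float>(float*, int, float);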
@@ -396,7 +322,7 @@ generate data items with a uniform distribution in [lower, upper]
 >> lower - lower value of the range
 >> upper - upper value of the range
 */
-void _SetDataRand(const XTensor * tensor, DTYPE lower, DTYPE upper)
+void _SetDataRand(XTensor * tensor, DTYPE lower, DTYPE upper)
 {
     CheckNTErrors(upper > lower, "the high value must be greater than low value!");
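A note on the dropped const: const on an XTensor pointer is shallow, it protects the members but not the buffer that tensor->data points to, so the old signatures never actually prevented these functions from overwriting the payload. Removing the qualifier makes the mutation explicit, and the new float16 path likely also needs it because the tensor is handed to helpers such as _ConvertDataType that take non-const pointers. A minimal illustration of shallow const (not project code):

    struct Buf { float * data; };

    void fill(const Buf * b)
    {
        b->data[0] = 1.0F;   /* legal: const applies to the pointer member, not the payload */
    }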
@@ -433,10 +359,6 @@ void _SetDataRand(const XTensor * tensor, DTYPE lower, DTYPE upper)
 #ifdef USE_CUDA
         _CudaSetDataRand(tensor, lower, upper);
 #endif
-        //XTensor * t2 = NewTensor(tensor->order, tensor->dimSize, tensor->dataType, tensor->denseRatio, -1);
-        //_SetDataRand(t2, low, high);
-        //_CopyValues(t2, tensor);
-        //delete t2;
     }
 }
@@ -449,10 +371,8 @@ the item to a pre-defined value if the item >= p, set the item to 0 otherwise
 >> p - the threshold
 >> value - the value we intend to assign to the item
 */
-void _SetDataRandP(const XTensor * tensor, DTYPE lower, DTYPE upper, DTYPE p, DTYPE value)
+void _SetDataRandP(XTensor * tensor, DTYPE lower, DTYPE upper, DTYPE p, DTYPE value)
 {
     CheckNTErrors(tensor->dataType == DEFAULT_DTYPE, "TODO");

     if (tensor->devID < 0) {
         _SetDataRand(tensor, lower, upper);
source/tensor/core/getandset/SetData.cu
@@ -19,6 +19,7 @@
 /*
 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-07-18
 * I'm surprised that I did not write this file till today.
+* $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-22 float16 added
 */

 #include <curand.h>
@@ -27,17 +28,19 @@
 #include <curand_kernel.h>
 #include "../../XDevice.h"
 #include "../../XUtility.h"
+#include "ConvertDataType.h"

 namespace nts { // namespace nts(NiuTrans.Tensor)

 /*
-set an integer data array with a fixed value p (in int)
+set a data array with a fixed value p (in int, float, float16, double)
 >> d - pointer to the data array
 >> size - size of the array
 >> p - the initial value
 */
+template<class T>
 __global__
-void KernelSetDataFixedInt(int * d, int size, int p)
+void KernelSetDataFixed(T * d, int size, T p)
 {
     int i = blockDim.x * blockIdx.x + threadIdx.x;
@@ -45,15 +48,14 @@ void KernelSetDataFixedInt(int * d, int size, int p)
         d[i] = p;
 }

 /*
-generate data items with a fixed value p (in int)
+generate data items with a fixed value p (in int, float, float16, double)
 >> tensor - the tensor for initialization
 >> p - the initial value
 */
-void _CudaSetDataFixedInt(XTensor * tensor, int p)
+template<class T>
+void _CudaSetDataFixed(XTensor * tensor, T p)
 {
-    CheckNTErrors(tensor->dataType == X_INT, "the tensor must be in X_INT!");
-
     int gridSize[3];
     int blockSize[3];
@@ -65,34 +67,59 @@ void _CudaSetDataFixedInt(XTensor * tensor, int p)
     int devIDBackup;
     ProtectCudaDev(tensor->devID, devIDBackup);

-    KernelSetDataFixedInt <<<blocks, threads >>>((int*)tensor->data, tensor->unitNum, p);
+    if (tensor->dataType == X_INT) {
+        KernelSetDataFixed<<<blocks, threads>>>((int*)tensor->data, tensor->unitNum, (int)p);
+    }
+    else if (tensor->dataType == X_FLOAT) {
+        KernelSetDataFixed<<<blocks, threads>>>((DTYPE*)tensor->data, tensor->unitNum, (float)p);
+    }
+    else if (tensor->dataType == X_DOUBLE) {
+        KernelSetDataFixed<<<blocks, threads>>>((double*)tensor->data, tensor->unitNum, (double)p);
+    }
+    else if (tensor->dataType == X_FLOAT16) {
+        half p1 = __float2half(p);
+        KernelSetDataFixed<<<blocks, threads>>>((__half*)tensor->data, tensor->unitNum, p1);
+    }
+    else
+        ShowNTErrors("TODO");

     BacktoCudaDev(tensor->devID, devIDBackup);
 }

+template void _CudaSetDataFixed<int>(XTensor*, int);
+template void _CudaSetDataFixed<float>(XTensor*, float);
+template void _CudaSetDataFixed<double>(XTensor*, double);
+//__device__
+//template void _CudaSetDataFixed<half>(XTensor*, half);

-/*
-set a float data array with a fixed value p (in int)
->> d - pointer to the data array
->> size - size of the array
->> p - the initial value
-*/
+/*
+set data array with a uniform distribution in [low, high]
+>> deviceStates - the state of curand
+>> d - float, float16, double datatype pointer to the data array
+>> size - size of the array
+>> lower - low value of the range
+>> variance - the variance of the range
+*/
+template<class T>
 __global__
-void KernelSetDataFixedFloat(float * d, int size, float p)
+void KernelSetDataRand(T * d, int size, T lower, T variance)
 {
     int i = blockDim.x * blockIdx.x + threadIdx.x;
-    if (i < size)
-        d[i] = p;
+    if (i < size) {
+        d[i] = d[i] * variance + lower;
+    }
 }

-/*
-generate data items with a fixed value p (in float)
->> tensor - the tensor for initialization
->> p - the initial value
-*/
+/*
+generate data items with a uniform distribution in [lower, upper]
+>> tensor - the tensor whose data array would be initialized
+>> lower - lower value of the range
+>> upper - upper value of the range
+*/
-void _CudaSetDataFixedFloat(XTensor * tensor, float p)
+void _CudaSetDataRand(XTensor * tensor, DTYPE lower, DTYPE upper)
 {
-    CheckNTErrors(tensor->dataType == X_FLOAT, "the tensor must be in X_FLOAT!");
+    CheckNTErrors(upper > lower, "the high value must be greater than low value!");

     int gridSize[3];
     int blockSize[3];
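KernelSetDataFixed is now one templated kernel; the host side selects the element type at run time and, for X_FLOAT16, converts the scalar with __float2half before the launch. A self-contained CUDA sketch of the same pattern (illustrative names, assumes cuda_fp16.h and a reasonably recent CUDA toolkit):

    #include <cstdio>
    #include <cuda_fp16.h>

    template<class T>
    __global__ void FillKernel(T * d, int size, T p)
    {
        int i = blockDim.x * blockIdx.x + threadIdx.x;
        if (i < size)
            d[i] = p;
    }

    int main()
    {
        const int n = 8;
        __half * d;
        cudaMalloc(&d, n * sizeof(__half));

        __half hp = __float2half(1.0f);              /* scalar converted on the host */
        FillKernel<<<(n + 255) / 256, 256>>>(d, n, hp);
        cudaDeviceSynchronize();

        __half h[n];
        cudaMemcpy(h, d, n * sizeof(__half), cudaMemcpyDeviceToHost);
        printf("%f\n", __half2float(h[0]));          /* prints 1.000000 */
        cudaFree(d);
        return 0;
    }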
@@ -105,34 +132,69 @@ void _CudaSetDataFixedFloat(XTensor * tensor, float p)
     int devIDBackup;
     ProtectCudaDev(tensor->devID, devIDBackup);

-    KernelSetDataFixedFloat <<<blocks, threads >>>((float*)tensor->data, tensor->unitNum, p);
+    XTensor tensor1(tensor->order, tensor->dimSize, X_FLOAT, tensor->denseRatio, tensor->devID, tensor->mem);
+
+    if (tensor->dataType == X_FLOAT || tensor->dataType == X_DOUBLE) {
+        curandGenerator_t & gen = GDevs.GPUs[tensor->devID].gen;
+        curandGenerateUniform(gen, (float*)tensor->data, tensor->unitNum);
+    }
+    else {
+        curandGenerator_t & gen = GDevs.GPUs[tensor->devID].gen;
+        curandGenerateUniform(gen, (float*)tensor1.data, tensor1.unitNum);
+    }
+
+    DTYPE variance = upper - lower;
+
+    if (tensor->dataType == X_FLOAT) {
+        KernelSetDataRand<<<blocks, threads>>>((DTYPE*)tensor->data, tensor->unitNum, lower, variance);
+    }
+    else if (tensor->dataType == X_FLOAT16) {
+        _ConvertDataType(&tensor1, tensor);
+        half lower1 = __float2half(lower);
+        half variance1 = __float2half(variance);
+        KernelSetDataRand<<<blocks, threads>>>((__half*)tensor->data, tensor->unitNum, lower1, variance1);
+    }
+    else {
+        ShowNTErrors("TODO");
+    }

     BacktoCudaDev(tensor->devID, devIDBackup);
 }

-/*
-set a double data array with a fixed value p (in int)
->> d - pointer to the data array
->> size - size of the array
->> p - the initial value
-*/
+/*
+set data items to a pre-defined value if its value >= p, set it to 0 otherwise
+>> d - pointer to the data array
+>> size - size of the array
+>> p - the threshold
+>> value - the value we intend to assign to the item
+*/
+template<class T>
 __global__
-void KernelSetDataFixedDouble(double * d, int size, double p)
+void KernelSetDataPCut(T * d, int size, T p, T value)
 {
     int i = blockDim.x * blockIdx.x + threadIdx.x;
-    if (i < size)
-        d[i] = p;
+    if (i < size) {
+        if (d[i] >= p)
+            d[i] = value;
+        else
+            d[i] = 0;
+    }
 }

-/*
-generate data items with a fixed value p (in double)
->> tensor - the tensor for initialization
->> p - the initial value
-*/
+/*
+generate data items with a uniform distribution in [lower, upper] and set
+the item to a pre-defined value if the item >= p, set the item to 0 otherwise
+>> tensor - the tensor whose data array would be initialized
+>> lower - lower value of the range
+>> upper - upper value of the range
+>> p - the threshold
+>> value - the value we intend to assign to the item
+*/
-void _CudaSetDataFixedDouble(XTensor * tensor, double p)
+void _CudaSetDataRandP(XTensor * tensor, DTYPE lower, DTYPE upper, DTYPE p, DTYPE value)
 {
-    CheckNTErrors(tensor->dataType == X_DOUBLE, "the tensor must be in X_DOUBLE!");
+    _CudaSetDataRand(tensor, lower, upper);

     int gridSize[3];
     int blockSize[3];
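curandGenerateUniform can only write 32-bit floats, which is why the float16 branch above needs the temporary X_FLOAT tensor: the uniforms are generated as floats, converted into the half tensor, and only then rescaled in half precision. The data flow, as a sketch:

    /* X_FLOAT16 branch of _CudaSetDataRand, step by step (illustrative):          */
    /* 1. curandGenerateUniform(gen, (float*)tensor1.data, n);   u ~ U[0,1) floats */
    /* 2. _ConvertDataType(&tensor1, tensor);                    float -> half     */
    /* 3. KernelSetDataRand<<<...>>>((__half*)tensor->data, n,                     */
    /*        __float2half(lower), __float2half(upper - lower));                   */
    /*    each item becomes d[i] * (upper - lower) + lower                         */

Note that X_DOUBLE tensors take the first curand branch but then fall into the ShowNTErrors("TODO") arm of the rescale dispatch, so only X_FLOAT and X_FLOAT16 come out fully initialized here.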
@@ -145,67 +207,19 @@ void _CudaSetDataFixedDouble(XTensor * tensor, double p)
     int devIDBackup;
     ProtectCudaDev(tensor->devID, devIDBackup);

-    KernelSetDataFixedDouble <<<blocks, threads >>>((double*)tensor->data, tensor->unitNum, p);
-
-    BacktoCudaDev(tensor->devID, devIDBackup);
-}
-
-/*
-set data array with a uniform distribution in [low, high]
->> deviceStates - the state of curand
->> d - float datatype pointer to the data array
->> size - size of the array
->> lower - low value of the range
->> variance - the variance of the range
-*/
-__global__
-void KernelSetDataRandFloat(float * d, int size, DTYPE lower, DTYPE variance)
-{
-    int i = blockDim.x * blockIdx.x + threadIdx.x;
-    if (i < size) {
-        d[i] = d[i] * variance + lower;
-    }
-}
-
-/*
-set data array with a uniform distribution in [low, high]
->> deviceStates - the state of curand
->> d - double datatype pointer to the data array
->> size - size of the array
->> lower - low value of the range
->> variance - the variance of the range
-*/
-__global__
-void KernelSetDataRandDouble(double * d, int size, DTYPE lower, DTYPE variance)
-{
-    int i = blockDim.x * blockIdx.x + threadIdx.x;
-    if (i < size){
-        d[i] = d[i] * variance + lower;
-    }
-}
-
-/*
-set data items to a pre-defined value if its value >= p, set it to 0 otherwise
->> d - pointer to the data array
->> size - size of the array
->> lower - low value of the range
->> variance - the variance of the range
-*/
-__global__
-void KernelSetDataPCut(DTYPE * d, int size, DTYPE p, DTYPE value)
-{
-    int i = blockDim.x * blockIdx.x + threadIdx.x;
-    if (i < size) {
-        if (d[i] >= p)
-            d[i] = value;
-        else
-            d[i] = 0;
-    }
-}
+    if (tensor->dataType == X_FLOAT) {
+        KernelSetDataPCut<<<blocks, threads>>>((DTYPE*)tensor->data, tensor->unitNum, p, value);
+    }
+    else if (tensor->dataType == X_FLOAT16) {
+        half p1 = __float2half(p);
+        half value1 = __float2half(value);
+        KernelSetDataPCut<<<blocks, threads>>>((__half*)tensor->data, tensor->unitNum, p1, value1);
+    }

+    BacktoCudaDev(tensor->devID, devIDBackup);
 }

 /*
 set data items along with a given dimension (and keep the remaining items unchanged) - kernel version
 >> tensor - the tensor whose data array would be initialized
 >> beg - the beginning position
@@ -213,8 +227,9 @@ set data items along with a given dimension (and keep the remaining items unchanged)
 >> blockSize - size of a data block
 >> blockNum - number of data blocks
 */
+template<class T>
 __global__
-void KernelSetDataDim(DTYPE * d, int beg, int len, int blockSize, int blockNum, DTYPE p)
+void KernelSetDataDim(T * d, int beg, int len, int blockSize, int blockNum, T p)
 {
     /* offset in each block */
     int i = blockDim.x * blockIdx.x + threadIdx.x;

@@ -222,10 +237,10 @@ void KernelSetDataDim(DTYPE * d, int beg, int len, int blockSize, int blockNum,
     /* block id */
     int j = blockDim.y * blockIdx.y + threadIdx.y;

-    if(i >= blockSize || j > blockNum)
+    if (i >= blockSize || j > blockNum)
         return;

-    if(i < beg || i >= beg + len)
+    if (i < beg || i >= beg + len)
         return;

     d[blockSize * j + i] = p;

@@ -251,15 +266,14 @@ void _CudaSetDataDim(XTensor * tensor, int beg, int len, int dim, DTYPE p)
 {
     int n = tensor->order;

-    CheckNTErrors(tensor->dataType == DEFAULT_DTYPE, "TODO!");
     CheckNTErrors(dim < n && dim >= 0, "Illegal dimension!");
     CheckNTErrors(beg >= 0 && beg < tensor->GetDim(dim), "Illegal beginning position!");
     CheckNTErrors(beg + len >= 0 && beg + len < tensor->GetDim(dim), "Illegal length!");

     int stride = 1;
     int blockSize = 1;
-    int blockNum  = 1;
-    for(int i = n - 1; i > dim; i--){
+    int blockNum = 1;
+    for (int i = n - 1; i > dim; i--) {
         stride *= tensor->GetDim(i);
     }
     blockSize = stride * tensor->GetDim(dim);

@@ -276,8 +290,15 @@ void _CudaSetDataDim(XTensor * tensor, int beg, int len, int dim, DTYPE p)
     int devIDBackup;
     ProtectCudaDev(tensor->devID, devIDBackup);

-    KernelSetDataDim<<<blocks, threads >>>((DTYPE*)tensor->data, beg * stride,
-                                           len * stride, blockSize, blockNum, p);
+    if (tensor->dataType == X_FLOAT) {
+        KernelSetDataDim<<<blocks, threads>>>((DTYPE*)tensor->data, beg * stride,
+                                              len * stride, blockSize, blockNum, p);
+    }
+    else if (tensor->dataType == X_FLOAT16) {
+        half p1 = __float2half(p);
+        KernelSetDataDim<<<blocks, threads>>>((__half*)tensor->data, beg * stride,
+                                              len * stride, blockSize, blockNum, p1);
+    }

     BacktoCudaDev(tensor->devID, devIDBackup);
 }

@@ -292,16 +313,17 @@ modify data items along with a given index and dimension
 >> blockSize - size of a data block
 >> stride - stride of a data block
 */
+template<class T>
 __global__
-void KernelSetDataIndexed(DTYPE * s, DTYPE * m, int blockNum, int blockSize, int stride)
+void KernelSetDataIndexed(T * s, T * m, int blockNum, int blockSize, int stride)
 {
     /* offset in each block */
     int i = blockDim.x * blockIdx.x + threadIdx.x;

     /* block id */
     int j = blockDim.y * blockIdx.y + threadIdx.y;

-    if(i >= stride || j >= blockNum)
+    if (i >= stride || j >= blockNum)
         return;

     int x = blockSize * j + i;

@@ -332,7 +354,6 @@ void _CudaSetDataIndexed(XTensor * source, XTensor * modify, int dim, int index)
     int order = source->order;
     int size = source->GetDim(dim);

-    CheckNTErrors(source->dataType == DEFAULT_DTYPE, "TODO!");
     CheckNTErrors(dim >= 0 && dim < order, "Illegal dimension!");
     CheckNTErrors(index >= 0 && index < size, "Illegal index!");

@@ -358,8 +379,14 @@ void _CudaSetDataIndexed(XTensor * source, XTensor * modify, int dim, int index)
     int devIDBackup;
     ProtectCudaDev(source->devID, devIDBackup);

-    KernelSetDataIndexed<<<blocks, threads >>>((DTYPE*)source->data + index * stride, (DTYPE*)modify->data,
-                                               blockNum, blockSize, stride);
+    if (source->dataType == X_FLOAT) {
+        KernelSetDataIndexed<<<blocks, threads>>>((DTYPE*)source->data + index * stride, (DTYPE*)modify->data,
+                                                  blockNum, blockSize, stride);
+    }
+    else if (source->dataType == X_FLOAT16) {
+        KernelSetDataIndexed<<<blocks, threads>>>((__half*)source->data + index * stride, (__half*)modify->data,
+                                                  blockNum, blockSize, stride);
+    }

     BacktoCudaDev(source->devID, devIDBackup);
 }

@@ -452,71 +479,6 @@ void _CudaSetDataLowTri(XTensor * tensor, DTYPE p, int shift)
 }

 /*
-generate data items with a uniform distribution in [lower, upper]
->> tensor - the tensor whose data array would be initialized
->> lower - lower value of the range
->> upper - upper value of the range
-*/
-void _CudaSetDataRand(const XTensor * tensor, DTYPE lower, DTYPE upper)
-{
-    CheckNTErrors(upper > lower, "the high value must be greater than low value!");
-
-    int gridSize[3];
-    int blockSize[3];
-
-    GDevs.GetCudaThread(tensor->devID, tensor->unitNum, gridSize, blockSize);
-
-    dim3 blocks(gridSize[0]);
-    dim3 threads(blockSize[0]);
-
-    int devIDBackup;
-    ProtectCudaDev(tensor->devID, devIDBackup);
-
-    curandGenerator_t & gen = GDevs.GPUs[tensor->devID].gen;
-    curandGenerateUniform(gen, (float*)tensor->data, tensor->unitNum);
-
-    DTYPE variance = upper - lower;
-
-    if (variance != 1.0F || lower != 0) {
-        if (tensor->dataType == X_FLOAT)
-            KernelSetDataRandFloat <<<blocks, threads >>>((float*)tensor->data, tensor->unitNum, lower, variance);
-        else if (tensor->dataType == X_DOUBLE)
-            KernelSetDataRandDouble <<<blocks, threads >>>((double*)tensor->data, tensor->unitNum, lower, variance);
-    }
-
-    BacktoCudaDev(tensor->devID, devIDBackup);
-}
-
-/*
-generate data items with a uniform distribution in [lower, upper] and set
-the item to a pre-defined value if the item >= p, set the item to 0 otherwise
->> tensor - the tensor whose data array would be initialized
->> lower - lower value of the range
->> upper - upper value of the range
->> p - the threshold
->> value - the value we intend to assign to the item
-*/
-void _CudaSetDataRandP(const XTensor * tensor, DTYPE lower, DTYPE upper, DTYPE p, DTYPE value)
-{
-    _CudaSetDataRand(tensor, lower, upper);
-
-    int gridSize[3];
-    int blockSize[3];
-
-    GDevs.GetCudaThread(tensor->devID, tensor->unitNum, gridSize, blockSize);
-
-    dim3 blocks(gridSize[0]);
-    dim3 threads(blockSize[0]);
-
-    int devIDBackup;
-    ProtectCudaDev(tensor->devID, devIDBackup);
-
-    KernelSetDataPCut << <blocks, threads >> >((float*)tensor->data, tensor->unitNum, p, value);
-
-    BacktoCudaDev(tensor->devID, devIDBackup);
-}
-
 /*
 set the data with an array of offsets (kernel version)
 >> data - pointer to the data array
 >> offsets - offset for each data item
source/tensor/core/getandset/SetData.cuh
@@ -19,6 +19,7 @@
 /*
 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-07-18
 * I'm surprised that I did not write this file till today.
+* $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-22 float16 added
 */

 #ifndef __SETDATA_CUH__

@@ -28,14 +29,9 @@
 namespace nts { // namespace nts(NiuTrans.Tensor)

-/* generate data items with a fixed value p (in int) */
-void _CudaSetDataFixedInt(XTensor * tensor, int p);
-
-/* generate data items with a fixed value p (in float) */
-void _CudaSetDataFixedFloat(XTensor * tensor, float p);
-
-/* generate data items with a fixed value p (in double) */
-void _CudaSetDataFixedDouble(XTensor * tensor, double p);
+/* generate data items with a fixed value p (in int, float, float16, double) */
+template<class T>
+void _CudaSetDataFixed(XTensor * tensor, T p);

 /* set data items along with a given dimension (and keep the remaining items unchanged) */
 void _CudaSetDataDim(XTensor * tensor, int beg, int len, int dim, DTYPE p);

@@ -47,11 +43,11 @@ void _CudaSetDataIndexed(XTensor * source, XTensor * modify, int dim, int index)
 void _CudaSetDataLowTri(XTensor * tensor, DTYPE p, int shift);

 /* generate data items with a uniform distribution in [lower, upper] */
-void _CudaSetDataRand(const XTensor * tensor, DTYPE lower, DTYPE upper);
+void _CudaSetDataRand(XTensor * tensor, DTYPE lower, DTYPE upper);

 /* generate data items with a uniform distribution in [lower, upper] and set
 the item to a pre-defined value if the item >= p, set the item to 0 otherwise */
-void _CudaSetDataRandP(const XTensor * tensor, DTYPE lower, DTYPE upper, DTYPE p, DTYPE value);
+void _CudaSetDataRandP(XTensor * tensor, DTYPE lower, DTYPE upper, DTYPE p, DTYPE value);

 /* set the data with an array of offsets */
 void _CudaSetDataWithOffset(XTensor * tensor, MTYPE * offsets, DTYPE value, MTYPE num);
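One detail worth noting: the half instantiation of _CudaSetDataFixed stays commented out in the .cu file, so no _CudaSetDataFixed<half> exists. A float16 tensor is still reachable because the float instantiation converts the scalar and launches the kernel template on __half data. The chain, as a sketch (illustrative call trace, following the code in this commit):

    /* _SetDataFixed<float>(tensor, 1.0F)            T = float                   */
    /*   -> _CudaSetDataFixed<float>(tensor, 1.0F)   explicit instantiation      */
    /*      -> X_FLOAT16 branch:                                                 */
    /*         half p1 = __float2half(p);                                        */
    /*         KernelSetDataFixed<<<...>>>((__half*)tensor->data, n, p1);        */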
source/tensor/core/getandset/SetData.h
@@ -24,29 +24,19 @@
 #define __SETDATA_H__

 #include "../../XTensor.h"
 #include "SetData.cuh"

 namespace nts { // namespace nts(NiuTrans.Tensor)

 /* generate data items with a xavier initialization */
 void _SetDataFanInOut(XTensor * tensor, DTYPE gain = 1.0F);

-/* generate data items with a fixed value p */
-void _SetDataFixed(XTensor * tensor, void * valuePointer);
+//* generate data items with a fixed value p */
+//void _SetDataFixed(XTensor * tensor, void * valuePointer);

 /* generate data items with a fixed value p (in default type) */
 void SetDataFixed(XTensor & tensor, DTYPE p);

 /* generate data items with a fixed value p (in integer) */
 void SetDataFixedInt(XTensor & tensor, int p);

-/* generate data items with a fixed value p (in int) */
-void _SetDataFixedInt(XTensor * tensor, int p);
-
-/* generate data items with a fixed value p (in float) */
-void _SetDataFixedFloat(XTensor * tensor, float p);
-
-/* generate data items with a fixed value p (in double) */
-void _SetDataFixedDouble(XTensor * tensor, double p);
+/* generate data items with a fixed value p */
+template<class T>
+void _SetDataFixed(XTensor * tensor, T value);

 /* set data items along with a given dimension (and keep the remaining items unchanged) */
 void _SetDataDim(XTensor * tensor, int beg, int len, int dim, DTYPE p);

@@ -58,11 +48,11 @@ void _SetDataIndexed(XTensor * source, XTensor * modify, int dim, int index);
 void _SetDataLowTri(XTensor * tensor, DTYPE p, int shift);

 /* generate data items with a uniform distribution in [lower, upper] */
-void _SetDataRand(const XTensor * tensor, DTYPE lower, DTYPE upper);
+void _SetDataRand(XTensor * tensor, DTYPE lower, DTYPE upper);

 /* generate data items with a uniform distribution in [lower, upper] and set
 the item to a pre-defined value if the item >= p, set the item to 0 otherwise */
-void _SetDataRandP(const XTensor * tensor, DTYPE lower, DTYPE upper, DTYPE p, DTYPE value);
+void _SetDataRandP(XTensor * tensor, DTYPE lower, DTYPE upper, DTYPE p, DTYPE value);

 /* generate data items with a normal distribution with specified mean and standard deviation */
 void _SetDataRandN(XTensor * tensor, DTYPE mean = 0.0F, DTYPE standardDeviation = 1.0F);
source/tensor/function/DropoutWithIndex.cpp
@@ -70,7 +70,7 @@ XTensor DropoutWithIndex(const XTensor &x, XTensor &maskIndex, DTYPE scale)
     InitTensor1D(&c, x.unitNum, x.dataType, x.devID, x.mem);

-    _SetDataFixedFloat(&c, 1.0F);
+    _SetDataFixed(&c, 1.0F);

     _DropoutWithIndex(&x, &maskIndex, &c);
source/tensor/function/Loss.cpp
@@ -385,11 +385,11 @@ void _LossBackward(XTensor * dedy, XTensor * t, XTensor * y,
 {
     if (t == NULL) {
         if (dedy->dataType == X_FLOAT)
-            _SetDataFixedFloat(dedy, 1.0F);
+            _SetDataFixed(dedy, 1.0F);
         else if (dedy->dataType == X_DOUBLE)
-            _SetDataFixedDouble(dedy, 1.0);
+            _SetDataFixed(dedy, 1.0);
         else if (dedy->dataType == X_INT)
-            _SetDataFixedInt(dedy, 1);
+            _SetDataFixed(dedy, 1);
         else {
             ShowNTErrors("TODO");
         }
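Unlike XBackwardLoss.cpp above, the type ladder survives here; only the callee names change. Each branch now selects a different template instantiation through its literal:

    _SetDataFixed(dedy, 1.0F);   /* T = float  */
    _SetDataFixed(dedy, 1.0);    /* T = double */
    _SetDataFixed(dedy, 1);      /* T = int    */

Since the template casts the value to the tensor's element type internally, any one of these calls would fill the tensor correctly for all three data types; keeping the ladder simply preserves the original per-type literals.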
source/tensor/test/TDropout.cpp
@@ -50,7 +50,7 @@ bool TestDropout1()
     XTensor yUser;

     /* initialize variables */
-    _SetDataFixedFloat(x, 1.0F);
+    _SetDataFixed(x, 1.0F);
     y->SetZeroAll();

     /* call Dropout function */

@@ -88,7 +88,7 @@ bool TestDropout1()
     XTensor yUserGPU;

     /* initialize variables */
-    _SetDataFixedFloat(xGPU, 1.0F);
+    _SetDataFixed(xGPU, 1.0F);
     yGPU->SetZeroAll();

     /* call Dropout function */

@@ -157,10 +157,10 @@ bool TestDropout2()
     XTensor * dedy = NewTensor(order, dimSize);

     /* initialize variables */
-    _SetDataFixedFloat(x, 1.0F);
+    _SetDataFixed(x, 1.0F);
     y->SetZeroAll();
     dedx->SetZeroAll();
-    _SetDataFixedFloat(dedy, 1.5F);
+    _SetDataFixed(dedy, 1.5F);

     /* call Dropout function */
     float dropProb = 0.5F;

@@ -183,10 +183,10 @@ bool TestDropout2()
     XTensor * dedyGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);

     /* initialize variables */
-    _SetDataFixedFloat(xGPU, 1.0F);
+    _SetDataFixed(xGPU, 1.0F);
     yGPU->SetZeroAll();
     dedxGPU->SetZeroAll();
-    _SetDataFixedFloat(dedyGPU, 1.5F);
+    _SetDataFixed(dedyGPU, 1.5F);

     /* call Dropout function */
     _Dropout(xGPU, yGPU, seed, dropProb);
source/tensor/test/TReduceSum.cpp
@@ -196,8 +196,8 @@ bool TestReduceSum2()
     XTensor tUser;

     /* initialize variables */
-    _SetDataFixedFloat(s, 1.0F);
-    _SetDataFixedFloat(answer, (float)s->GetDim(1));
+    _SetDataFixed(s, 1.0F);
+    _SetDataFixed(answer, (float)s->GetDim(1));

     /* call ReduceSum function */
     _ReduceSum(s, t, 1);

@@ -216,7 +216,7 @@ bool TestReduceSum2()
     XTensor tUserGPU;

     /* initialize variables */
-    _SetDataFixedFloat(sGPU, 1.0F);
+    _SetDataFixed(sGPU, 1.0F);

     /* call ReduceSum function */
     _ReduceSum(sGPU, tGPU, 1);

The same pair of substitutions repeats verbatim in TestReduceSum3 (hunks @@ -285,8 +285,8 and @@ -305,7 +305,7), TestReduceSum4 (@@ -374,8 +374,8 and @@ -394,7 +394,7), TestReduceSum5 (@@ -465,8 +465,8 and @@ -485,7 +485,7), and TestReduceSum6 (@@ -556,8 +556,8 and @@ -576,7 +576,7).
source/tensor/test/TSetData.cpp
/* NiuTrans.Tensor - an open-source tensor library
 * Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
 * All rights reserved.
 *

@@ -17,10 +17,12 @@
 /*
 * $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-06
+* $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-22 float16 added
 */

 #include "TSetData.h"
 #include "../core/getandset/SetData.h"
+#include "../core/getandset/ConvertDataType.h"

 namespace nts { // namespace nts(NiuTrans.Tensor)
@@ -118,7 +120,7 @@ bool TestSetData2()
     XTensor * modify = NewTensor(dataOrder, dataDimSize);

     /* Initialize variables */
-    _SetDataFixedFloat(s, 1.0F);
+    _SetDataFixed(s, 1.0F);
     modify->SetData(data, dataUnitNum);

     /* call SetDataIndexed function */

@@ -136,7 +138,7 @@ bool TestSetData2()
     XTensor * modifyGPU = NewTensor(dataOrder, dataDimSize, X_FLOAT, 1.0F, 0);

     /* Initialize variables */
-    _SetDataFixedFloat(sGPU, 1.0F);
+    _SetDataFixed(sGPU, 1.0);
     modifyGPU->SetData(data, dataUnitNum);

     /* call SetDataIndexed function */

@@ -211,11 +213,11 @@ bool TestSetData3()
     XTensor * modify = NewTensor(dataOrder, dataDimSize);

     /* Initialize variables */
-    _SetDataFixedFloat(s, 1.0F);
+    _SetDataFixed(s, 1.0);
     modify->SetData(data, dataUnitNum);

     /* call SetDataIndexed function */
-    _SetDataFixedFloat(s, 1.0F);
+    _SetDataFixed(s, 1.0);
     _SetDataIndexed(s, modify, 1, 1);

     /* check results */

@@ -230,7 +232,7 @@ bool TestSetData3()
     XTensor * modifyGPU = NewTensor(dataOrder, dataDimSize, X_FLOAT, 1.0F, 0);

     /* Initialize variables */
-    _SetDataFixedFloat(sGPU, 1.0F);
+    _SetDataFixed(sGPU, 1.0);
     modifyGPU->SetData(data, dataUnitNum);

     /* call SetDataIndexed function */
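Several of the updated call sites in this file pass 1.0, a double literal, to X_FLOAT tensors. That is harmless, the template instantiates with T = double and casts down to float internally, but it does create an extra instantiation that 1.0F would avoid:

    _SetDataFixed(sGPU, 1.0);    /* T = double: value is cast to float inside */
    _SetDataFixed(sGPU, 1.0F);   /* T = float: same result, one fewer instantiation */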
@@ -406,6 +408,427 @@ bool TestSetData5()
 #endif // USE_CUDA
 }

(every line in the remainder of this hunk is an addition; the diff prefixes are omitted below for readability)

/*
case 6: float16 test SetDataRand function.
set the tensor items by a uniform distribution in range [lower, upper].
*/
bool TestSetData6()
{
    /* an input tensor of size (2, 4) */
    int sOrder = 2;
    int * sDimSize = new int[sOrder];
    sDimSize[0] = 2;
    sDimSize[1] = 4;

    int sUnitNum = 1;
    for (int i = 0; i < sOrder; i++)
        sUnitNum *= sDimSize[i];

    DTYPE answer[2][4] = {0};

    /* CPU test */
    bool cpuTest = true;

#ifdef USE_CUDA
    /* GPU test */
    bool gpuTest = true;

    /* create tensors */
    XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);

    /* create float16 tensor */
    XTensor sHalfGPU;

    /* convert data type from float to float16 */
    sHalfGPU = ConvertDataType(*sGPU, X_FLOAT16);

    /* call SetDataRand function */
    _SetDataRand(&sHalfGPU, 0.0, 1.0);

    /* convert data type from float16 to float */
    _ConvertDataType(&sHalfGPU, sGPU);

    /* check results */
    gpuTest = sGPU->CheckData(answer, sUnitNum, 1.0F);

    /* destroy variables */
    delete sGPU;
    delete[] sDimSize;

    return cpuTest && gpuTest;
#else
    /* destroy variables */
    delete[] sDimSize;

    return cpuTest;
#endif // USE_CUDA
}

/*
case 7: float16 test SetDataRandP function.
first set the tensor items by a uniform distribution in range [lower, upper],
then set the item to a pre-defined value if the item >= p, set the item to 0 otherwise.
*/
bool TestSetData7()
{
    /* an input tensor of size (2, 4) */
    int sOrder = 2;
    int * sDimSize = new int[sOrder];
    sDimSize[0] = 2;
    sDimSize[1] = 4;

    int sUnitNum = 1;
    for (int i = 0; i < sOrder; i++)
        sUnitNum *= sDimSize[i];

    DTYPE answer[2][4] = {0};

    /* CPU test */
    bool cpuTest = true;

#ifdef USE_CUDA
    /* GPU test */
    bool gpuTest = true;

    /* create tensors */
    XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);

    /* create float16 tensor */
    XTensor sHalfGPU;

    /* convert data type from float to float16 */
    sHalfGPU = ConvertDataType(*sGPU, X_FLOAT16);

    /* call SetDataRandP function */
    _SetDataRandP(&sHalfGPU, 0.0, 1.0, 0.5, 1.0);

    /* convert data type from float16 to float */
    _ConvertDataType(&sHalfGPU, sGPU);

    /* check results */
    gpuTest = sGPU->CheckData(answer, sUnitNum, 1.1F);

    /* destroy variables */
    delete sGPU;
    delete[] sDimSize;

    return cpuTest && gpuTest;
#else
    /* destroy variables */
    delete[] sDimSize;

    return cpuTest;
#endif // USE_CUDA
}

/*
case 8: float16 test SetDataIndexed function.
modify data items along with a given dimension.
*/
bool TestSetData8()
{
    /* an input tensor of size (2, 4) */
    int sOrder = 2;
    int * sDimSize = new int[sOrder];
    sDimSize[0] = 2;
    sDimSize[1] = 4;

    int sUnitNum = 1;
    for (int i = 0; i < sOrder; i++)
        sUnitNum *= sDimSize[i];

    /* a data tensor of size (4) for GPU test */
    int dataOrder = 1;
    int * dataDimSize = new int[dataOrder];
    dataDimSize[0] = 4;

    int dataUnitNum = 1;
    for (int i = 0; i < dataOrder; i++)
        dataUnitNum *= dataDimSize[i];

    DTYPE data[4] = {0.0F, 1.0F, 2.0F, 3.0F};
    DTYPE answer[2][4] = { {1.0F, 1.0F, 1.0F, 1.0F},
                           {0.0F, 1.0F, 2.0F, 3.0F} };

    /* CPU test */
    bool cpuTest = true;

#ifdef USE_CUDA
    /* GPU test */
    bool gpuTest = true;

    /* create tensors */
    XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
    XTensor * modifyGPU = NewTensor(dataOrder, dataDimSize, X_FLOAT, 1.0F, 0);

    /* create float16 tensors */
    XTensor sHalfGPU;
    XTensor modifyHalfGPU;

    /* initialize modifyGPU */
    modifyGPU->SetData(data, dataUnitNum);

    /* convert data type from float to float16 */
    sHalfGPU = ConvertDataType(*sGPU, X_FLOAT16);
    modifyHalfGPU = ConvertDataType(*modifyGPU, X_FLOAT16);

    /* initialize sHalfGPU */
    _SetDataFixed(&sHalfGPU, 1.0);

    /* call SetDataIndexed function */
    _SetDataIndexed(&sHalfGPU, &modifyHalfGPU, 0, 1);

    /* convert data type from float16 to float */
    _ConvertDataType(&sHalfGPU, sGPU);

    /* check results */
    gpuTest = sGPU->CheckData(answer, sUnitNum, 1e-5F);

    /* destroy variables */
    delete sGPU;
    delete modifyGPU;
    delete[] sDimSize;
    delete[] dataDimSize;

    return cpuTest && gpuTest;
#else
    /* destroy variables */
    delete[] sDimSize;
    delete[] dataDimSize;

    return cpuTest;
#endif // USE_CUDA
}

/*
case 9: float16 test SetDataIndexed function.
modify data items along with a given dimension.
*/
bool TestSetData9()
{
    /* an input tensor of size (2, 4, 3) */
    int sOrder = 3;
    int * sDimSize = new int[sOrder];
    sDimSize[0] = 2;
    sDimSize[1] = 4;
    sDimSize[2] = 3;

    int sUnitNum = 1;
    for (int i = 0; i < sOrder; i++)
        sUnitNum *= sDimSize[i];

    /* a data tensor of size (2, 3) for GPU test */
    int dataOrder = 2;
    int * dataDimSize = new int[dataOrder];
    dataDimSize[0] = 2;
    dataDimSize[1] = 3;

    int dataUnitNum = 1;
    for (int i = 0; i < dataOrder; i++)
        dataUnitNum *= dataDimSize[i];

    DTYPE data[2][3] = { {0.0F, 1.0F, 2.0F},
                         {3.0F, 4.0F, 5.0F} };
    DTYPE answer[2][4][3] = { { {1.0F, 1.0F, 1.0F},
                                {0.0F, 1.0F, 2.0F},
                                {1.0F, 1.0F, 1.0F},
                                {1.0F, 1.0F, 1.0F} },
                              { {1.0F, 1.0F, 1.0F},
                                {3.0F, 4.0F, 5.0F},
                                {1.0F, 1.0F, 1.0F},
                                {1.0F, 1.0F, 1.0F} } };

    /* CPU test */
    bool cpuTest = true;

#ifdef USE_CUDA
    /* GPU test */
    bool gpuTest = true;

    /* create tensors */
    XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
    XTensor * modifyGPU = NewTensor(dataOrder, dataDimSize, X_FLOAT, 1.0F, 0);

    /* create float16 tensors */
    XTensor sHalfGPU;
    XTensor modifyHalfGPU;

    /* initialize modifyGPU */
    modifyGPU->SetData(data, dataUnitNum);

    /* convert data type from float to float16 */
    sHalfGPU = ConvertDataType(*sGPU, X_FLOAT16);
    modifyHalfGPU = ConvertDataType(*modifyGPU, X_FLOAT16);

    /* initialize sHalfGPU */
    _SetDataFixed(&sHalfGPU, 1.0);

    /* call SetDataIndexed function */
    _SetDataIndexed(&sHalfGPU, &modifyHalfGPU, 1, 1);

    /* convert data type from float16 to float */
    _ConvertDataType(&sHalfGPU, sGPU);

    /* check results */
    gpuTest = sGPU->CheckData(answer, sUnitNum, 1e-5F);

    /* destroy variables */
    delete sGPU;
    delete modifyGPU;
    delete[] sDimSize;
    delete[] dataDimSize;

    return cpuTest && gpuTest;
#else
    /* destroy variables */
    delete[] sDimSize;
    delete[] dataDimSize;

    return cpuTest;
#endif // USE_CUDA
}

/*
case 10: float16 test SetDataDim function.
set data items along with a given dimension (and keep the remaining items unchanged).
*/
bool TestSetData10()
{
    /* an input tensor of size (3, 3) */
    int order = 2;
    int * dimSize = new int[order];
    dimSize[0] = 3;
    dimSize[1] = 3;

    int unitNum = 1;
    for (int i = 0; i < order; i++)
        unitNum *= dimSize[i];

    DTYPE sData[3][3] = { {1.0F, 2.0F, 3.0F},
                          {4.0F, 5.0F, 6.0F},
                          {7.0F, 8.0F, 9.0F} };
    DTYPE answer[3][3] = { {1.0F, 2.0F, 3.0F},
                           {0.0F, 0.0F, 0.0F},
                           {7.0F, 8.0F, 9.0F} };

    /* CPU test */
    bool cpuTest = true;

#ifdef USE_CUDA
    /* GPU test */
    bool gpuTest = true;

    /* create tensors */
    XTensor * sGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);

    /* create float16 tensor */
    XTensor sHalfGPU;

    /* initialize variables */
    sGPU->SetData(sData, unitNum);

    /* convert data type from float to float16 */
    sHalfGPU = ConvertDataType(*sGPU, X_FLOAT16);

    /* call _SetDataDim function */
    _SetDataDim(&sHalfGPU, 1, 1, 0, 0);

    /* convert data type from float16 to float */
    _ConvertDataType(&sHalfGPU, sGPU);

    /* check results */
    gpuTest = sGPU->CheckData(answer, unitNum, 1e-4F);

    /* destroy variables */
    delete sGPU;
    delete[] dimSize;

    return cpuTest && gpuTest;
#else
    /* destroy variables */
    delete[] dimSize;

    return cpuTest;
#endif // USE_CUDA
}

/*
case 11: float16 test SetDataDim function.
set data items along with a given dimension (and keep the remaining items unchanged).
*/
bool TestSetData11()
{
    /* an input tensor of size (2, 4, 3) */
    int order = 3;
    int * dimSize = new int[order];
    dimSize[0] = 2;
    dimSize[1] = 4;
    dimSize[2] = 3;

    int unitNum = 1;
    for (int i = 0; i < order; i++)
        unitNum *= dimSize[i];

    DTYPE data[2][4][3] = { { {1.0F, 1.0F, 1.0F},
                              {0.0F, 1.0F, 2.0F},
                              {1.0F, 1.0F, 1.0F},
                              {1.0F, 1.0F, 1.0F} },
                            { {1.0F, 1.0F, 1.0F},
                              {3.0F, 4.0F, 5.0F},
                              {1.0F, 1.0F, 1.0F},
                              {1.0F, 1.0F, 1.0F} } };
    DTYPE answer[2][4][3] = { { {1.0F, 1.0F, 1.0F},
                                {0.0F, 1.0F, 2.0F},
                                {5.0F, 5.0F, 5.0F},
                                {1.0F, 1.0F, 1.0F} },
                              { {1.0F, 1.0F, 1.0F},
                                {3.0F, 4.0F, 5.0F},
                                {5.0F, 5.0F, 5.0F},
                                {1.0F, 1.0F, 1.0F} } };

    /* CPU test */
    bool cpuTest = true;

#ifdef USE_CUDA
    /* GPU test */
    bool gpuTest = true;

    /* create tensors */
    XTensor * sGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);

    /* create float16 tensor */
    XTensor sHalfGPU;

    /* initialize variables */
    sGPU->SetData(data, unitNum);

    /* convert data type from float to float16 */
    sHalfGPU = ConvertDataType(*sGPU, X_FLOAT16);

    /* call _SetDataDim function */
    _SetDataDim(&sHalfGPU, 2, 1, 1, 5.0F);

    /* convert data type from float16 to float */
    _ConvertDataType(&sHalfGPU, sGPU);

    /* check results */
    gpuTest = sGPU->CheckData(answer, unitNum, 1e-4F);

    /* destroy variables */
    delete sGPU;
    delete[] dimSize;

    return cpuTest && gpuTest;
#else
    /* destroy variables */
    delete[] dimSize;

    return cpuTest;
#endif // USE_CUDA
}
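A note on the tolerances in these cases: IEEE-754 half has a 10-bit significand, so one ulp near 1.0 is 2^-10, roughly 0.001. Small integers such as those in cases 8 through 11 are exactly representable in half, so tight bounds (1e-4F, 1e-5F) work there, while the random cases 6 and 7 compare against an all-zero answer with bounds of 1.0F and 1.1F, which only pins the generated values to the expected range rather than to exact values. A quick way to see the rounding granularity (standalone sketch, assumes cuda_fp16.h):

    #include <cstdio>
    #include <cuda_fp16.h>

    int main()
    {
        float a = __half2float(__float2half(1.0f));        /* exactly 1.0 */
        float b = __half2float(__float2half(1.0004f));     /* rounds back to 1.0 */
        float c = __half2float(__float2half(1.0009766f));  /* next half above 1.0 */
        printf("%.7f %.7f %.7f\n", a, b, c);
        return 0;
    }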
/* other cases */
/*
TODO!!
*/

@@ -462,6 +885,60 @@ bool TestSetData()
     else
         XPRINT(0, stdout, ">> case 5 passed!\n");

+    /* case 6 test */
+    caseFlag = TestSetData6();
+    if (!caseFlag) {
+        returnFlag = false;
+        XPRINT(0, stdout, ">> case 6 failed!\n");
+    }
+    else
+        XPRINT(0, stdout, ">> case 6 passed!\n");
+
+    /* case 7 test */
+    caseFlag = TestSetData7();
+    if (!caseFlag) {
+        returnFlag = false;
+        XPRINT(0, stdout, ">> case 7 failed!\n");
+    }
+    else
+        XPRINT(0, stdout, ">> case 7 passed!\n");
+
+    /* case 8 test */
+    caseFlag = TestSetData8();
+    if (!caseFlag) {
+        returnFlag = false;
+        XPRINT(0, stdout, ">> case 8 failed!\n");
+    }
+    else
+        XPRINT(0, stdout, ">> case 8 passed!\n");
+
+    /* case 9 test */
+    caseFlag = TestSetData9();
+    if (!caseFlag) {
+        returnFlag = false;
+        XPRINT(0, stdout, ">> case 9 failed!\n");
+    }
+    else
+        XPRINT(0, stdout, ">> case 9 passed!\n");
+
+    /* case 10 test */
+    caseFlag = TestSetData10();
+    if (!caseFlag) {
+        returnFlag = false;
+        XPRINT(0, stdout, ">> case 10 failed!\n");
+    }
+    else
+        XPRINT(0, stdout, ">> case 10 passed!\n");
+
+    /* case 11 test */
+    caseFlag = TestSetData11();
+    if (!caseFlag) {
+        returnFlag = false;
+        XPRINT(0, stdout, ">> case 11 failed!\n");
+    }
+    else
+        XPRINT(0, stdout, ">> case 11 passed!\n");

     /* other cases test */
     /*
     TODO!!
source/tensor/test/TSpread.cpp
@@ -90,7 +90,7 @@ bool TestSpread1()
     XTensor * modify = NewTensor(dataOrder, dataDimSize);

     /* Initialize variables */
-    _SetDataFixedFloat(s, 0.0F);
+    _SetDataFixed(s, 0.0F);
     modify->SetData(data, dataUnitNum);

     /* call _Spread function */

@@ -108,7 +108,7 @@ bool TestSpread1()
     XTensor * modifyGPU = NewTensor(dataOrder, dataDimSize, X_FLOAT, 1.0F, 0);

     /* Initialize variables */
-    _SetDataFixedFloat(sGPU, 0.0F);
+    _SetDataFixed(sGPU, 0.0F);
     modifyGPU->SetData(data, dataUnitNum);

     /* call _Spread function */
source/tensor/test/TSumDim.cpp
@@ -295,8 +295,8 @@ bool TestSumDim3()
     /* initialize variables */
     a->SetZeroAll();
     cMe->SetZeroAll();
-    _SetDataFixedFloat(b, 1.0F);
-    _SetDataFixedFloat(answer, 1.0F);
+    _SetDataFixed(b, 1.0F);
+    _SetDataFixed(answer, 1.0F);

     /* call SumDim function */
     _SumDim(a, b, c, 1);

@@ -322,7 +322,7 @@ bool TestSumDim3()
     /* Initialize variables */
     aGPU->SetZeroAll();
     cMe->SetZeroAll();
-    _SetDataFixedFloat(bGPU, 1.0F);
+    _SetDataFixed(bGPU, 1.0F);

     /* call sum function */
     _SumDim(aGPU, bGPU, cGPU, 1);

@@ -404,8 +404,8 @@ bool TestSumDim4()
     /* initialize variables */
     a->SetZeroAll();
     cMe->SetZeroAll();
-    _SetDataFixedFloat(b, 1.0F);
-    _SetDataFixedFloat(answer, 1.0F);
+    _SetDataFixed(b, 1.0F);
+    _SetDataFixed(answer, 1.0F);

     /* call SumDim function */
     _SumDim(a, b, c, 1);

@@ -431,7 +431,7 @@ bool TestSumDim4()
     /* Initialize variables */
     aGPU->SetZeroAll();
     cMe->SetZeroAll();
-    _SetDataFixedFloat(bGPU, 1.0F);
+    _SetDataFixed(bGPU, 1.0F);

     /* call sum function */
     _SumDim(aGPU, bGPU, cGPU, 1);
source/tensor/test/Test.cpp
@@ -30,7 +30,7 @@ bool Test()
     XPRINT(0, stdout, "Testing the XTensor utilites ...\n\n");

     //wrong = !TestAbsolute() || wrong;
-    wrong = !TestClip() || wrong;
+    //wrong = !TestClip() || wrong;
     //wrong = !TestCompare() || wrong;
     //wrong = !TestConcatenate() || wrong;
     //wrong = !TestConcatenateSolely() || wrong;

@@ -38,8 +38,8 @@ bool Test()
     //wrong = !TestConvertDataType() || wrong;
     //wrong = !TestCopyIndexed() || wrong;
     //wrong = !TestCopyValues() || wrong;
-    wrong = !TestDiv() || wrong;
-    wrong = !TestDivDim() || wrong;
+    //wrong = !TestDiv() || wrong;
+    //wrong = !TestDivDim() || wrong;
     //wrong = !TestExp() || wrong;
     //wrong = !TestGather() || wrong;
     //wrong = !TestLog() || wrong;

@@ -49,7 +49,7 @@ bool Test()
     //wrong = !TestMatrixMulBatched() || wrong;
     //wrong = !TestMerge() || wrong;
     //wrong = !TestMultiply() || wrong;
-    wrong = !TestMultiplyDim() || wrong;
+    //wrong = !TestMultiplyDim() || wrong;
     //wrong = !TestNegate() || wrong;
     //wrong = !TestNormalize() || wrong;
     //wrong = !TestPower() || wrong;

@@ -60,17 +60,17 @@ bool Test()
     //wrong = !TestReduceSumSquared() || wrong;
     //wrong = !TestReduceVariance() || wrong;
     //wrong = !TestRound() || wrong;
-    wrong = !TestScaleAndShift() || wrong;
+    //wrong = !TestScaleAndShift() || wrong;
     //wrong = !TestSelect() || wrong;
     //wrong = !TestSetAscendingOrder() || wrong;
-    //wrong = !TestSetData() || wrong;
+    wrong = !TestSetData() || wrong;
     //wrong = !TestSign() || wrong;
     //wrong = !TestSin() || wrong;
     //wrong = !TestSort() || wrong;
     //wrong = !TestSplit() || wrong;
     //wrong = !TestSpread() || wrong;
     //wrong = !TestSub() || wrong;
-    wrong = !TestSum() || wrong;
+    //wrong = !TestSum() || wrong;
     //wrong = !TestSumByColumnTV() || wrong;
     //wrong = !TestSumByColumnVT() || wrong;
     //wrong = !TestSumDim() || wrong;