Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
T
Tensor.LowPrecision
概览
Overview
Details
Activity
Cycle Analytics
版本库
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
问题
0
Issues
0
列表
Board
标记
里程碑
合并请求
0
Merge Requests
0
CI / CD
CI / CD
流水线
作业
日程表
图表
维基
Wiki
代码片段
Snippets
成员
Collapse sidebar
Close sidebar
活动
图像
聊天
创建新问题
作业
提交
Issue Boards
Open sidebar
linye
Tensor.LowPrecision
Commits
3800528b
Commit
3800528b
authored
Jul 29, 2019
by
ltb
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Update setData to support float16 and modify the files that use setData (mainly _SetDataFixed)
parent
ccfe71d0
显示空白字符变更
内嵌
并排
正在显示
8 个修改的文件
包含
135 行增加
和
219 行删除
+135
-219
source/network/XBackwardLoss.cpp
+6
-6
source/sample/transformer/T2TPredictor.cpp
+1
-1
source/tensor/core/getandset/SetData.cpp
+50
-102
source/tensor/core/getandset/SetData.cu
+65
-87
source/tensor/core/getandset/SetData.cuh
+4
-7
source/tensor/core/getandset/SetData.h
+5
-12
source/tensor/function/DropoutWithIndex.cpp
+1
-1
source/tensor/function/Loss.cpp
+3
-3
没有找到文件。
source/network/XBackwardLoss.cpp
查看文件 @
3800528b
...
@@ -53,11 +53,11 @@ void XLossGrad::MakeGrad(XTensor * node, bool isEfficient)
...
@@ -53,11 +53,11 @@ void XLossGrad::MakeGrad(XTensor * node, bool isEfficient)
if
(
income
.
tailNum
==
1
)
{
if
(
income
.
tailNum
==
1
)
{
if
(
dedy
->
dataType
==
X_FLOAT
)
if
(
dedy
->
dataType
==
X_FLOAT
)
_SetDataFixed
Float
(
dedy
,
1.0
F
);
_SetDataFixed
(
dedy
,
1.0
F
);
else
if
(
dedy
->
dataType
==
X_DOUBLE
)
else
if
(
dedy
->
dataType
==
X_DOUBLE
)
_SetDataFixed
Double
(
dedy
,
1.0
);
_SetDataFixed
(
dedy
,
1.0
);
else
if
(
dedy
->
dataType
==
X_INT
)
else
if
(
dedy
->
dataType
==
X_INT
)
_SetDataFixed
Int
(
dedy
,
1
);
_SetDataFixed
(
dedy
,
1
);
else
else
ShowNTErrors
(
"TODO"
);
ShowNTErrors
(
"TODO"
);
...
@@ -145,11 +145,11 @@ void XLossGrad::Compute(XTensor * gold, XTensor * y,
...
@@ -145,11 +145,11 @@ void XLossGrad::Compute(XTensor * gold, XTensor * y,
{
{
if
(
gold
==
NULL
){
if
(
gold
==
NULL
){
if
(
dedy
->
dataType
==
X_FLOAT
)
if
(
dedy
->
dataType
==
X_FLOAT
)
_SetDataFixed
Float
(
dedy
,
1.0
F
);
_SetDataFixed
(
dedy
,
1.0
F
);
else
if
(
dedy
->
dataType
==
X_DOUBLE
)
else
if
(
dedy
->
dataType
==
X_DOUBLE
)
_SetDataFixed
Double
(
dedy
,
1.0
);
_SetDataFixed
(
dedy
,
1.0
);
else
if
(
dedy
->
dataType
==
X_INT
)
else
if
(
dedy
->
dataType
==
X_INT
)
_SetDataFixed
Int
(
dedy
,
1
);
_SetDataFixed
(
dedy
,
1
);
else
{
else
{
ShowNTErrors
(
"TODO"
);
ShowNTErrors
(
"TODO"
);
}
}
...
...
source/sample/transformer/T2TPredictor.cpp
查看文件 @
3800528b
...
@@ -171,7 +171,7 @@ void T2TPredictor::Predict(T2TStateBundle * next, XTensor * encoding,
...
@@ -171,7 +171,7 @@ void T2TPredictor::Predict(T2TStateBundle * next, XTensor * encoding,
dims
[
inputEnc
->
order
-
1
]
=
1
;
dims
[
inputEnc
->
order
-
1
]
=
1
;
InitTensor
(
&
first
,
inputEnc
->
order
,
dims
,
X_INT
,
1.0
F
,
inputEnc
->
devID
,
inputEnc
->
mem
);
InitTensor
(
&
first
,
inputEnc
->
order
,
dims
,
X_INT
,
1.0
F
,
inputEnc
->
devID
,
inputEnc
->
mem
);
_SetDataFixed
Int
(
&
first
,
startSymbol
);
_SetDataFixed
(
&
first
,
startSymbol
);
/* add a new word into the input sequence of the decoder side */
/* add a new word into the input sequence of the decoder side */
if
(
inputLast
==
NULL
)
{
if
(
inputLast
==
NULL
)
{
...
...
source/tensor/core/getandset/SetData.cpp
查看文件 @
3800528b
...
@@ -25,6 +25,7 @@
...
@@ -25,6 +25,7 @@
#include "SetData.cuh"
#include "SetData.cuh"
#include "../../XUtility.h"
#include "../../XUtility.h"
#include "../movement/CopyValues.h"
#include "../movement/CopyValues.h"
#include "ConvertDataType.h"
#if !defined( WIN32 ) && !defined( _WIN32 )
#if !defined( WIN32 ) && !defined( _WIN32 )
#include "sys/time.h"
#include "sys/time.h"
...
@@ -81,82 +82,69 @@ generate data items with a fixed value p
...
@@ -81,82 +82,69 @@ generate data items with a fixed value p
>> tensor - the tensor whose data array would be initialized
>> tensor - the tensor whose data array would be initialized
>> p - pointer to the number for initializing the tensor
>> p - pointer to the number for initializing the tensor
*/
*/
void
_SetDataFixed
(
XTensor
*
tensor
,
void
*
valuePointer
)
template
<
class
T
>
void
_SetDataFixed
(
XTensor
*
tensor
,
T
value
)
{
{
#ifdef USE_CUDA
if
(
tensor
->
devID
>=
0
)
{
_CudaSetDataFixed
(
tensor
,
value
);
return
;
}
#endif
int
num
=
tensor
->
unitNum
;
int
num
=
tensor
->
unitNum
;
if
(
tensor
->
dataType
==
X_INT
){
if
(
tensor
->
dataType
==
X_INT
)
{
int
p
=
*
(
int
*
)
valuePointer
;
if
(
tensor
->
devID
<
0
){
int
*
d
=
(
int
*
)
tensor
->
data
;
int
*
d
=
(
int
*
)
tensor
->
data
;
if
(
num
%
4
==
0
){
int
v
=
(
int
)
value
;
for
(
int
i
=
0
;
i
<
num
;
i
+=
4
){
if
(
num
%
4
==
0
)
{
d
[
i
]
=
p
;
for
(
int
i
=
0
;
i
<
num
;
i
+=
4
)
{
d
[
i
+
1
]
=
p
;
d
[
i
]
=
v
;
d
[
i
+
2
]
=
p
;
d
[
i
+
1
]
=
v
;
d
[
i
+
3
]
=
p
;
d
[
i
+
2
]
=
v
;
d
[
i
+
3
]
=
v
;
}
}
}
}
else
{
else
{
for
(
int
i
=
0
;
i
<
num
;
i
++
)
for
(
int
i
=
0
;
i
<
num
;
i
++
)
d
[
i
]
=
p
;
d
[
i
]
=
v
;
}
}
else
{
#ifdef USE_CUDA
_CudaSetDataFixedInt
(
tensor
,
p
);
#endif
}
}
}
}
else
if
(
tensor
->
dataType
==
X_FLOAT
){
else
if
(
tensor
->
dataType
==
X_FLOAT
)
{
float
p
=
*
(
float
*
)
valuePointer
;
if
(
tensor
->
devID
<
0
){
float
*
d
=
(
float
*
)
tensor
->
data
;
float
*
d
=
(
float
*
)
tensor
->
data
;
if
(
num
%
4
==
0
){
float
v
=
(
float
)
value
;
for
(
int
i
=
0
;
i
<
num
;
i
+=
4
){
if
(
num
%
4
==
0
)
{
d
[
i
]
=
p
;
for
(
int
i
=
0
;
i
<
num
;
i
+=
4
)
{
d
[
i
+
1
]
=
p
;
d
[
i
]
=
v
;
d
[
i
+
2
]
=
p
;
d
[
i
+
1
]
=
v
;
d
[
i
+
3
]
=
p
;
d
[
i
+
2
]
=
v
;
d
[
i
+
3
]
=
v
;
}
}
}
}
else
{
else
{
for
(
int
i
=
0
;
i
<
num
;
i
++
)
for
(
int
i
=
0
;
i
<
num
;
i
++
)
d
[
i
]
=
p
;
d
[
i
]
=
v
;
}
}
else
{
#ifdef USE_CUDA
_CudaSetDataFixedFloat
(
tensor
,
p
);
#endif
}
}
}
}
else
if
(
tensor
->
dataType
==
X_DOUBLE
){
else
if
(
tensor
->
dataType
==
X_DOUBLE
)
{
double
p
=
*
(
double
*
)
valuePointer
;
if
(
tensor
->
devID
<
0
){
double
*
d
=
(
double
*
)
tensor
->
data
;
double
*
d
=
(
double
*
)
tensor
->
data
;
if
(
num
%
4
==
0
){
double
v
=
(
double
)
value
;
for
(
int
i
=
0
;
i
<
num
;
i
+=
4
){
d
[
i
]
=
p
;
if
(
num
%
4
==
0
)
{
d
[
i
+
1
]
=
p
;
for
(
int
i
=
0
;
i
<
num
;
i
+=
4
)
{
d
[
i
+
2
]
=
p
;
d
[
i
]
=
v
;
d
[
i
+
3
]
=
p
;
d
[
i
+
1
]
=
v
;
}
d
[
i
+
2
]
=
v
;
}
d
[
i
+
3
]
=
v
;
else
{
for
(
int
i
=
0
;
i
<
num
;
i
++
)
d
[
i
]
=
p
;
}
}
}
}
else
{
else
{
#ifdef USE_CUDA
for
(
int
i
=
0
;
i
<
num
;
i
++
)
_CudaSetDataFixedDouble
(
tensor
,
p
);
d
[
i
]
=
v
;
#endif
}
}
}
}
else
{
else
ShowNTErrors
(
"TODO"
);
ShowNTErrors
(
"TODO"
);
}
}
}
/*
/*
...
@@ -166,7 +154,7 @@ generate data items with a fixed value p (in default type)
...
@@ -166,7 +154,7 @@ generate data items with a fixed value p (in default type)
*/
*/
void
SetDataFixed
(
XTensor
&
tensor
,
DTYPE
p
)
void
SetDataFixed
(
XTensor
&
tensor
,
DTYPE
p
)
{
{
_SetDataFixed
(
&
tensor
,
&
p
);
_SetDataFixed
(
&
tensor
,
p
);
}
}
/*
/*
...
@@ -177,53 +165,17 @@ generate data items with a fixed value p (in integer)
...
@@ -177,53 +165,17 @@ generate data items with a fixed value p (in integer)
void
SetDataFixedInt
(
XTensor
&
tensor
,
int
p
)
void
SetDataFixedInt
(
XTensor
&
tensor
,
int
p
)
{
{
CheckNTErrors
(
tensor
.
dataType
==
X_INT
,
"An integer tensor is required!"
);
CheckNTErrors
(
tensor
.
dataType
==
X_INT
,
"An integer tensor is required!"
);
_SetDataFixed
(
&
tensor
,
&
p
);
_SetDataFixed
(
&
tensor
,
p
);
}
}
/*
/*
generate data items with a fixed value p (in integer)
generate data items with a fixed value p (in integer)
>> tensor - the tensor whose data array would be initialized
>> tensor - the tensor whose data array would be initialized
>> p - an int-valued number
>> p - an int-valued number
*/
*/
void
_SetDataFixedInt
(
XTensor
*
tensor
,
int
p
)
template
void
_SetDataFixed
<
int
>
(
XTensor
*
,
int
);
{
template
void
_SetDataFixed
<
float
>
(
XTensor
*
,
float
);
CheckNTErrors
(
tensor
->
dataType
==
X_INT
,
"the tensor must be in X_INT!"
);
template
void
_SetDataFixed
<
double
>
(
XTensor
*
,
double
);
if
(
p
==
0
)
tensor
->
SetZeroAll
();
else
_SetDataFixed
(
tensor
,
&
p
);
}
/*
generate data items with a fixed value p (in float)
>> tensor - the tensor whose data array would be initialized
>> p - a float-valued number
*/
void
_SetDataFixedFloat
(
XTensor
*
tensor
,
float
p
)
{
CheckNTErrors
(
tensor
->
dataType
==
X_FLOAT
,
"the tensor must be in X_FLOAT!"
);
if
(
p
==
0
)
tensor
->
SetZeroAll
();
else
_SetDataFixed
(
tensor
,
&
p
);
}
/*
generate data items with a fixed value p (in double)
>> tensor - the tensor whose data array would be initialized
>> p - a double-valued number
*/
void
_SetDataFixedDouble
(
XTensor
*
tensor
,
double
p
)
{
CheckNTErrors
(
tensor
->
dataType
==
X_DOUBLE
,
"the tensor must be in X_DOUBLE!"
);
if
(
p
==
0
)
tensor
->
SetZeroAll
();
else
_SetDataFixed
(
tensor
,
&
p
);
}
/*
/*
generate data items with a fixed value p only if
generate data items with a fixed value p only if
...
@@ -319,7 +271,6 @@ void _SetDataDim(XTensor * tensor, int beg, int len, int dim, DTYPE p)
...
@@ -319,7 +271,6 @@ void _SetDataDim(XTensor * tensor, int beg, int len, int dim, DTYPE p)
{
{
int
n
=
tensor
->
order
;
int
n
=
tensor
->
order
;
CheckNTErrors
(
tensor
->
dataType
==
DEFAULT_DTYPE
,
"TODO!"
);
CheckNTErrors
(
dim
<
n
&&
dim
>=
0
,
"Illegal dimension!"
);
CheckNTErrors
(
dim
<
n
&&
dim
>=
0
,
"Illegal dimension!"
);
CheckNTErrors
(
beg
>=
0
&&
beg
<
tensor
->
GetDim
(
dim
),
"Illegal beginning position!"
);
CheckNTErrors
(
beg
>=
0
&&
beg
<
tensor
->
GetDim
(
dim
),
"Illegal beginning position!"
);
CheckNTErrors
(
beg
+
len
>=
0
&&
beg
+
len
<
tensor
->
GetDim
(
dim
),
"Illegal length!"
);
CheckNTErrors
(
beg
+
len
>=
0
&&
beg
+
len
<
tensor
->
GetDim
(
dim
),
"Illegal length!"
);
...
@@ -372,7 +323,6 @@ void _SetDataIndexed(XTensor * source, XTensor * modify, int dim, int index)
...
@@ -372,7 +323,6 @@ void _SetDataIndexed(XTensor * source, XTensor * modify, int dim, int index)
int
order
=
source
->
order
;
int
order
=
source
->
order
;
int
size
=
source
->
GetDim
(
dim
);
int
size
=
source
->
GetDim
(
dim
);
CheckNTErrors
(
source
->
dataType
==
DEFAULT_DTYPE
,
"TODO!"
);
CheckNTErrors
(
dim
>=
0
&&
dim
<
order
,
"Illegal dimension!"
);
CheckNTErrors
(
dim
>=
0
&&
dim
<
order
,
"Illegal dimension!"
);
CheckNTErrors
(
index
>=
0
&&
index
<
size
,
"Illegal index!"
);
CheckNTErrors
(
index
>=
0
&&
index
<
size
,
"Illegal index!"
);
...
@@ -527,8 +477,6 @@ the item to a pre-defined value if the item >= p, set the item to 0 otherwise
...
@@ -527,8 +477,6 @@ the item to a pre-defined value if the item >= p, set the item to 0 otherwise
*/
*/
void
_SetDataRandP
(
const
XTensor
*
tensor
,
DTYPE
lower
,
DTYPE
upper
,
DTYPE
p
,
DTYPE
value
)
void
_SetDataRandP
(
const
XTensor
*
tensor
,
DTYPE
lower
,
DTYPE
upper
,
DTYPE
p
,
DTYPE
value
)
{
{
CheckNTErrors
(
tensor
->
dataType
==
DEFAULT_DTYPE
,
"TODO"
);
if
(
tensor
->
devID
<
0
)
{
if
(
tensor
->
devID
<
0
)
{
_SetDataRand
(
tensor
,
lower
,
upper
);
_SetDataRand
(
tensor
,
lower
,
upper
);
...
...
source/tensor/core/getandset/SetData.cu
查看文件 @
3800528b
...
@@ -19,6 +19,7 @@
...
@@ -19,6 +19,7 @@
/*
/*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-07-18
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-07-18
* I'm surprised that I did not write this file till today.
* I'm surprised that I did not write this file till today.
* $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-22 float16 added
*/
*/
#include <curand.h>
#include <curand.h>
...
@@ -27,17 +28,20 @@
...
@@ -27,17 +28,20 @@
#include <curand_kernel.h>
#include <curand_kernel.h>
#include "../../XDevice.h"
#include "../../XDevice.h"
#include "../../XUtility.h"
#include "../../XUtility.h"
#include "ConvertDataType.h"
#include <device_launch_parameters.h>
namespace nts { // namespace nts(NiuTrans.Tensor)
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
/*
set an
integer data array with a fixed value p (in int)
set a
data array with a fixed value p (in int, float, float16, double)
>> d - pointer to the data array
>> d - pointer to the data array
>> size - size of the array
>> size - size of the array
>> p - the initial value
>> p - the initial value
*/
*/
template<class T>
__global__
__global__
void KernelSetDataFixed
Int(int * d, int size, int
p)
void KernelSetDataFixed
(T * d, int size, T
p)
{
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
int i = blockDim.x * blockIdx.x + threadIdx.x;
...
@@ -46,14 +50,13 @@ void KernelSetDataFixedInt(int * d, int size, int p)
...
@@ -46,14 +50,13 @@ void KernelSetDataFixedInt(int * d, int size, int p)
}
}
/*
/*
generate data items with a fixed value p (in int
)
generate data items with a fixed value p (in int
, float, float16, double)
>> tensor - the tensor for initialization
>> tensor - the tensor for initialization
>> p - the initial value
>> p - the initial value
*/
*/
void _CudaSetDataFixedInt(XTensor * tensor, int p)
template<class T>
void _CudaSetDataFixed(XTensor * tensor, T p)
{
{
CheckNTErrors(tensor->dataType == X_INT, "the tensor must be in X_INT!");
int gridSize[3];
int gridSize[3];
int blockSize[3];
int blockSize[3];
...
@@ -65,89 +68,48 @@ void _CudaSetDataFixedInt(XTensor * tensor, int p)
...
@@ -65,89 +68,48 @@ void _CudaSetDataFixedInt(XTensor * tensor, int p)
int devIDBackup;
int devIDBackup;
ProtectCudaDev(tensor->devID, devIDBackup);
ProtectCudaDev(tensor->devID, devIDBackup);
KernelSetDataFixedInt <<<blocks, threads >>>((int*)tensor->data, tensor->unitNum, p);
if (tensor->dataType == X_INT){
KernelSetDataFixed<<<blocks, threads>>>((int*)tensor->data, tensor->unitNum, (int)p);
}
else if (tensor->dataType == X_FLOAT){
KernelSetDataFixed<<<blocks, threads>>>((DTYPE*)tensor->data, tensor->unitNum, (float)p);
}
else if (tensor->dataType == X_DOUBLE){
KernelSetDataFixed<<<blocks, threads>>>((double*)tensor->data, tensor->unitNum, (double)p);
}
else if (tensor->dataType == X_FLOAT16){
half p1 = __float2half(p);
KernelSetDataFixed<<<blocks, threads>>>((__half*)tensor->data, tensor->unitNum, p1);
}
else
ShowNTErrors("TODO");
BacktoCudaDev(tensor->devID, devIDBackup);
BacktoCudaDev(tensor->devID, devIDBackup);
}
}
/*
template void _CudaSetDataFixed<int>(XTensor*, int);
set a float data array with a fixed value p (in int)
template void _CudaSetDataFixed<float>(XTensor*, float);
>> d - pointer to the data array
template void _CudaSetDataFixed<double>(XTensor*, double);
>> size - size of the array
//__device__
>> p - the initial value
//template void _CudaSetDataFixed<half>(XTensor*, half);
*/
__global__
void KernelSetDataFixedFloat(float * d, int size, float p)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size)
d[i] = p;
}
/*
generate data items with a fixed value p (in float)
>> tensor - the tensor for initialization
>> p - the initial value
*/
void _CudaSetDataFixedFloat(XTensor * tensor, float p)
{
CheckNTErrors(tensor->dataType == X_FLOAT, "the tensor must be in X_FLOAT!");
int gridSize[3];
int blockSize[3];
GDevs.GetCudaThread(tensor->devID, tensor->unitNum, gridSize, blockSize);
dim3 blocks(gridSize[0]);
dim3 threads(blockSize[0]);
int devIDBackup;
ProtectCudaDev(tensor->devID, devIDBackup);
KernelSetDataFixedFloat <<<blocks, threads >>>((float*)tensor->data, tensor->unitNum, p);
BacktoCudaDev(tensor->devID, devIDBackup);
}
/*
/*
set a double data array with a fixed value p (in int)
set data array with a uniform distribution in [low, high]
>> d - pointer to the data array
>> deviceStates - the state of curand
>> d - float, float16, double datatype pointer to the data array
>> size - size of the array
>> size - size of the array
>> p - the initial value
>> lower - low value of the range
>> variance - the variance of the range
*/
*/
template<class T>
__global__
__global__
void KernelSetData
FixedDouble(double * d, int size, double p
)
void KernelSetData
Rand(T * d, int size, T lower, T variance
)
{
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size)
if (i < size) {
d[i] = p;
d[i] = d[i] * variance + lower;
}
}
/*
generate data items with a fixed value p (in double)
>> tensor - the tensor for initialization
>> p - the initial value
*/
void _CudaSetDataFixedDouble(XTensor * tensor, double p)
{
CheckNTErrors(tensor->dataType == X_DOUBLE, "the tensor must be in X_DOUBLE!");
int gridSize[3];
int blockSize[3];
GDevs.GetCudaThread(tensor->devID, tensor->unitNum, gridSize, blockSize);
dim3 blocks(gridSize[0]);
dim3 threads(blockSize[0]);
int devIDBackup;
ProtectCudaDev(tensor->devID, devIDBackup);
KernelSetDataFixedDouble <<<blocks, threads >>>((double*)tensor->data, tensor->unitNum, p);
BacktoCudaDev(tensor->devID, devIDBackup);
}
}
/*
/*
...
@@ -214,6 +176,7 @@ void KernelSetDataFixedCondInt(int * d, int * c, int size, int p)
...
@@ -214,6 +176,7 @@ void KernelSetDataFixedCondInt(int * d, int * c, int size, int p)
d[i] = p;
d[i] = p;
}
}
/*
/*
generate data items with a fixed value p (in int) only
generate data items with a fixed value p (in int) only
if the condition entry is non-zero
if the condition entry is non-zero
...
@@ -286,8 +249,9 @@ set data items to a pre-defined value if its value >= p, set it to 0 otherwise
...
@@ -286,8 +249,9 @@ set data items to a pre-defined value if its value >= p, set it to 0 otherwise
>> lower - low value of the range
>> lower - low value of the range
>> variance - the variance of the range
>> variance - the variance of the range
*/
*/
template<class T>
__global__
__global__
void KernelSetDataPCut(
DTYPE * d, int size, DTYPE p, DTYPE
value)
void KernelSetDataPCut(
T * d, int size, T p, T
value)
{
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
int i = blockDim.x * blockIdx.x + threadIdx.x;
...
@@ -307,8 +271,9 @@ set data items along with a given dimension (and keep the remaining items unchan
...
@@ -307,8 +271,9 @@ set data items along with a given dimension (and keep the remaining items unchan
>> blockSize - size of a data block
>> blockSize - size of a data block
>> blockNum - number of data blocks
>> blockNum - number of data blocks
*/
*/
template<class T>
__global__
__global__
void KernelSetDataDim(
DTYPE * d, int beg, int len, int blockSize, int blockNum, DTYPE
p)
void KernelSetDataDim(
T * d, int beg, int len, int blockSize, int blockNum, T
p)
{
{
/* offset in each block */
/* offset in each block */
int i = blockDim.x * blockIdx.x + threadIdx.x;
int i = blockDim.x * blockIdx.x + threadIdx.x;
...
@@ -316,10 +281,10 @@ void KernelSetDataDim(DTYPE * d, int beg, int len, int blockSize, int blockNum,
...
@@ -316,10 +281,10 @@ void KernelSetDataDim(DTYPE * d, int beg, int len, int blockSize, int blockNum,
/* block id */
/* block id */
int j = blockDim.y * blockIdx.y + threadIdx.y;
int j = blockDim.y * blockIdx.y + threadIdx.y;
if(i >= blockSize || j > blockNum)
if
(i >= blockSize || j > blockNum)
return;
return;
if(i < beg || i >= beg + len)
if
(i < beg || i >= beg + len)
return;
return;
d[blockSize * j + i] = p;
d[blockSize * j + i] = p;
...
@@ -353,7 +318,7 @@ void _CudaSetDataDim(XTensor * tensor, int beg, int len, int dim, DTYPE p)
...
@@ -353,7 +318,7 @@ void _CudaSetDataDim(XTensor * tensor, int beg, int len, int dim, DTYPE p)
int stride = 1;
int stride = 1;
int blockSize = 1;
int blockSize = 1;
int blockNum = 1;
int blockNum = 1;
for
(int i = n - 1; i > dim; i--)
{
for
(int i = n - 1; i > dim; i--)
{
stride *= tensor->GetDim(i);
stride *= tensor->GetDim(i);
}
}
blockSize = stride * tensor->GetDim(dim);
blockSize = stride * tensor->GetDim(dim);
...
@@ -370,8 +335,15 @@ void _CudaSetDataDim(XTensor * tensor, int beg, int len, int dim, DTYPE p)
...
@@ -370,8 +335,15 @@ void _CudaSetDataDim(XTensor * tensor, int beg, int len, int dim, DTYPE p)
int devIDBackup;
int devIDBackup;
ProtectCudaDev(tensor->devID, devIDBackup);
ProtectCudaDev(tensor->devID, devIDBackup);
KernelSetDataDim<<<blocks, threads >>>((DTYPE*)tensor->data, beg * stride,
if (tensor->dataType == X_FLOAT){
KernelSetDataDim<<<blocks, threads>>>((DTYPE*)tensor->data, beg * stride,
len * stride, blockSize, blockNum, p);
len * stride, blockSize, blockNum, p);
}
else if (tensor->dataType == X_FLOAT16){
half p1 = __float2half(p);
KernelSetDataDim<<<blocks, threads>>>((__half*)tensor->data, beg * stride,
len * stride, blockSize, blockNum, p1);
}
BacktoCudaDev(tensor->devID, devIDBackup);
BacktoCudaDev(tensor->devID, devIDBackup);
}
}
...
@@ -386,8 +358,9 @@ modify data items along with a given index and dimension
...
@@ -386,8 +358,9 @@ modify data items along with a given index and dimension
>> blockSize - size of a data block
>> blockSize - size of a data block
>> stride - stride of a data block
>> stride - stride of a data block
*/
*/
template<class T>
__global__
__global__
void KernelSetDataIndexed(
DTYPE * s, DTYPE
* m, int blockNum, int blockSize, int stride)
void KernelSetDataIndexed(
T * s, T
* m, int blockNum, int blockSize, int stride)
{
{
/* offset in each block */
/* offset in each block */
int i = blockDim.x * blockIdx.x + threadIdx.x;
int i = blockDim.x * blockIdx.x + threadIdx.x;
...
@@ -395,7 +368,7 @@ void KernelSetDataIndexed(DTYPE * s, DTYPE * m, int blockNum, int blockSize, int
...
@@ -395,7 +368,7 @@ void KernelSetDataIndexed(DTYPE * s, DTYPE * m, int blockNum, int blockSize, int
/* block id */
/* block id */
int j = blockDim.y * blockIdx.y + threadIdx.y;
int j = blockDim.y * blockIdx.y + threadIdx.y;
if(i >= stride || j >= blockNum)
if
(i >= stride || j >= blockNum)
return;
return;
int x = blockSize * j + i;
int x = blockSize * j + i;
...
@@ -426,7 +399,6 @@ void _CudaSetDataIndexed(XTensor * source, XTensor * modify, int dim, int index)
...
@@ -426,7 +399,6 @@ void _CudaSetDataIndexed(XTensor * source, XTensor * modify, int dim, int index)
int order = source->order;
int order = source->order;
int size = source->GetDim(dim);
int size = source->GetDim(dim);
CheckNTErrors(source->dataType == DEFAULT_DTYPE, "TODO!");
CheckNTErrors(dim >= 0 && dim < order, "Illegal dimension!");
CheckNTErrors(dim >= 0 && dim < order, "Illegal dimension!");
CheckNTErrors(index >= 0 && index < size, "Illegal index!");
CheckNTErrors(index >= 0 && index < size, "Illegal index!");
...
@@ -452,8 +424,14 @@ void _CudaSetDataIndexed(XTensor * source, XTensor * modify, int dim, int index)
...
@@ -452,8 +424,14 @@ void _CudaSetDataIndexed(XTensor * source, XTensor * modify, int dim, int index)
int devIDBackup;
int devIDBackup;
ProtectCudaDev(source->devID, devIDBackup);
ProtectCudaDev(source->devID, devIDBackup);
KernelSetDataIndexed<<<blocks, threads >>>((DTYPE*)source->data + index * stride, (DTYPE*)modify->data,
if (source->dataType == X_FLOAT){
KernelSetDataIndexed<<<blocks, threads>>>((DTYPE*)source->data + index * stride, (DTYPE*)modify->data,
blockNum, blockSize, stride);
}
else if (source->dataType == X_FLOAT16){
KernelSetDataIndexed<<<blocks, threads>>>((__half*)source->data + index * stride, (__half*)modify->data,
blockNum, blockSize, stride);
blockNum, blockSize, stride);
}
BacktoCudaDev(source->devID, devIDBackup);
BacktoCudaDev(source->devID, devIDBackup);
}
}
...
...
source/tensor/core/getandset/SetData.cuh
查看文件 @
3800528b
...
@@ -19,6 +19,7 @@
...
@@ -19,6 +19,7 @@
/*
/*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-07-18
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-07-18
* I'm surprised that I did not write this file till today.
* I'm surprised that I did not write this file till today.
* $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-22 float16 added
*/
*/
#ifndef __SETDATA_CUH__
#ifndef __SETDATA_CUH__
...
@@ -28,14 +29,10 @@
...
@@ -28,14 +29,10 @@
namespace nts { // namespace nts(NiuTrans.Tensor)
namespace nts { // namespace nts(NiuTrans.Tensor)
/* generate data items with a fixed value p (in int) */
/* generate data items with a fixed value p (in int, float, float16, double) */
void _CudaSetDataFixedInt(XTensor * tensor, int p);
template<class T>
void _CudaSetDataFixed(XTensor * tensor, T p);
/* generate data items with a fixed value p (in float) */
void _CudaSetDataFixedFloat(XTensor * tensor, float p);
/* generate data items with a fixed value p (in double) */
void _CudaSetDataFixedDouble(XTensor * tensor, double p);
/* generate data items with a fixed value p (in float) only
/* generate data items with a fixed value p (in float) only
if the condition entry is non-zero */
if the condition entry is non-zero */
...
...
source/tensor/core/getandset/SetData.h
查看文件 @
3800528b
...
@@ -24,29 +24,22 @@
...
@@ -24,29 +24,22 @@
#define __SETDATA_H__
#define __SETDATA_H__
#include "../../XTensor.h"
#include "../../XTensor.h"
#include "SetData.cuh"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* generate data items with a xavier initialization */
/* generate data items with a xavier initialization */
void
_SetDataFanInOut
(
XTensor
*
tensor
,
DTYPE
gain
=
1
.
0
F
);
void
_SetDataFanInOut
(
XTensor
*
tensor
,
DTYPE
gain
=
1
.
0
F
);
/* generate data items with a fixed value p */
/
//
* generate data items with a fixed value p */
void
_SetDataFixed
(
XTensor
*
tensor
,
void
*
valuePointer
);
//
void _SetDataFixed(XTensor * tensor, void * valuePointer);
/* generate data items with a fixed value p (in default type) */
/* generate data items with a fixed value p (in default type) */
void
SetDataFixed
(
XTensor
&
tensor
,
DTYPE
p
);
void
SetDataFixed
(
XTensor
&
tensor
,
DTYPE
p
);
/* generate data items with a fixed value p (in integer) */
void
SetDataFixedInt
(
XTensor
&
tensor
,
int
p
);
void
SetDataFixedInt
(
XTensor
&
tensor
,
int
p
);
/* generate data items with a fixed value p (in int) */
template
<
class
T
>
void
_SetDataFixedInt
(
XTensor
*
tensor
,
int
p
);
void
_SetDataFixed
(
XTensor
*
tensor
,
T
value
);
/* generate data items with a fixed value p (in float) */
void
_SetDataFixedFloat
(
XTensor
*
tensor
,
float
p
);
/* generate data items with a fixed value p (in double) */
void
_SetDataFixedDouble
(
XTensor
*
tensor
,
double
p
);
/* generate data items with a fixed value p only if the condition entry is non-zero */
/* generate data items with a fixed value p only if the condition entry is non-zero */
void
_SetDataFixedCond
(
XTensor
*
tensor
,
XTensor
*
condition
,
DTYPE
p
);
void
_SetDataFixedCond
(
XTensor
*
tensor
,
XTensor
*
condition
,
DTYPE
p
);
...
...
source/tensor/function/DropoutWithIndex.cpp
查看文件 @
3800528b
...
@@ -70,7 +70,7 @@ XTensor DropoutWithIndex(const XTensor &x, XTensor &maskIndex, DTYPE scale)
...
@@ -70,7 +70,7 @@ XTensor DropoutWithIndex(const XTensor &x, XTensor &maskIndex, DTYPE scale)
InitTensor1D
(
&
c
,
x
.
unitNum
,
x
.
dataType
,
x
.
devID
,
x
.
mem
);
InitTensor1D
(
&
c
,
x
.
unitNum
,
x
.
dataType
,
x
.
devID
,
x
.
mem
);
_SetDataFixed
Float
(
&
c
,
1.0
F
);
_SetDataFixed
(
&
c
,
1.0
F
);
_DropoutWithIndex
(
&
x
,
&
maskIndex
,
&
c
);
_DropoutWithIndex
(
&
x
,
&
maskIndex
,
&
c
);
...
...
source/tensor/function/Loss.cpp
查看文件 @
3800528b
...
@@ -385,11 +385,11 @@ void _LossBackward(XTensor * dedy, XTensor * t, XTensor * y,
...
@@ -385,11 +385,11 @@ void _LossBackward(XTensor * dedy, XTensor * t, XTensor * y,
{
{
if
(
t
==
NULL
){
if
(
t
==
NULL
){
if
(
dedy
->
dataType
==
X_FLOAT
)
if
(
dedy
->
dataType
==
X_FLOAT
)
_SetDataFixed
Float
(
dedy
,
1.0
F
);
_SetDataFixed
(
dedy
,
1.0
F
);
else
if
(
dedy
->
dataType
==
X_DOUBLE
)
else
if
(
dedy
->
dataType
==
X_DOUBLE
)
_SetDataFixed
Double
(
dedy
,
1.0
);
_SetDataFixed
(
dedy
,
1.0
);
else
if
(
dedy
->
dataType
==
X_INT
)
else
if
(
dedy
->
dataType
==
X_INT
)
_SetDataFixed
Int
(
dedy
,
1
);
_SetDataFixed
(
dedy
,
1
);
else
{
else
{
ShowNTErrors
(
"TODO"
);
ShowNTErrors
(
"TODO"
);
}
}
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论