Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
T
Tensor.LowPrecision
概览
Overview
Details
Activity
Cycle Analytics
版本库
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
问题
0
Issues
0
列表
Board
标记
里程碑
合并请求
0
Merge Requests
0
CI / CD
CI / CD
流水线
作业
日程表
图表
维基
Wiki
代码片段
Snippets
成员
Collapse sidebar
Close sidebar
活动
图像
聊天
创建新问题
作业
提交
Issue Boards
Open sidebar
linye
Tensor.LowPrecision
Commits
b5c4aa4e
Commit
b5c4aa4e
authored
5 years ago
by
linye
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
update divdim
parent
2ffea05e
显示空白字符变更
内嵌
并排
正在显示
3 个修改的文件
包含
253 行增加
和
2 行删除
+253
-2
source/tensor/core/arithmetic/DivDim.cu
+30
-0
source/tensor/test/TDivDim.cpp
+221
-0
source/tensor/test/Test.cpp
+2
-2
没有找到文件。
source/tensor/core/arithmetic/DivDim.cu
查看文件 @
b5c4aa4e
...
...
@@ -17,6 +17,7 @@
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-08-15
* $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-15 float16 added
*/
#include "DivDim.cuh"
...
...
@@ -168,6 +169,35 @@ void _CudaDivDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYPE
ShowNTErrors("Something is wrong!");
}
}
else if (a->dataType == X_FLOAT16) {
unsigned short temp = FloatToFloat16(alpha);
half alpha1 = *((half *)&temp);
if (stride > 1){
GDevs.GetCudaThread2D(a->devID, stride * blockNum, blockSize, MAX_INT, cudaGrids, cudaBlocks);
if (alpha == (DTYPE)0.0F)
KernelDivWithCol<__half, false> <<<dim3(cudaGrids[0], cudaGrids[1]), dim3(cudaBlocks[0], cudaBlocks[1])>>>
((__half*)a->data, (__half*)b->data, (__half*)c->data,
blockSize, stride, blockSize * stride, blockNum, alpha1);
else
KernelDivWithCol<__half, true> <<<dim3(cudaGrids[0], cudaGrids[1]), dim3(cudaBlocks[0], cudaBlocks[1])>>>
((__half*)a->data, (__half*)b->data, (__half*)c->data,
blockSize, stride, blockSize * stride, blockNum, alpha1);
}
else if (stride == 1){
GDevs.GetCudaThread2D(a->devID, blockSize, blockNum, MAX_INT, cudaGrids, cudaBlocks);
if (alpha == (DTYPE)0.0F)
KernelDivWithRow<__half, false> <<<dim3(cudaGrids[0], cudaGrids[1]), dim3(cudaBlocks[0], cudaBlocks[1])>>>
((__half*)a->data, (__half*)b->data, (__half*)c->data,
blockNum, blockSize, alpha1);
else
KernelDivWithRow<__half, true> <<<dim3(cudaGrids[0], cudaGrids[1]), dim3(cudaBlocks[0], cudaBlocks[1])>>>
((__half*)a->data, (__half*)b->data, (__half*)c->data,
blockNum, blockSize, alpha1);
}
else {
ShowNTErrors("Something is wrong!");
}
}
else {
ShowNTErrors("TODO!");
}
...
...
This diff is collapsed.
Click to expand it.
source/tensor/test/TDivDim.cpp
查看文件 @
b5c4aa4e
...
...
@@ -17,11 +17,13 @@
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-08-14
* $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-15 float16 added
*/
#include "TDivDim.h"
#include "../core/arithmetic/DivDim.h"
#include "../XTensor.h"
#include "../core/getandset/ConvertDataType.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
...
...
@@ -251,6 +253,207 @@ bool TestDivDim2()
#endif // USE_CUDA
}
/*
case 3: float16 tensor division c = a/b + \alpha * c
where the size of b is equal to the n-th dimension of a,
i.e., a is divided with b by broadcasting.
In this case, (2, 4) / (2) = (2, 4), n = 0, alpha = 0.0.
*/
bool
TestDivDim3
()
{
/* a tensor of size (2, 4) */
int
aOrder
=
2
;
int
*
aDimSize
=
new
int
[
aOrder
];
aDimSize
[
0
]
=
2
;
aDimSize
[
1
]
=
4
;
int
aUnitNum
=
1
;
for
(
int
i
=
0
;
i
<
aOrder
;
i
++
)
aUnitNum
*=
aDimSize
[
i
];
/* a tensor of size (2) */
int
bOrder
=
1
;
int
*
bDimSize
=
new
int
[
bOrder
];
bDimSize
[
0
]
=
2
;
int
bUnitNum
=
1
;
for
(
int
i
=
0
;
i
<
bOrder
;
i
++
)
bUnitNum
*=
bDimSize
[
i
];
DTYPE
aData
[
2
][
4
]
=
{
{
0.0
F
,
1.0
F
,
2.0
F
,
3.0
F
},
{
4.0
F
,
5.0
F
,
6.0
F
,
7.0
F
}
};
DTYPE
bData
[
2
]
=
{
1.0
F
,
-
1.0
F
};
DTYPE
answer
[
2
][
4
]
=
{
{
0.0
F
,
1.0
F
,
2.0
F
,
3.0
F
},
{
-
4.0
F
,
-
5.0
F
,
-
6.0
F
,
-
7.0
F
}
};
/* CPU test */
bool
cpuTest
=
true
;
#ifdef USE_CUDA
/* GPU test */
bool
gpuTest
=
true
;
/* create tensor */
XTensor
*
aGPU
=
NewTensor
(
aOrder
,
aDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
bGPU
=
NewTensor
(
bOrder
,
bDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
cGPU
=
NewTensor
(
aOrder
,
aDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
cMeGPU
=
NewTensor
(
aOrder
,
aDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
cUserGPU
;
/* create float16 tensor */
XTensor
aHalfGPU
;
XTensor
bHalfGPU
;
XTensor
cHalfGPU
;
XTensor
cMeHalfGPU
;
XTensor
cUserHalfGPU
;
/* Initialize variables */
aGPU
->
SetData
(
aData
,
aUnitNum
);
cMeGPU
->
SetData
(
aData
,
aUnitNum
);
bGPU
->
SetData
(
bData
,
bUnitNum
);
cGPU
->
SetZeroAll
();
/* convert data type from float to float16 */
aHalfGPU
=
ConvertDataType
(
*
aGPU
,
X_FLOAT16
);
bHalfGPU
=
ConvertDataType
(
*
bGPU
,
X_FLOAT16
);
cHalfGPU
=
ConvertDataType
(
*
cGPU
,
X_FLOAT16
);
cMeHalfGPU
=
ConvertDataType
(
*
cMeGPU
,
X_FLOAT16
);
/* call sum function */
_DivDim
(
&
aHalfGPU
,
&
bHalfGPU
,
&
cHalfGPU
,
0
);
_DivDim
(
&
cMeHalfGPU
,
&
bHalfGPU
,
0
);
cUserHalfGPU
=
DivDim
(
aHalfGPU
,
bHalfGPU
,
0
);
/* convert data type from float16 to float */
_ConvertDataType
(
&
cHalfGPU
,
cGPU
);
_ConvertDataType
(
&
cMeHalfGPU
,
cMeGPU
);
cUserGPU
=
ConvertDataType
(
cUserHalfGPU
,
X_FLOAT
);
/* check results */
gpuTest
=
cGPU
->
CheckData
(
answer
,
aUnitNum
)
&&
cMeGPU
->
CheckData
(
answer
,
aUnitNum
)
&&
cUserGPU
.
CheckData
(
answer
,
aUnitNum
);
/* destroy variables */
delete
aGPU
;
delete
bGPU
;
delete
cGPU
;
delete
cMeGPU
;
delete
[]
aDimSize
;
delete
[]
bDimSize
;
return
cpuTest
&&
gpuTest
;
#else
/* destroy variables */
delete
[]
aDimSize
;
delete
[]
bDimSize
;
return
cpuTest
;
#endif // USE_CUDA
}
/*
case 4: float16 tensor division c = a/b + \alpha * c
where the size of b is equal to the n-th dimension of a,
i.e., a is divided with b by broadcasting.
In this case, (2, 4) / (2, 2) = (2, 4), n = 1.
*/
bool
TestDivDim4
()
{
/* a tensor of size (2, 4) */
int
aOrder
=
2
;
int
*
aDimSize
=
new
int
[
aOrder
];
aDimSize
[
0
]
=
2
;
aDimSize
[
1
]
=
4
;
int
aUnitNum
=
1
;
for
(
int
i
=
0
;
i
<
aOrder
;
i
++
)
aUnitNum
*=
aDimSize
[
i
];
/* a tensor of size (2, 2) */
int
bOrder
=
2
;
int
*
bDimSize
=
new
int
[
bOrder
];
bDimSize
[
0
]
=
2
;
bDimSize
[
1
]
=
2
;
int
bUnitNum
=
1
;
for
(
int
i
=
0
;
i
<
bOrder
;
i
++
)
bUnitNum
*=
bDimSize
[
i
];
DTYPE
aData
[
2
][
4
]
=
{
{
0.0
F
,
1.0
F
,
2.0
F
,
3.0
F
},
{
4.0
F
,
5.0
F
,
6.0
F
,
7.0
F
}
};
DTYPE
bData
[
2
][
2
]
=
{
{
1.0
F
,
-
1.0
F
},
{
-
1.0
F
,
1.0
F
}
};
DTYPE
answer
[
2
][
4
]
=
{
{
0.0
F
,
-
1.0
F
,
-
2.0
F
,
3.0
F
},
{
4.0
F
,
-
5.0
F
,
-
6.0
F
,
7.0
F
}
};
/* CPU test */
bool
cpuTest
=
true
;
#ifdef USE_CUDA
/* GPU test */
bool
gpuTest
=
true
;
/* create tensor */
XTensor
*
aGPU
=
NewTensor
(
aOrder
,
aDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
bGPU
=
NewTensor
(
bOrder
,
bDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
cGPU
=
NewTensor
(
aOrder
,
aDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
cMeGPU
=
NewTensor
(
aOrder
,
aDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
cUserGPU
;
/* create float16 tensor */
XTensor
aHalfGPU
;
XTensor
bHalfGPU
;
XTensor
cHalfGPU
;
XTensor
cMeHalfGPU
;
XTensor
cUserHalfGPU
;
/* Initialize variables */
aGPU
->
SetData
(
aData
,
aUnitNum
);
cMeGPU
->
SetData
(
aData
,
aUnitNum
);
bGPU
->
SetData
(
bData
,
bUnitNum
);
cGPU
->
SetZeroAll
();
/* convert data type from float to float16 */
aHalfGPU
=
ConvertDataType
(
*
aGPU
,
X_FLOAT16
);
bHalfGPU
=
ConvertDataType
(
*
bGPU
,
X_FLOAT16
);
cHalfGPU
=
ConvertDataType
(
*
cGPU
,
X_FLOAT16
);
cMeHalfGPU
=
ConvertDataType
(
*
cMeGPU
,
X_FLOAT16
);
/* call sum function */
_DivDim
(
&
aHalfGPU
,
&
bHalfGPU
,
&
cHalfGPU
,
1
);
_DivDim
(
&
cMeHalfGPU
,
&
bHalfGPU
,
1
);
cUserHalfGPU
=
DivDim
(
aHalfGPU
,
bHalfGPU
,
1
);
/* convert data type from float16 to float */
_ConvertDataType
(
&
cHalfGPU
,
cGPU
);
_ConvertDataType
(
&
cMeHalfGPU
,
cMeGPU
);
cUserGPU
=
ConvertDataType
(
cUserHalfGPU
,
X_FLOAT
);
/* check results */
gpuTest
=
cGPU
->
CheckData
(
answer
,
aUnitNum
)
&&
cMeGPU
->
CheckData
(
answer
,
aUnitNum
)
&&
cUserGPU
.
CheckData
(
answer
,
aUnitNum
);
/* destroy variables */
delete
aGPU
;
delete
bGPU
;
delete
cGPU
;
delete
cMeGPU
;
delete
[]
aDimSize
;
delete
[]
bDimSize
;
return
cpuTest
&&
gpuTest
;
#else
/* destroy variables */
delete
[]
aDimSize
;
delete
[]
bDimSize
;
return
cpuTest
;
#endif // USE_CUDA
}
/* other cases */
/*
TODO!!
...
...
@@ -280,6 +483,24 @@ bool TestDivDim()
else
XPRINT
(
0
,
stdout
,
">> case 2 passed!
\n
"
);
/* case 3 test */
caseFlag
=
TestDivDim3
();
if
(
!
caseFlag
)
{
returnFlag
=
false
;
XPRINT
(
0
,
stdout
,
">> case 3 failed!
\n
"
);
}
else
XPRINT
(
0
,
stdout
,
">> case 3 passed!
\n
"
);
/* case 4 test */
caseFlag
=
TestDivDim4
();
if
(
!
caseFlag
)
{
returnFlag
=
false
;
XPRINT
(
0
,
stdout
,
">> case 4 failed!
\n
"
);
}
else
XPRINT
(
0
,
stdout
,
">> case 4 passed!
\n
"
);
/* other cases test */
/*
TODO!!
...
...
This diff is collapsed.
Click to expand it.
source/tensor/test/Test.cpp
查看文件 @
b5c4aa4e
...
...
@@ -39,7 +39,7 @@ bool Test()
//wrong = !TestCopyIndexed() || wrong;
//wrong = !TestCopyValues() || wrong;
//wrong = !TestDiv() || wrong;
//
wrong = !TestDivDim() || wrong;
wrong
=
!
TestDivDim
()
||
wrong
;
//wrong = !TestExp() || wrong;
//wrong = !TestGather() || wrong;
//wrong = !TestLog() || wrong;
...
...
@@ -82,7 +82,7 @@ bool Test()
//wrong = !TestCrossEntropy() || wrong;
//wrong = !TestDropout() || wrong;
wrong
=
!
TestHardTanH
()
||
wrong
;
//
wrong = !TestHardTanH() || wrong;
//wrong = !TestIdentity() || wrong;
//wrong = !TestLogSoftmax() || wrong;
//wrong = !TestLoss() || wrong;
...
...
This diff is collapsed.
Click to expand it.
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论