Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
N
NiuTrans.Tensor
概览
Overview
Details
Activity
Cycle Analytics
版本库
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
问题
0
Issues
0
列表
Board
标记
里程碑
合并请求
0
Merge Requests
0
CI / CD
CI / CD
流水线
作业
日程表
图表
维基
Wiki
代码片段
Snippets
成员
Collapse sidebar
Close sidebar
活动
图像
聊天
创建新问题
作业
提交
Issue Boards
Open sidebar
Emmay
NiuTrans.Tensor
Commits
f7ed3448
Commit
f7ed3448
authored
Jul 28, 2018
by
xiaotong
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
clean the code of Split
parent
15f75d3a
隐藏空白字符变更
内嵌
并排
正在显示
7 个修改的文件
包含
24 行增加
和
29 行删除
+24
-29
source/network/Main.cpp
+1
-0
source/tensor/XTensor.h
+2
-1
source/tensor/core/movement/CopyBlocksOnSite.cu
+2
-2
source/tensor/core/shape/MakeMergeBlockIndex.cpp
+4
-4
source/tensor/core/shape/MakeMergeBlockIndex.h
+1
-1
source/tensor/core/shape/Merge.cpp
+12
-17
source/tensor/core/shape/Split.cpp
+2
-4
没有找到文件。
source/network/Main.cpp
查看文件 @
f7ed3448
...
...
@@ -21,6 +21,7 @@
#include <stdio.h>
#include "XNet.h"
#include "../tensor/XUtility.h"
#include "../tensor/function/FHeader.h"
#include "../tensor/core/CHeader.h"
#include "../sample/fnnlm/FNNLM.h"
...
...
source/tensor/XTensor.h
查看文件 @
f7ed3448
...
...
@@ -47,10 +47,11 @@ struct XLink;
/* define the maximum number of dimensions in a tensor */
#define MAX_TENSOR_DIM_NUM 6
#define USE_BATCHED_STRIDED_MAT_MUL
#define MIN_TENSOR_SPLIT_NUM
1
0
#define MIN_TENSOR_SPLIT_NUM 0
#define MIN_TENSOR_SPLIT_LIST_NUM 1024
#define MIN_TENSOR_CAT_NUM 8
/* computation flags */
#define UNSAFE_BUT_FAST_MEM
#define FAST_MATRIX
...
...
source/tensor/core/movement/CopyBlocksOnSite.cu
查看文件 @
f7ed3448
...
...
@@ -90,12 +90,12 @@ void _CudaCopyBlocks(void * source, int blockSize, int blockNum, void * target,
int bSize = blockSize / sizeof(DTYPE);
if (bSize % 4 == 0) {
GDevs.GetCudaThread2D(
myMem->
devID, bSize / 4, blockNum, MAX_INT, cudaGrids, cudaBlocks);
GDevs.GetCudaThread2D(devID, bSize / 4, blockNum, MAX_INT, cudaGrids, cudaBlocks);
KernelCopyBlocks<4> <<<dim3(cudaGrids[0], cudaGrids[1]), dim3(cudaBlocks[0], cudaBlocks[1]) >>>
((DTYPE*)source, bSize, blockNum, (DTYPE*)target, targetBlocks);
}
else {
GDevs.GetCudaThread2D(
myMem->
devID, bSize, blockNum, MAX_INT, cudaGrids, cudaBlocks);
GDevs.GetCudaThread2D(devID, bSize, blockNum, MAX_INT, cudaGrids, cudaBlocks);
KernelCopyBlocks<1> <<<dim3(cudaGrids[0], cudaGrids[1]), dim3(cudaBlocks[0], cudaBlocks[1]) >>>
((DTYPE*)source, bSize, blockNum, (DTYPE*)target, targetBlocks);
}
...
...
source/tensor/core/shape/MakeMergeBlockIndex.cpp
查看文件 @
f7ed3448
...
...
@@ -33,14 +33,14 @@ set target data block index for the data movement in merge
>> splitSizeInGrid - size of each data array to merge
>> gridSize - number of blocks in a grid (here grid is a higher level organization upon blocks)
>> gridNum - number of grids
>>
mem - the memory pool
>>
devID - device id
*/
void
_MakeMergeBlockIndex
(
int
*
blockIndex
,
int
blockNum
,
int
blockNumInMerge
,
int
splitSizeInGrid
,
int
gridSize
,
int
gridNum
,
XMem
*
mem
)
int
splitSizeInGrid
,
int
gridSize
,
int
gridNum
,
int
devID
)
{
if
(
mem
!=
NULL
&&
mem
->
devID
>=
0
)
{
if
(
devID
>=
0
)
{
#ifdef USE_CUDA
_CudaMakeMergeBlockIndex
(
mem
->
devID
,
blockIndex
,
blockNum
,
blockNumInMerge
,
splitSizeInGrid
,
gridSize
,
gridNum
);
_CudaMakeMergeBlockIndex
(
devID
,
blockIndex
,
blockNum
,
blockNumInMerge
,
splitSizeInGrid
,
gridSize
,
gridNum
);
#else
ShowNTErrors
(
"Please specify USE_CUDA and recompile the code!"
);
#endif
...
...
source/tensor/core/shape/MakeMergeBlockIndex.h
查看文件 @
f7ed3448
...
...
@@ -28,7 +28,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* set target data block index for the data movement in merge */
void
_MakeMergeBlockIndex
(
int
*
blockIndex
,
int
blockNum
,
int
blockNumInMerge
,
int
splitSizeInGrid
,
int
gridSize
,
int
gridNum
,
XMem
*
mem
);
int
splitSizeInGrid
,
int
gridSize
,
int
gridNum
,
int
devID
);
}
// namespace nts(NiuTrans.Tensor)
...
...
source/tensor/core/shape/Merge.cpp
查看文件 @
f7ed3448
...
...
@@ -44,10 +44,10 @@ void _Merge(const XTensor * s, XTensor * t, int whereToMerge, int leadingDim)
{
if
(
leadingDim
<
0
)
leadingDim
=
0
;
int
whereToMergeRDI
=
s
->
order
-
whereToMerge
-
1
;
int
leadingDimRDI
=
s
->
order
-
leadingDim
-
1
;
int
whereToMergeRDI
=
s
->
order
-
whereToMerge
-
1
;
int
leadingDimRDI
=
s
->
order
-
leadingDim
-
1
;
if
(
leadingDimRDI
<
0
)
leadingDimRDI
=
s
->
order
-
1
;
leadingDimRDI
=
s
->
order
-
1
;
CheckNTErrors
((
s
!=
NULL
&&
t
!=
NULL
),
"Invalid tensors!"
);
CheckNTErrors
((
s
->
devID
==
t
->
devID
||
(
s
->
devID
<
0
&&
t
->
devID
<
0
)),
...
...
@@ -121,28 +121,24 @@ void _Merge(const XTensor * s, XTensor * t, int whereToMerge, int leadingDim)
int
realBlockSize
=
blockSize
*
t
->
unitSize
;
int
*
blockIndex
=
(
int
*
)(
mem
!=
NULL
?
mem
->
AllocBuf
(
mem
->
devID
,
blockNum
*
gridNum
*
sizeof
(
int
))
:
XMemAlloc
(
mem
->
devID
,
blockNum
*
gridNum
*
sizeof
(
int
)));
mem
->
AllocBuf
(
mem
->
devID
,
blockNum
*
gridNum
*
sizeof
(
int
))
:
XMemAlloc
(
s
->
devID
,
blockNum
*
gridNum
*
sizeof
(
int
)));
_MakeMergeBlockIndex
(
blockIndex
,
blockNum
,
blockNumInMerge
,
splitSizeInGrid
,
gridSize
,
gridNum
,
mem
);
_MakeMergeBlockIndex
(
blockIndex
,
blockNum
,
blockNumInMerge
,
splitSizeInGrid
,
gridSize
,
gridNum
,
s
->
devID
);
_CopyBlocksOnSite
(
s
->
data
,
realBlockSize
,
blockNum
,
dataTMP
,
blockIndex
,
s
->
devID
);
if
(
mem
!=
NULL
)
mem
->
ReleaseBuf
(
mem
->
devID
,
blockNum
*
gridNum
*
sizeof
(
int
));
else
XMemFree
(
mem
->
devID
,
blockIndex
);
/* copy from tmp to target */
XMemCopy
(
t
->
data
,
t
->
devID
,
dataTMP
,
s
->
devID
,
size
);
XMemFree
(
s
->
devID
,
blockIndex
);
if
(
!
isOnSameDevice
)
{
XMemCopy
(
t
->
data
,
t
->
devID
,
dataTMP
,
s
->
devID
,
size
);
if
(
mem
!=
NULL
)
mem
->
ReleaseBuf
(
mem
->
devID
,
size
);
else
XMemFree
(
mem
->
devID
,
dataTMP
);
XMemFree
(
s
->
devID
,
dataTMP
);
}
}
}
...
...
@@ -165,7 +161,7 @@ XTensor Merge(const XTensor &s, int whereToMerge, int leadingDim)
CheckNTErrors
(
leadingDim
<
whereToMerge
,
"Invalid leading dimension!"
);
if
(
leadingDim
<
0
)
leadingDim
=
0
;
leadingDim
=
0
;
int
order
=
s
.
order
-
1
;
int
*
dimSize
=
new
int
[
order
];
...
...
@@ -207,7 +203,7 @@ merge small tensors into a big tensor
*/
void
_Merge
(
const
XList
*
smalls
,
XTensor
*
big
,
int
whereToMerge
)
{
CheckNTErrors
((
smalls
!=
NULL
),
"Invalid list!"
);
CheckNTErrors
((
smalls
!=
NULL
),
"Invalid list!"
);
CheckNTErrors
((
smalls
->
count
>
0
),
"Empty list!"
);
bool
uniform
=
true
;
...
...
@@ -235,7 +231,7 @@ void _Merge(const XList * smalls, XTensor * big, int whereToMerge)
int
mergedNum
=
smalls
->
count
;
XTensor
*
s0
=
(
XTensor
*
)
smalls
->
GetItem
(
0
);
int
whereToMergeRDI
=
s0
->
order
-
whereToMerge
-
1
;
int
whereToMergeRDI
=
s0
->
order
-
whereToMerge
-
1
;
for
(
int
i
=
0
;
i
<
s0
->
order
;
i
++
)
{
if
(
i
<=
whereToMergeRDI
)
blockSize
*=
s0
->
dimSizeRDI
[
i
];
...
...
@@ -300,9 +296,8 @@ void _Merge(const XList * smalls, XTensor * big, int whereToMerge)
_Merge
(
tensorTMP
,
big
,
whereToMerge
+
1
);
delete
[]
dimSizeTMP
;
tensorTMP
->
data
=
NULL
;
dataTMP
=
NULL
;
tensorTMP
->
data
=
NULL
;
delete
tensorTMP
;
if
((
!
uniform
)
&&
(
mem
!=
NULL
))
...
...
source/tensor/core/shape/Split.cpp
查看文件 @
f7ed3448
...
...
@@ -83,7 +83,6 @@ void _Split(const XTensor * s, XTensor * t, int whereToSplit, int splitNum)
CheckNTErrors
((
blockNum
%
splitNum
==
0
),
"Incorrect split number!"
);
if
(
splitNum
<=
MIN_TENSOR_SPLIT_NUM
)
{
//if (splitNum <= 0) {
int
sPitch
=
blockSize
*
splitNum
*
s
->
unitSize
;
int
tPitch
=
blockSize
*
t
->
unitSize
;
int
mSize
=
blockSize
*
t
->
unitSize
;
...
...
@@ -143,7 +142,7 @@ void _Split(const XTensor * s, XTensor * t, int whereToSplit, int splitNum)
if
(
mem
!=
NULL
)
mem
->
ReleaseBuf
(
mem
->
devID
,
blockNum
*
sizeof
(
int
));
else
XMemFree
(
mem
->
devID
,
blockIndex
);
XMemFree
(
s
->
devID
,
blockIndex
);
/* copy from tmp to target */
if
(
!
isOnSameDevice
)
{
...
...
@@ -152,7 +151,7 @@ void _Split(const XTensor * s, XTensor * t, int whereToSplit, int splitNum)
if
(
mem
!=
NULL
)
mem
->
ReleaseBuf
(
mem
->
devID
,
size
);
else
XMemFree
(
mem
->
devID
,
dataTMP
);
XMemFree
(
s
->
devID
,
dataTMP
);
}
}
}
...
...
@@ -321,7 +320,6 @@ void _Split(const XTensor * big, XList * smalls, int whereToSplit, int splitNum)
delete
[]
dimSizeTMP
;
tensorTMP
->
data
=
NULL
;
dataTMP
=
NULL
;
delete
tensorTMP
;
if
((
!
uniform
)
&&
(
mem
!=
NULL
))
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论