NiuTrans.Tensor / Commits

Commit e5a709dc, authored Jul 28, 2018 by xiaotong
parent e84e725e

    add Transpose and buf fixes
Showing 8 changed files with 228 additions and 22 deletions:

    source/network/Main.cpp                    +63   -0
    source/tensor/Main.cpp                     +34   -0
    source/tensor/XDevice.cpp                  +2    -5
    source/tensor/XMem.cpp                     +4    -1
    source/tensor/XUtility.cpp                 +15   -6
    source/tensor/core/shape/Merge.cpp         +6    -2
    source/tensor/core/shape/Transpose.cpp     +98   -0
    source/tensor/core/shape/Transpose.h       +6    -8
source/network/Main.cpp
@@ -30,11 +30,16 @@
 //#include <stdlib.h>
 //#include <crtdbg.h>

+void TransposeTest();
+
 using namespace nts;
 using namespace samplefnnlm;

 int main( int argc, const char ** argv )
 {
+    //TransposeTest();
+    //return 0;
+
     if(argc > 1 && !strcmp(argv[1], "-test"))
         1;//Test();
     else if(argc > 1 && !strcmp(argv[1], "-fnnlm"))
@@ -81,3 +86,61 @@ int main( int argc, const char ** argv )
     return 0;
 }
+
+void TransposeTest()
+{
+    XMem mem0(0, UNI_FREE, MILLION * 64, 1024, MILLION * 64);
+    //XMem mem1(1, UNI_FREE, MILLION * 64, 1024, MILLION * 64);
+
+    XTensor x;
+    XTensor y;
+
+    int loops = 2000;
+
+    int B = 3 * 2 * 4;
+    int K = 8 * 1;
+    int N = 50;
+    int H = 512 * 4;
+
+    int nnn = GDevs.nGPU;
+
+    InitTensor3D(&x, B, N, H, X_FLOAT, 0);
+    InitTensor4D(&y, K, B, N, H/K, X_FLOAT, 0);
+
+    cudaEvent_t ctime0;
+    cudaEvent_t ctime1;
+    cudaEvent_t ctime2;
+    cudaEvent_t ctime3;
+
+    float elapsedSplit = 0.0;
+    float elapsedMerge = 0.0;
+
+    cudaEventCreate(&ctime0);
+    cudaEventCreate(&ctime1);
+    cudaEventCreate(&ctime2);
+    cudaEventCreate(&ctime3);
+
+    cudaEventRecord(ctime0, 0);
+    double time0 = GetClock();
+
+    for(int i = 0; i < loops; i++)
+        _Split(&x, &y, 2, K);
+
+    double time1 = GetClock();
+    cudaEventRecord(ctime1, 0);
+    cudaEventSynchronize(ctime1);
+    cudaEventElapsedTime(&elapsedSplit, ctime0, ctime1);
+
+    cudaEventRecord(ctime2, 0);
+    double time2 = GetClock();
+
+    for(int i = 0; i < loops; i++)
+        _Merge(&y, &x, 3);
+
+    double time3 = GetClock();
+    cudaEventRecord(ctime3, 0);
+    cudaEventSynchronize(ctime3);
+    cudaEventElapsedTime(&elapsedMerge, ctime2, ctime3);
+
+    fprintf(stderr, "split:%f merge:%f\n", time1 - time0, time3 - time2);
+    fprintf(stderr, "split:%f merge:%f\n", elapsedSplit, elapsedMerge);
+}
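A note on the timing scheme above: _Split and _Merge launch GPU work asynchronously, so the GetClock() deltas measure wall-clock time on the host, while the cudaEvent pairs isolate time the GPU spends between the recorded markers. The general pattern, as a minimal sketch (someKernel and its launch parameters are placeholders, not part of this commit):

    cudaEvent_t start, stop;
    cudaEventCreate(&start);
    cudaEventCreate(&stop);

    cudaEventRecord(start, 0);                 // enqueue a start marker on stream 0
    someKernel<<<grid, block>>>(args);         // asynchronous GPU work goes here
    cudaEventRecord(stop, 0);                  // enqueue a stop marker
    cudaEventSynchronize(stop);                // block until the GPU passes the marker

    float ms = 0.0f;
    cudaEventElapsedTime(&ms, start, stop);    // elapsed GPU time in milliseconds

    cudaEventDestroy(start);
    cudaEventDestroy(stop);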
source/tensor/Main.cpp
@@ -29,6 +29,7 @@
 #include "XTensor.h"
 #include "XDevice.h"
 #include "./test/Test.h"
+#include "./core/CHeader.h"

 //#define CRTDBG_MAP_ALLOC
 //#include <stdlib.h>
@@ -37,6 +38,7 @@
 using namespace nts;

 void SmallTest();
+void TransposeTest();

 int main( int argc, const char ** argv )
 {
@@ -92,3 +94,35 @@ void SmallTest()
     c.Dump(stderr, "c:");
     d.Dump(stderr, "d:");
 }
+
+void TransposeTest()
+{
+    XTensor a;
+    XTensor b;
+
+    int I = 2;
+    int J = 3;
+
+    InitTensor4D(&a, 2, 3, 4, 5);
+
+    int * dims = new int[a.order];
+    memcpy(dims, a.dimSize, sizeof(int) * a.order);
+    dims[I] = a.dimSize[J];
+    dims[J] = a.dimSize[I];
+
+    InitTensor(&b, 4, dims);
+
+    a.SetZeroAll();
+    b.SetZeroAll();
+
+    float * data = new float[a.unitNum];
+    for(int i = 0; i < a.unitNum; i++)
+        data[i] = (float)i;
+
+    a.SetData(data, a.unitNum, 0);
+
+    _Transpose(&a, &b, I, J);
+    b.Dump(stderr, "b:");
+
+    delete[] data;
+}
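For reference, the element mapping this test exercises: with I = 2 and J = 3 on a (2, 3, 4, 5) tensor, element (p, q, r, s) of a lands at (p, q, s, r) of b. A self-contained sketch of that index bookkeeping on plain arrays, assuming row-major storage (illustrative only, independent of XTensor):

    #include <cassert>

    int main()
    {
        const int d0 = 2, d1 = 3, d2 = 4, d3 = 5;
        float a[d0 * d1 * d2 * d3];
        float b[d0 * d1 * d3 * d2];

        for (int i = 0; i < d0 * d1 * d2 * d3; i++)
            a[i] = (float)i;                        // same fill as TransposeTest

        // row-major offset of (p, q, r, s) in shape (d0, d1, d2, d3):
        // ((p * d1 + q) * d2 + r) * d3 + s
        for (int p = 0; p < d0; p++)
            for (int q = 0; q < d1; q++)
                for (int r = 0; r < d2; r++)
                    for (int s = 0; s < d3; s++)
                        b[((p * d1 + q) * d3 + s) * d2 + r] =
                            a[((p * d1 + q) * d2 + r) * d3 + s];

        assert(b[0] == a[0]);                       // (0,0,0,0) is a fixed point
        return 0;
    }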
source/tensor/XDevice.cpp
@@ -405,13 +405,10 @@ int XDevManager::GetCudaThread2D(const int devID, const int n, const int m, int
...
@@ -405,13 +405,10 @@ int XDevManager::GetCudaThread2D(const int devID, const int n, const int m, int
memset
(
gridSize
,
0
,
sizeof
(
int
)
*
3
);
memset
(
gridSize
,
0
,
sizeof
(
int
)
*
3
);
memset
(
blockSize
,
0
,
sizeof
(
int
)
*
3
);
memset
(
blockSize
,
0
,
sizeof
(
int
)
*
3
);
if
(
n
<=
0
||
m
<=
0
||
devID
>=
nGPU
)
if
(
n
<=
0
||
m
<=
0
)
return
1
;
return
1
;
if
(
devID
<
0
){
CheckNTErrors
(
devID
>=
0
&&
devID
<
nGPU
,
"Invalid GPU device id!"
);
XPRINT
(
0
,
stderr
,
"WARNING! You are calling the grid and block size computation function on a CPU!"
);
return
0
;
}
#ifdef USE_CUDA
#ifdef USE_CUDA
...
...
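The behavioral change here: an out-of-range device id used to make the function silently return 1 (with the sizes zeroed), and a negative id, meaning CPU, only printed a warning and returned 0. After this commit, any id outside [0, nGPU) fails the CheckNTErrors assertion instead of being ignored.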
source/tensor/XMem.cpp
@@ -181,7 +181,10 @@ void XMem::Free(int myDevID, void * mem)
     else{
 #ifdef USE_CUDA
         SetDevice(myDevID);
-        CheckNTErrors(cudaFree((char*)mem) == cudaSuccess, "Cannot free the memory.");
+        cudaError_t error = cudaFree((char*)mem);
+        if(error != cudaSuccess){
+            ShowNTErrors("Cannot free the memory.");
+        }
 #else
         ShowNTErrors("Please specify USE_CUDA for compiling this program.");
 #endif
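The rewrite in this hunk is worth a remark: hiding a call with a side effect (cudaFree) inside a check macro is fragile, since the free vanishes if the macro is ever compiled out or short-circuited; evaluating the call first and testing the result separately keeps the two concerns apart. The before/after shape in miniature (CHECK stands in for any assert-style macro, hypothetically):

    // Fragile: the side effect lives inside the macro argument.
    CHECK(cudaFree(mem) == cudaSuccess, "Cannot free the memory.");

    // Robust: perform the call unconditionally, then test the result.
    cudaError_t error = cudaFree(mem);
    if (error != cudaSuccess)
        ShowNTErrors("Cannot free the memory.");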
source/tensor/XUtility.cpp
@@ -208,6 +208,9 @@ void XMemCopy(void * t, int devIDT, const void * s, int devIDS, size_t size)
 #ifdef USE_CUDA
 void XMemCopyAsync(void * t, int devIDT, const void * s, int devIDS, size_t size, cudaStream_t stream, int streamDevID)
 {
+    if(t == s)
+        return;
+
     int devIDBackup = -1;
     if(streamDevID >= 0 && (devIDT >= 0 || devIDS >= 0)){
         CheckNTErrors((cudaGetDevice(&devIDBackup) == cudaSuccess), "Cannot get GPU device id!");
@@ -220,17 +223,23 @@ void XMemCopyAsync(void * t, int devIDT, const void * s, int devIDS, size_t size
         return;
     }
     else if(devIDT >= 0 && devIDS < 0){
-        CheckNTErrors((cudaMemcpyAsync(t, s, size, cudaMemcpyHostToDevice, stream) == cudaSuccess),
-                      "cudaMemcpyAsync error (cudaMemcpyHostToDevice)");
+        cudaError_t error = cudaMemcpyAsync(t, s, size, cudaMemcpyHostToDevice, stream);
+        if(error != cudaSuccess){
+            ShowNTErrors("cudaMemcpyAsync error (cudaMemcpyHostToDevice)");
+        }
     }
     else if(devIDT < 0 && devIDS >= 0){
-        CheckNTErrors((cudaMemcpyAsync(t, s, size, cudaMemcpyDeviceToHost, stream) == cudaSuccess),
-                      "cudaMemcpyAsync error (cudaMemcpyDeviceToHost)");
+        cudaError_t error = cudaMemcpyAsync(t, s, size, cudaMemcpyDeviceToHost, stream);
+        if(error != cudaSuccess){
+            ShowNTErrors("cudaMemcpyAsync error (cudaMemcpyDeviceToHost)");
+        }
     }
     else{
         //if(devIDT == devIDS){
-        CheckNTErrors((cudaMemcpyAsync(t, s, size, cudaMemcpyDeviceToDevice, stream) == cudaSuccess),
-                      "cudaMemcpyAsync error (cudaMemcpyDeviceToDevice)");
+        cudaError_t error = cudaMemcpyAsync(t, s, size, cudaMemcpyDeviceToDevice, stream);
+        if(error != cudaSuccess){
+            ShowNTErrors("cudaMemcpyAsync error (cudaMemcpyDeviceToDevice)");
+        }
         //}
         /*else{
             CheckNTErrors((cudaMemcpyPeerAsync(t, devIDT, s, devIDS, size, stream) == cudaSuccess),
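The same call-then-check rewrite is applied to all three copy directions, and the commented-out cudaMemcpyPeerAsync branch is left untouched. If the pattern keeps spreading it could be factored into a small helper; a hypothetical sketch, not part of this commit:

    // Hypothetical helper: run a CUDA call unconditionally, then report failure.
    inline void CudaSafeCall(cudaError_t error, const char * msg)
    {
        if (error != cudaSuccess)
            ShowNTErrors(msg);
    }

    // usage:
    CudaSafeCall(cudaMemcpyAsync(t, s, size, cudaMemcpyHostToDevice, stream),
                 "cudaMemcpyAsync error (cudaMemcpyHostToDevice)");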
source/tensor/core/shape/Merge.cpp
@@ -62,8 +62,12 @@ void _Merge(const XTensor * s, XTensor * t, int whereToMerge, int leadingDim)
             CheckNTErrors((t->dimSizeRDI[i] == s->dimSizeRDI[i] * s->dimSizeRDI[leadingDimRDI]),
                           "Unmatched tensor sizes!");
         }
+        else if(i < leadingDimRDI){
+            CheckNTErrors((s->dimSizeRDI[i] == t->dimSizeRDI[i]),
+                          "Unmatched tensor sizes!");
+        }
         else if(i > leadingDimRDI){
-            CheckNTErrors((s->dimSizeRDI[i - 1] == t->dimSizeRDI[i]),
+            CheckNTErrors((s->dimSizeRDI[i] == t->dimSizeRDI[i - 1]),
                           "Unmatched tensor sizes!");
         }
     }
@@ -126,7 +130,7 @@ void _Merge(const XTensor * s, XTensor * t, int whereToMerge, int leadingDim)
         _MakeMergeBlockIndex(blockIndex, blockNum, blockNumInMerge, splitSizeInGrid, gridSize, gridNum, s->devID);

-        _CopyBlocksOnSite(s->data, realBlockSize, blockNum, dataTMP, blockIndex, s->devID);
+        _CopyBlocksOnSite(s->data, realBlockSize, blockNum * gridNum, dataTMP, blockIndex, s->devID);

         if (mem != NULL)
             mem->ReleaseBuf(mem->devID, blockNum * gridNum * sizeof(int));
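For orientation, the shape contract these checks enforce, using the constants from TransposeTest in source/network/Main.cpp above (B = 24, K = 8, N = 50, H = 2048):

    XTensor x;                                    // shape (B, N, H)      = (24, 50, 2048)
    XTensor y;                                    // shape (K, B, N, H/K) = (8, 24, 50, 256)
    InitTensor3D(&x, 24, 50, 2048, X_FLOAT, 0);
    InitTensor4D(&y, 8, 24, 50, 256, X_FLOAT, 0);

    _Split(&x, &y, 2, 8);    // split dim 2 of x into 8 chunks -> y
    _Merge(&y, &x, 3);       // inverse: fold the chunks back -> x of shape (24, 50, 2048)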
source/tensor/core/shape/Transpose.cpp
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-07-28
* It is extremely hot these days and I cannot sleep well. Fortunately we had
* a good lunch of Steamed Cold Noodles. This made me feel much better!
*/
#include "Transpose.h"
#include "Merge.h"
#include "../../XUtility.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/*
tensor transposition of dimensions i and j
b = transposed(a)
*/
void
_Transpose
(
const
XTensor
*
a
,
XTensor
*
b
,
const
int
i
,
const
int
j
)
{
CheckNTErrors
(
a
&&
b
,
"Empty tensors"
);
CheckNTErrors
(
a
->
order
==
b
->
order
,
"Wrong tensor orders"
);
CheckNTErrors
(
a
->
unitNum
==
b
->
unitNum
&&
a
->
unitSize
==
b
->
unitSize
,
"Wrong tensor sizes"
);
CheckNTErrors
(
a
->
order
>
i
&&
i
>=
0
,
"index of dimension is out of scope!"
);
CheckNTErrors
(
a
->
order
>
j
&&
j
>=
0
,
"index of dimension is out of scope!"
);
for
(
int
k
=
0
;
k
<
a
->
order
;
k
++
){
if
(
k
==
i
){
CheckNTErrors
(
a
->
dimSize
[
k
]
==
b
->
dimSize
[
j
],
"Wrong dimension size in transposition"
);
}
else
if
(
k
==
j
){
CheckNTErrors
(
a
->
dimSize
[
k
]
==
b
->
dimSize
[
i
],
"Wrong dimension size in transposition"
);
}
else
{
CheckNTErrors
(
a
->
dimSize
[
k
]
==
b
->
dimSize
[
k
],
"Wrong dimension size in transposition"
);
}
}
if
(
i
==
j
){
XMemCopy
(
b
->
data
,
b
->
devID
,
a
->
data
,
a
->
devID
,
b
->
unitNum
*
b
->
unitSize
);
}
else
{
int
I
=
MIN
(
i
,
j
);
int
J
=
MAX
(
i
,
j
);
int
*
dims
=
new
int
[
a
->
order
+
1
];
for
(
int
k
=
0
;
k
<=
J
;
k
++
)
dims
[
k
]
=
a
->
dimSize
[
k
];
dims
[
J
+
1
]
=
-
1
;
for
(
int
k
=
J
+
1
;
k
<
a
->
order
;
k
++
)
dims
[
k
+
1
]
=
a
->
dimSize
[
k
];
/* reshape tensor a form (..., n_I, ..., n_J, ...) => (..., n_I, ..., n_J, 1, ...)*/
XTensor
*
aTMP
=
new
XTensor
(
a
->
order
+
1
,
dims
,
a
->
dataType
,
a
->
denseRatio
,
a
->
devID
,
a
->
mem
);
aTMP
->
data
=
a
->
data
;
for
(
int
k
=
0
;
k
<
I
;
k
++
)
dims
[
k
]
=
a
->
dimSize
[
k
];
for
(
int
k
=
I
+
1
;
k
<=
J
;
k
++
)
dims
[
k
-
1
]
=
a
->
dimSize
[
k
];
dims
[
J
]
=
a
->
dimSize
[
I
];
for
(
int
k
=
J
+
1
;
k
<
a
->
order
;
k
++
)
dims
[
k
]
=
a
->
dimSize
[
k
];
/* reshape tensor b form (..., m_I, ..., m_J, ...) => (..., m_J, m_I, ...) */
b
->
Reshape
(
b
->
order
,
dims
);
/* tensor (..., n_I, ..., n_J, 1, ...) => tensor (..., m_J, m_I, ...) */
_Merge
(
aTMP
,
b
,
J
+
1
,
I
);
memcpy
(
dims
,
a
->
dimSize
,
sizeof
(
int
)
*
a
->
order
);
dims
[
I
]
=
a
->
dimSize
[
J
];
dims
[
J
]
=
a
->
dimSize
[
I
];
/* reshape tensor b form (..., m_J, m_I, ...) => (..., m_J, ..., m_I, ...) => */
b
->
Reshape
(
b
->
order
,
dims
);
aTMP
->
data
=
NULL
;
delete
[]
dims
;
delete
aTMP
;
}
}
}
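To make the reshape + merge trick concrete, a shape trace for the case exercised by TransposeTest in source/tensor/Main.cpp (I = 2, J = 3 on a (2, 3, 4, 5) tensor; for adjacent dimensions the two reshapes of b happen to coincide):

    // a:    (2, 3, 4, 5)                  transpose dims I = 2 and J = 3
    // aTMP: a viewed as (2, 3, 4, 5, 1)   dummy unit dimension appended after J
    // b:    reshaped to (2, 3, 5, 4)      a's shape with dim I moved to position J
    // _Merge(aTMP, b, J + 1, I)           merges the slices along dim I into the
    //                                     dummy dimension, writing transposed data
    // b:    reshaped to (2, 3, 5, 4)      a's shape with dims I and J swapped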
source/tensor/core/shape/Transpose.h
@@ -27,27 +27,25 @@
 namespace nts { // namespace nts(NiuTrans.Tensor)

-#define transpose _Transpose_
-
 /*
-generate a transposed 1D/2D tensor
+tensor transposition of dimensions i and j
 b = transposed(a)
 */
-void _Transpose(XTensor * a, XTensor * b);
+void _Transpose(const XTensor * a, XTensor * b, const int i, const int j);

 /*
-transpose a 1D/2D tensor (do it on site).
+tensor transposition of dimensions i and j (do this on site)
 keep the result in the input tensor and return nothing.
 a = transposed(a)
 */
-void _TransposeMe(XTensor * a);
+void _TransposeMe(XTensor * a, const int i, const int j);

 /*
-make a transposed 1D/2D tensor
+tensor transposition of dimensions i and j
 (return a XTensor structure).
 make a new tensor to keep the result and return it.
 b = transposed(a)
 */
-XTensor Transpose(XTensor & a);
+XTensor Transpose(const XTensor & a, const int i, const int j);

 } // namespace nts(NiuTrans.Tensor)