linye / Tensor.LowPrecision · Commits

Commit 1da50ae2, authored 5 years ago by ltb (parent 29d2352b)

    using cpu float16 and test fnn and t2t times

Showing 5 changed files with 955 additions and 42 deletions:
source/network/Main.cpp                      +838  -31
source/sample/fnnlm/FNNLM.cpp                 +39   -3
source/sample/transformer/T2TTrainer.cpp      +38   -4
source/tensor/XTensor.cpp                     +39   -3
source/tensor/core/utilities/FlushToMem.cu     +1   -1
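The bulk of the change is a set of CPU FP16 micro-benchmarks added to Main.cpp. Each benchmark follows the same FP32-vs-FP16 pattern; the following minimal sketch (assuming the NiuTrans tensor API exactly as it appears in this diff; the sizes and repeat count are illustrative) shows the shape of every timing function below:

    // Sketch of the FP32-vs-FP16 timing pattern used throughout Main.cpp.
    // XTensor, InitTensor2D, ConvertDataType, Sum and GetClockSec are the
    // library calls used in this commit; dim and repeat are illustrative.
    XTensor a, b, c, halfa, halfb, halfc;
    int dim = 512;
    InitTensor2D(&a, dim, dim, X_FLOAT, 0);      // FP32 operands on device 0
    InitTensor2D(&b, dim, dim, X_FLOAT, 0);
    InitTensor2D(&c, dim, dim, X_FLOAT, 0);
    InitTensor2D(&halfc, dim, dim, X_FLOAT16, 0);
    a.SetDataRand(-1.0F, 1.0F);
    b.SetDataRand(-1.0F, 1.0F);
    halfa = ConvertDataType(a, X_FLOAT16);       // FP16 copies of the same data
    halfb = ConvertDataType(b, X_FLOAT16);

    int repeat = 1000;
    double start = GetClockSec();
    for (int i = 0; i < repeat; i++)
        c = Sum(&a, &b);                         // FP32 kernel under test
    printf("fp32: %.2fs\n", GetClockSec() - start);

    start = GetClockSec();
    for (int i = 0; i < repeat; i++)
        halfc = Sum(&halfa, &halfb);             // FP16 kernel under test
    printf("fp16: %.2fs\n", GetClockSec() - start);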
source/network/Main.cpp (view file @ 1da50ae2)
@@ -15,7 +15,7 @@
 * limitations under the License.
 */

/*
 * $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-10
 */
@@ -28,42 +28,788 @@
#include "../sample/fnnlm/FNNLM.h"
#include "../sample/transformer/Transformer.h"

//#define CRTDBG_MAP_ALLOC
//#include <stdlib.h>
//#include <crtdbg.h>

using namespace nts;
using namespace fnnlm;
using namespace transformer;

void BackwardTest();
void TransposeTest();
void SumDimTest();
//void SplitBackwardTest();
void MemTest();
//void xcTest();
void ConvertDataTypeTest();
void ConvertDataTypeBackwardTest();
void SumFP16Test();
void GatherFP16Test();
void HardTanHFP16Test();
void ReduceMaxFP16Test();
void ReduceSumFP16Test();
void LogSoftmaxFP16Test();
void ClipFP16Test();
void ScaleAndShiftFP16Test();
void InitTensorFP16Test();
void MultiplyDimTime();
void TimeTestGemm();
void TimeTest();
void TimeInt8AndFloat32();
void TestCPUhalf();

int main(int argc, const char ** argv)
{
    //_CrtSetDbgFlag(_CrtSetDbgFlag(_CRTDBG_REPORT_FLAG) | _CRTDBG_LEAK_CHECK_DF);
    //_CrtSetBreakAlloc(2708);

    if (argc > 1 && !strcmp(argv[1], "-test"))
        Test();
    else if (argc > 1 && !strcmp(argv[1], "-fnnlm"))
        FNNLMMain(argc - 1, argv + 1);
    else if (argc > 1 && !strcmp(argv[1], "-t2t"))
        TransformerMain(argc - 1, argv + 1);
    else {
        fprintf(stderr, "Thanks for using NiuTrans.Network! This is a library for building\n");
        fprintf(stderr, "neural networks in an easy way.\n\n");
        fprintf(stderr, "Run this program with \"-test\" for unit test!\n");
        fprintf(stderr, "Or run this program with \"-fnnlm\" for sample FNNLM!\n");
        fprintf(stderr, "Or run this program with \"-t2t\" for sample Transformer!\n");
    }

    //xcTest();
    //return 0;
    //MemTest();
    //return 0;
    //SplitBackwardTest();
    //return 0;
    //_CrtSetBreakAlloc(896);
    //BackwardTest();
    //return 0;
    //Test();
    //return 0;
    //ConvertDataTypeTest();
    //return 0;
    //ConvertDataTypeBackwardTest();
    //return 0;
    //SumFP16Test();
    //return 0;
    //GatherFP16Test();
    //return 0;
    //HardTanHFP16Test();
    //return 0;
    //ReduceMaxFP16Test();
    //return 0;
    //ReduceSumFP16Test();
    //return 0;
    //LogSoftmaxFP16Test();
    //return 0;
    //ClipFP16Test();
    //return 0;
    //ScaleAndShiftFP16Test();
    //return 0;
    //InitTensorFP16Test();
    //return 0;
    //_CrtDumpMemoryLeaks();

    return 0;
}
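Note that none of the new benchmarks are wired to a command-line switch: they are run by uncommenting the corresponding call in main() (for example, turning //TimeTest(); into TimeTest();) and rebuilding.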
void TestCPUhalf()
{
    int memSize = 1024;
    int devId = 0;
    int dim1 = 1024;
    int dim2 = 32;

    XMem * mem;
    mem = new XMem(devId, FREE_ON_THE_FLY, (MTYPE)MILLION * 256, 1024, MILLION * 128);
    mem->SetDesiredSize(devId, 0, (MTYPE)memSize * MILLION);

    XTensor a;
    XTensor b;
    XTensor c;

    //XMem *mem = new XMem(devId, FREE_ON_THE_FLY, 128 * MILLION, 1024, 128 * MILLION);
    //mem->SetDesiredSize(0,0,memSize*MILLION);

    InitTensor2D(&a, dim1, dim1, X_FLOAT, devId);
    InitTensor2D(&b, dim2, dim2, X_FLOAT, devId);
    InitTensor2D(&c, dim1, dim1, X_FLOAT, devId);
}
void TimeInt8AndFloat32()
{
    XMem * mem;
    int memSize = 1024;
    int devId = 2;
    int dim = 512;

    mem = new XMem(devId, FREE_ON_THE_FLY, (MTYPE)MILLION * 256, 1024, MILLION * 128);
    mem->SetDesiredSize(devId, 0, (MTYPE)memSize * MILLION);

    XTensor a;
    XTensor b;
    XTensor c;
    InitTensor2D(&a, dim, dim, X_FLOAT, devId, mem);
    InitTensor2D(&b, dim, dim, X_FLOAT, devId, mem);
    InitTensor2D(&c, dim, dim, X_FLOAT, devId, mem);
    a.SetDataRand(-1.0F, 1.0F);
    b.SetDataRand(-1.0F, 1.0F);

    XTensor inta;
    XTensor intb;
    XTensor intc;
    InitTensor2D(&inta, dim, dim, X_INT, devId, mem);
    InitTensor2D(&intb, dim, dim, X_INT, devId, mem);
    InitTensor2D(&intc, dim, dim, X_FLOAT, devId, mem);

    XTensor tmp;
    InitTensor2D(&tmp, dim, dim, X_FLOAT, devId, mem);
    tmp.SetDataRand(-100000.0F, 100000.0F);
    inta = ConvertDataType(tmp, X_INT8);
    intb = ConvertDataType(tmp, X_INT8);

    int repeat = 10000;

    printf("test on matrixmul\n");
    double start_matrixmul32 = GetClockSec();
    for (int i = 0; i < repeat; i++) {
        _MatrixMul(&a, X_NOTRANS, &b, X_NOTRANS, &c);
    }
    double elapsed_matrixmul32 = GetClockSec() - start_matrixmul32;
    printf("elapsed_matrixmul32=%.2fs\n", elapsed_matrixmul32);

    double start_int8 = GetClockSec();
    for (int i = 0; i < repeat; i++) {
        _MatrixMul(&inta, X_NOTRANS, &intb, X_NOTRANS, &intc);
    }
    double elapsed_int8 = GetClockSec() - start_int8;
    printf("elapsed_int8=%.2fs\n", elapsed_int8);
}
void TimeTest()
{
    XMem * mem;
    int memSize = 1024;
    int devId = 0;
    int dim = 512;

    mem = new XMem(devId, FREE_ON_THE_FLY, (MTYPE)MILLION * 256, 1024, MILLION * 128);
    mem->SetDesiredSize(devId, 0, (MTYPE)memSize * MILLION);

    XTensor a;
    XTensor b;
    XTensor c;
    XTensor halfa;
    XTensor halfb;
    XTensor halfc;
    InitTensor2D(&a, dim, dim, X_FLOAT, devId, mem);
    InitTensor2D(&b, dim, dim, X_FLOAT, devId, mem);
    InitTensor2D(&c, dim, dim, X_FLOAT, devId, mem);
    InitTensor2D(&halfc, dim, dim, X_FLOAT16, devId, mem);
    a.SetDataRand(-1.0F, 1.0F);
    b.SetDataRand(-1.0F, 1.0F);
    halfa = ConvertDataType(a, X_FLOAT16);
    halfb = ConvertDataType(b, X_FLOAT16);

    int repeat = 100000;

    printf("=========================================\n");
    printf("test on sum\n");
    double start_sum32 = GetClockSec();
    for (int i = 0; i < repeat; i++) {
        c = Sum(&a, &b);
    }
    double elapsed_sum32 = GetClockSec() - start_sum32;
    printf("elapsed_sum32=%.2fs\n", elapsed_sum32);
    double start_sum16 = GetClockSec();
    for (int i = 0; i < repeat; i++) {
        halfc = Sum(&halfa, &halfb);
    }
    double elapsed_sum16 = GetClockSec() - start_sum16;
    printf("elapsed_sum16=%.2fs\n", elapsed_sum16);
    printf("=========================================\n");

    /*printf("test on sub\n");
    double start_sub32 = GetClockSec();
    for (int i = 0; i < repeat; i++) {
        c = Sub(&a, &b);
    }
    double elapsed_sub32 = GetClockSec() - start_sub32;
    printf("elapsed_sub32=%.2fs\n", elapsed_sub32);
    double start_sub16 = GetClockSec();
    for (int i = 0; i < repeat; i++) {
        halfc = Sub(&halfa, &halfb);
    }
    double elapsed_sub16 = GetClockSec() - start_sub16;
    printf("elapsed_sub16=%.2fs\n", elapsed_sub16);
    printf("=========================================\n");*/

    /*printf("test on div\n");
    double start_div32 = GetClockSec();
    for (int i = 0; i < repeat; i++) {
        c = Div(&a, &b);
    }
    double elapsed_div32 = GetClockSec() - start_div32;
    printf("elapsed_div32=%.2fs\n", elapsed_div32);
    double start_div16 = GetClockSec();
    for (int i = 0; i < repeat; i++) {
        halfc = Div(&halfa, &halfb);
    }
    double elapsed_div16 = GetClockSec() - start_div16;
    printf("elapsed_div16=%.2fs\n", elapsed_div16);
    printf("=========================================\n");*/

    /*printf("test on multiply\n");
    double start_multiply32 = GetClockSec();
    for (int i = 0; i < repeat; i++) {
        c = Multiply(&a, &b);
    }
    double elapsed_multiply32 = GetClockSec() - start_multiply32;
    printf("elapsed_multiply32=%.2fs\n", elapsed_multiply32);
    double start_multiply16 = GetClockSec();
    for (int i = 0; i < repeat; i++) {
        halfc = Multiply(&halfa, &halfb);
    }
    double elapsed_multiply16 = GetClockSec() - start_multiply16;
    printf("elapsed_multiply16=%.2fs\n", elapsed_multiply16);
    printf("=========================================\n");*/

    printf("test on scaleandshift\n");
    double start_scaleandshift32 = GetClockSec();
    for (int i = 0; i < repeat; i++) {
        c = ScaleAndShift(&a, 1, 0);
    }
    double elapsed_scaleandshift32 = GetClockSec() - start_scaleandshift32;
    printf("elapsed_scaleandshift32=%.2fs\n", elapsed_scaleandshift32);
    double start_scaleandshift16 = GetClockSec();
    for (int i = 0; i < repeat; i++) {
        halfc = ScaleAndShift(&halfa, 1, 0);
    }
    double elapsed_scaleandshift16 = GetClockSec() - start_scaleandshift16;
    printf("elapsed_scaleandshift16=%.2fs\n", elapsed_scaleandshift16);
    printf("=========================================\n");

    printf("test on reducesum\n");
    double start_reducesum32 = GetClockSec();
    for (int i = 0; i < repeat; i++) {
        c = ReduceSum(&a, 1);
    }
    double elapsed_reducesum32 = GetClockSec() - start_reducesum32;
    printf("elapsed_reducesum32=%.2fs\n", elapsed_reducesum32);
    double start_reducesum16 = GetClockSec();
    for (int i = 0; i < repeat; i++) {
        halfc = ReduceSum(&halfa, 1);
    }
    double elapsed_reducesum16 = GetClockSec() - start_reducesum16;
    printf("elapsed_reducesum16=%.2fs\n", elapsed_reducesum16);
    printf("=========================================\n");

    printf("test on reducemax\n");
    double start_reducemax32 = GetClockSec();
    for (int i = 0; i < repeat; i++) {
        c = ReduceMax(&a, 1);
    }
    double elapsed_reducemax32 = GetClockSec() - start_reducemax32;
    printf("elapsed_reducemax32=%.2fs\n", elapsed_reducemax32);
    double start_reducemax16 = GetClockSec();
    for (int i = 0; i < repeat; i++) {
        halfc = ReduceMax(&halfa, 1);
    }
    double elapsed_reducemax16 = GetClockSec() - start_reducemax16;
    printf("elapsed_reducemax16=%.2fs\n", elapsed_reducemax16);
    printf("=========================================\n");

    printf("test on logsoftmax\n");
    double start_logsoftmax32 = GetClockSec();
    for (int i = 0; i < repeat; i++) {
        c = LogSoftmax(&a, 1);
    }
    double elapsed_logsoftmax32 = GetClockSec() - start_logsoftmax32;
    printf("elapsed_logsoftmax32=%.2fs\n", elapsed_logsoftmax32);
    double start_logsoftmax16 = GetClockSec();
    for (int i = 0; i < repeat; i++) {
        halfc = LogSoftmax(&halfa, 1);
    }
    double elapsed_logsoftmax16 = GetClockSec() - start_logsoftmax16;
    printf("elapsed_logsoftmax16=%.2fs\n", elapsed_logsoftmax16);
    printf("=========================================\n");

    printf("test on matrixmul\n");
    double start_matrixmul32 = GetClockSec();
    for (int i = 0; i < repeat; i++) {
        c = MatrixMul(&a, &b);
    }
    double elapsed_matrixmul32 = GetClockSec() - start_matrixmul32;
    printf("elapsed_matrixmul32=%.2fs\n", elapsed_matrixmul32);
    double start_matrixmul16 = GetClockSec();
    for (int i = 0; i < repeat; i++) {
        halfc = MatrixMul(&halfa, &halfb);
    }
    double elapsed_matrixmul16 = GetClockSec() - start_matrixmul16;
    printf("elapsed_matrixmul16=%.2fs\n", elapsed_matrixmul16);
    printf("=========================================\n");

    printf("test on convert\n");
    double start_convert32to16 = GetClockSec();
    for (int i = 0; i < repeat; i++) {
        halfa = ConvertDataType(a, X_FLOAT16);
    }
    double elapsed_convert32to16 = GetClockSec() - start_convert32to16;
    printf("elapsed_convert32to16=%.2fs\n", elapsed_convert32to16);
    double start_convert16to32 = GetClockSec();
    for (int i = 0; i < repeat; i++) {
        a = ConvertDataType(halfa, X_FLOAT);
    }
    double elapsed_convert16to32 = GetClockSec() - start_convert16to32;
    printf("elapsed_convert16to32=%.2fs\n", elapsed_convert16to32);
    printf("=========================================\n");

    delete mem;
}
void MultiplyDimTime()
{
    int memSize = 1024;
    int devId = 0;
    int dim1 = 1024;
    int dim2 = 32;

    XMem * mem;
    mem = new XMem(devId, FREE_ON_THE_FLY, (MTYPE)MILLION * 256, 1024, MILLION * 128);
    mem->SetDesiredSize(devId, 0, (MTYPE)memSize * MILLION);

    XTensor a;
    XTensor b;
    XTensor c;

    //XMem *mem = new XMem(devId, FREE_ON_THE_FLY, 128 * MILLION, 1024, 128 * MILLION);
    //mem->SetDesiredSize(0,0,memSize*MILLION);

    InitTensor2D(&a, dim1, dim1, X_FLOAT, devId);
    InitTensor2D(&b, dim2, dim2, X_FLOAT, devId);
    InitTensor2D(&c, dim1, dim1, X_FLOAT, devId);

    a.SetDataRand(-1.0F, 1.0F);
    b.SetDataRandn(-1.0F, 1.0F);

    int repeat = 2000;

    printf("test on MultiplyDim\n");
    double start = GetClockSec();
    for (int j = 0; j <= repeat; j++) {
        c = MultiplyDim(&a, &b, 0);
    }
    double elapsed = GetClockSec() - start;
    printf("elapsed_MultiplyDim32=%.4fs\n", elapsed);

    XTensor halfa;
    XTensor halfb;
    XTensor halfc;
    InitTensor2D(&halfc, dim1, dim1, X_FLOAT16, devId, mem);
    halfa = ConvertDataType(a, X_FLOAT16);
    halfb = ConvertDataType(b, X_FLOAT16);

    double starthalf = GetClockSec();
    for (int i = 0; i < repeat; i++) {
        halfc = MultiplyDim(&halfa, &halfb, 0);
    }
    double elapsedhalf = GetClockSec() - starthalf;
    printf("elapsed_MultiplyDim16=%.4fs\n", elapsedhalf);
}
void TimeTestGemm()
{
    XMem * mem;
    int memSize = 1024;

    mem = new XMem(0, FREE_ON_THE_FLY, (MTYPE)MILLION * 256, 1024, MILLION * 128);
    mem->SetDesiredSize(0, 0, (MTYPE)memSize * MILLION);

    XTensor a;
    XTensor b;
    XTensor c;
    XTensor halfa;
    XTensor halfb;
    XTensor halfc;

    int dim1 = 512;
    int dim2 = 1024;

    //InitTensor3D(&a, 86, 48, 256, X_FLOAT, 0, mem);
    //InitTensor2D(&b, 256, 256, X_FLOAT, 0, mem);
    //InitTensor4D(&a, 8, 86, 48, 48, X_FLOAT, 0, mem);
    //InitTensor4D(&b, 8, 86, 48, 32, X_FLOAT, 0, mem);
    InitTensor2D(&a, dim1, dim2, X_FLOAT, 0, mem);
    InitTensor2D(&b, dim1, dim2, X_FLOAT, 0, mem);
    //InitTensor4D(&a, 8, 86, 48, 32, X_FLOAT, 0);
    //InitTensor4D(&b, 8, 86, 48, 32, X_FLOAT, 0);

    a.SetDataRand(-1.0F, 1.0F);
    b.SetDataRand(-1.0F, 1.0F);

    halfa = ConvertDataType(a, X_FLOAT16);
    halfb = ConvertDataType(b, X_FLOAT16);

    //a.Dump(&a, stderr, "a:", 10);
    //b.Dump(&b, stderr, "b:", 10);
    //halfa.Dump(&a, stderr, "halfa:", 10);
    //halfb.Dump(&b, stderr, "halfb:", 10);

    int repeat = 10000;

    printf("=========================================\n");
    double start_matrixmul16 = GetClockSec();
    for (int i = 0; i < repeat; i++) {
        halfc = BMMul(halfa, X_NOTRANS, halfb, X_TRANS);
    }
    double elapsed_matrixmul16 = GetClockSec() - start_matrixmul16;
    printf("elapsed_matrixmul16=%.4fs\n", elapsed_matrixmul16);
    printf("------------------------------------------\n");

    double start_matrixmul32 = GetClockSec();
    for (int i = 0; i < repeat; i++) {
        c = BMMul(a, X_NOTRANS, b, X_TRANS);
    }
    double elapsed_matrixmul32 = GetClockSec() - start_matrixmul32;
    printf("elapsed_matrixmul32=%.4fs\n", elapsed_matrixmul32);
    printf("=========================================\n");

    c.Dump(&c, stderr, "c:", 10);
    halfc.Dump(&halfc, stderr, "halfc:", 10);
}
void InitTensorFP16Test()
{
    XTensor a;
    InitTensor2D(&a, 1, 10, X_FLOAT, 0);
    a.SetDataRand(-10.0F, 10.0F);

    XTensor halfA;
    halfA = ConvertDataType(a, X_FLOAT16);
    halfA.Dump(&halfA, stderr, "halfA:");

    XTensor b;
    InitTensor2D(&b, 1, 10, X_FLOAT16, 0);
    _SetDataRand(&b, -10.0F, 10.0F);
    b.Dump(&b, stderr, "b:");
}
void ScaleAndShiftFP16Test()
{
    XTensor a;
    XTensor intA;
    XTensor b;
    XTensor intB;
    InitTensor2D(&a, 1, 10, X_FLOAT, 0);
    a.SetDataRand(-10.0F, 10.0F);
    a.Dump(stderr, "a:");
    intA = ConvertDataType(a, X_INT);
    intB = ScaleAndShift(intA, 2, 0);
    b = ConvertDataType(intB, X_FLOAT);
    b.Dump(stderr, "b:");
}
void ClipFP16Test()
{
    XTensor a;
    XTensor intA;
    XTensor b;
    XTensor intB;
    InitTensor2D(&a, 1, 10, X_FLOAT, 0);
    a.SetDataRand(-10.0F, 10.0F);
    a.Dump(stderr, "a:");
    intA = ConvertDataType(a, X_INT);
    intB = Clip(intA, -1, 1);
    b = ConvertDataType(intB, X_FLOAT);
    b.Dump(stderr, "b:");
}
void LogSoftmaxFP16Test()
{
    XTensor a;
    XTensor halfA;
    XTensor b;
    XTensor halfB;
    InitTensor3D(&a, 2, 2, 2, X_FLOAT, 0);
    a.SetDataRand(-1.0F, 1.0F);
    halfA = ConvertDataType(a, X_FLOAT16);
    b = LogSoftmax(a, 1);
    halfB = LogSoftmax(halfA, 1);
    b.Dump(stderr, "sum:");
    halfB.Dump(&halfB, stderr, "halfSum:");
}
void ReduceSumFP16Test()
{
    XTensor a;
    XTensor sum;
    XTensor halfA;
    XTensor halfSum;
    InitTensor2D(&a, 10, 10, X_FLOAT, 0);
    a.SetDataRand(-5.0F, 5.0F);
    halfA = ConvertDataType(a, X_FLOAT16);
    sum = ReduceSum(a, 1);
    halfSum = ReduceSum(halfA, 1);
    sum.Dump(stderr, "sum:");
    halfSum.Dump(&halfSum, stderr, "halfSum:");
}
void ReduceMaxFP16Test()
{
    XTensor a;
    XTensor max;
    XTensor halfA;
    XTensor halfMax;
    InitTensor2D(&a, 10, 10, X_FLOAT, 0);
    a.SetDataRand(-5.0F, 5.0F);
    halfA = ConvertDataType(a, X_FLOAT16);
    max = ReduceMax(a, 1);
    halfMax = ReduceMax(halfA, 1);
    max.Dump(stderr, "max:");
    halfMax.Dump(&halfMax, stderr, "halfMax:");
}
void HardTanHFP16Test()
{
    XTensor a;
    XTensor b;
    XTensor halfA;
    XTensor halfB;
    InitTensor2D(&a, 5, 5, X_FLOAT, 0);
    InitTensor2D(&b, 5, 5, X_FLOAT, 0);
    a.SetDataRand(-1.0F, 4.0F);
    b.SetDataRand(-1.0F, 4.0F);
    halfA = ConvertDataType(a, X_FLOAT16);
    halfB = ConvertDataType(b, X_FLOAT16);
    a.Dump(stderr, "a:");
    b.Dump(stderr, "b:");
    b = HardTanH(a);
    halfB = HardTanH(halfA);
    b.Dump(stderr, "b:");
    halfB.Dump(&halfB, stderr, "halfB:");
}
void GatherFP16Test()
{
    XTensor a;
    XTensor b;
    XTensor srcIndex;
    XTensor halfA;
    XTensor halfB;
    XTensor c;

    InitTensor1D(&srcIndex, 2, X_INT, 0);
    int m = 0;
    int n = 1;
    srcIndex.Set1DInt(m, 0);
    srcIndex.Set1DInt(n, 1);

    InitTensor2D(&a, 3, 2, X_FLOAT, 0);
    InitTensor2D(&b, 2, 2, X_FLOAT, 0);
    InitTensor2D(&halfB, 2, 2, X_FLOAT16, 0);
    a.SetDataRand(-5.0F, 5.0F);
    halfA = ConvertDataType(a, X_FLOAT16);
    a.Dump(stderr, "a:");

    _Gather(&a, &b, &srcIndex);
    b.Dump(stderr, "b:");

    _Gather(&halfA, &halfB, &srcIndex);
    c = ConvertDataType(halfB, X_FLOAT);
    c.Dump(stderr, "c:");
}
void SumFP16Test()
{
    XTensor a;
    XTensor b;
    XTensor halfA;
    XTensor halfB;
    InitTensor2D(&a, 5, 5, X_FLOAT, 0);
    InitTensor2D(&b, 5, 5, X_FLOAT, 0);
    a.SetDataRand(-1.0F, 4.0F);
    b.SetDataRand(-1.0F, 4.0F);
    halfA = ConvertDataType(a, X_FLOAT16);
    halfB = ConvertDataType(b, X_FLOAT16);
    a.Dump(stderr, "a:");
    b.Dump(stderr, "b:");
    b = Sum(a, b, -0.4F);
    halfB = Sum(halfA, halfB, -0.4F);
    b.Dump(stderr, "b:");
    halfB.Dump(&halfB, stderr, "halfB:");
}
void ConvertDataTypeTest()
{
    int rnum = 0;
    for (int i = 0; i <= rnum; i++) {
        XTensor a;
        InitTensor2D(&a, 2, 2, X_FLOAT, 0);
        XTensor halfa;
        InitTensor2D(&halfa, 2, 2, X_FLOAT16, 0);
        XTensor a1;
        InitTensor2D(&a1, 2, 2, X_FLOAT, 0);
        a.SetDataRand(-10.0F, 10.0F);
        a.Dump(stderr, "a:");
        halfa = ConvertDataType(a, X_FLOAT16);
        a1 = ConvertDataType(halfa, X_FLOAT);
        a1.Dump(stderr, "halfa:");
    }
}
void ConvertDataTypeBackwardTest()
{
    int rnum = 0;
    for (int i = 0; i <= rnum; i++) {
        XTensor a;
        InitTensor2D(&a, 2, 2, X_FLOAT, 0);
        a.SetDataRand(2.0F, 2.0F);
        a.Dump(stderr, "a:");

        XTensor halfA;
        XTensor a1;
        halfA = ConvertDataType(a, X_FLOAT16);
        a1 = ConvertDataType(halfA, X_FLOAT);

        a1.grad = NewTensor(&a1);
        a1.grad->SetDataRand(3.0F, 3.0F);
        a1.grad->Dump(stderr, "a1.grad:");

        XNet testBackward;
        printf("1");
        testBackward.Backward(a1);
        printf("2");

        halfA.grad->Dump(stderr, "halfA.grad:");
        a.grad->Dump(stderr, "a.grad:");
    }
}
//XTensor * stack(XList& list, int leadingDim)
//{
// size_t size = list.count;
// if (list.count == 0)
// return NULL;
// XTensor * sample = (XTensor*)list.Get(0);
//
// XTensor merge_tensor;
// int order = sample->order;
// int * dim = new int[order];
// for (int i = 0; i < order; i++)
// dim[i] = sample->GetDim(i);
// dim[leadingDim] *= size;
//
// InitTensor(&merge_tensor, order, dim, DEFAULT_DTYPE, sample->denseRatio, sample->devID, sample->mem);
//
// _Merge(&list, &merge_tensor, leadingDim);
// delete[] dim;
//
// order += 1;
// dim = new int[order];
// dim[0] = size;
// for (size_t i = 1; i < order; i++) {
// if (i != leadingDim)
// dim[i] = sample->GetDim(i - 1);
// else
// dim[i] = sample->GetDim(i - 1) / size;
// }
//
// XTensor * split_tensor = new XTensor(order, dim, DEFAULT_DTYPE, sample->denseRatio, sample->devID, sample->mem);
// _Split(&merge_tensor, split_tensor, leadingDim, size);
// delete[] dim;
//
// return split_tensor;
//}
//void xcTest()
//{
// int * dimSize = new int[2];
// dimSize[0] = 2;
// dimSize[1] = 4;
//
// XTensor t1;
// InitTensor2D(&t1, 2, 4, X_FLOAT, 0, NULL);
// XTensor t2;
// InitTensor2D(&t2, 2, 4, X_FLOAT, 0, NULL);
// XTensor tensor;
//
// _SetDataFixed(&t1, 1.0F);
// _SetDataFixed(&t2, 2.0F);
//
// tensor = t1 + t2;
//
// XList smalls;
//
// XTensor first;
// XTensor second;
// InitTensor2D(&first, 2, 2, X_FLOAT, 0, NULL);
// InitTensor2D(&second, 2, 2, X_FLOAT, 0, NULL);
// smalls.Add(&t1);
// smalls.Add(&t2);
//
// XTensor* result = stack(smalls, 0);
// result->Dump(stderr, "", 100);
//}
void BackwardTest()
{
    XNet net;

@@ -71,9 +817,6 @@ void BackwardTest()
    XTensor a;
    XTensor b;
    XTensor c;
    a.enableGrad = true;
    b.enableGrad = false;
    c.enableGrad = false;
    XTensor mean;
    XTensor origin;
    InitTensor2D(&a, 2, 3);

@@ -91,15 +834,14 @@ void BackwardTest()
    b.Set1D(2.0F, 0);
    b.Set1D(1.0F, 1);

    DivDim(a, b, c, 0);
    c = DivDim(a, b, 0);
    c.Dump(stderr, "c:");

    auto loss = CrossEntropy(c, a);

    //XLink::ShowNetwork(stderr, &c);

    net.Backward(loss);
    net.Backward(c);

    a.grad->Dump(stderr);

    net.Dump(stderr);
}
@@ -122,7 +864,7 @@ void TransposeTest()
    int nnn = GDevs.nGPU;

    InitTensor3D(&x, B, N, H, X_FLOAT, 0);
    InitTensor4D(&y, K, B, N, H/K, X_FLOAT, 0);
    InitTensor3D(&z, B, N, H, X_FLOAT, 0);

    cudaEvent_t ctime0;
@@ -146,7 +888,7 @@ void TransposeTest()
    cudaEventRecord(ctime0, 0);

    double time0 = GetClock();
    for (int i = 0; i < loops; i++)
        _Split(&x, &y, 2, K);
    double time1 = GetClock();
@@ -157,7 +899,7 @@ void TransposeTest()
    cudaEventRecord(ctime2, 0);

    double time2 = GetClock();
    for (int i = 0; i < loops; i++)
        _Merge(&y, &x, 3);
    double time3 = GetClock();
@@ -168,7 +910,7 @@ void TransposeTest()
    cudaEventRecord(ctime4, 0);

    double time4 = GetClock();
    for (int i = 0; i < loops; i++)
        _Sum(&x, &z, &x);
    double time5 = GetClock();
@@ -201,11 +943,11 @@ void SumDimTest()
    DTYPE * data = new DTYPE[x.unitNum];
    for (int i = 0; i < x.unitNum; i++)
        data[i] = (DTYPE)i;
    x.SetData(data, x.unitNum);
    for (int i = 0; i < y.unitNum; i++)
        data[i] = -(DTYPE)i;
    y.SetData(data, y.unitNum);

@@ -215,3 +957,67 @@ void SumDimTest()
    delete[] data;
}
//void SplitBackwardTest()
//{
// int * dimSize = new int[2];
// dimSize[0] = 2;
// dimSize[1] = 4;
//
// XTensor t1;
// InitTensor2D(&t1, 2, 4, X_FLOAT, 0, NULL);
// XTensor t2;
// InitTensor2D(&t2, 2, 4, X_FLOAT, 0, NULL);
// XTensor tensor;
//
// //_SetDataFixedFloat(&t1, 1.0F);
// //_SetDataFixedFloat(&t2, 2.0F);
// t1.SetDataRand();
// t2.SetDataRand();
//
// tensor = t1 + t2;
//
// XList smalls;
//
// XTensor first;
// XTensor second;
// InitTensor2D(&first, 2, 2, X_FLOAT, 0, NULL);
// InitTensor2D(&second, 2, 2, X_FLOAT, 0, NULL);
// smalls.Add(&first);
// smalls.Add(&second);
//
// Split(tensor, smalls, 1, 2);
//
// XTensor mul;
// mul = Sum(first, second);
//
// XNet net;
// net.Backward(mul);
// net.Dump(stderr);
//
// printf("Done!");
//}
void MemTest()
{
    XMem * mem;
    mem = new XMem(0, FREE_ON_THE_FLY, (MTYPE)MILLION, 1024, MILLION);
    XTensor tensor;
    InitTensor2D(&tensor, 2, 4, X_FLOAT, 0, mem);
    tensor.SetZeroAll();
    tensor.Dump(stderr);
    delete mem;
    if (tensor.mem != NULL) {
        printf("It isn't null!\n");
        printf("%d\n", (int)tensor.mem->signature);
    }
    else {
        printf("It's null\n");
    }
    tensor.Dump(stderr);
}
\ No newline at end of file
source/sample/fnnlm/FNNLM.cpp (view file @ 1da50ae2)
@@ -416,6 +416,18 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
    double startT = GetClockSec();

    double mkinput = 0.0;
    double mkgold = 0.0;
    double train_time = 0.0;
    double clearModel = 0.0;
    double forward = 0.0;
    double backward = 0.0;
    double update = 0.0;
    double end = 0.0;
    double start = 0.0;
    double time;

    /* iterate for a number of epochs */
    for (epoch = 0; epoch < nEpoch; epoch++){

@@ -426,7 +438,6 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
        wordCount = 0;
        loss = 0;
        ngramNum = 1;
        while (ngramNum > 0){

            /* load a minibatch of ngrams */

@@ -447,13 +458,18 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
            /* the loss tensor */
            XTensor lossTensor;

            start = GetClockSec();
            /* make the input tensor for position i */
            for (int i = 0; i < model.n - 1; i++)
                MakeWordBatch(inputs[i], ngrams, ngramNum, i, model.vSize, model.devID);
            mkinput += GetClockSec() - start;

            start = GetClockSec();
            /* make the gold tensor */
            MakeWordBatch(gold, ngrams, ngramNum, model.n - 1, model.vSize, model.devID);
            mkgold += GetClockSec() - start;

            time = GetClockSec();
            if (!autoDiff){
                /* prepare an empty network for building the fnn */
                FNNNet net;

@@ -475,28 +491,37 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
                loss -= prob;
            }
            else {
                start = GetClockSec();
                /* gradient = 0 */
                Clear(model, true);
                clearModel += GetClockSec() - start;

                start = GetClockSec();
                /* forward + backward process */
                /* this is implemented by gather function */
                ForwardAutoDiff(ngrams, ngramNum, output, model);
                forward += GetClockSec() - start;

                start = GetClockSec();
                /* this is implemented by multiply function */
                lossTensor = CrossEntropy(output, gold);

                /* automatic differentiation */
                autoDiffer.Backward(lossTensor);
                backward += GetClockSec() - start;

                start = GetClockSec();
                /* update model parameters */
                Update(model, grad, learningRate, true);
                update += GetClockSec() - start;

                start = GetClockSec();
                /* get probabilities */
                float prob = ReduceSumAll(lossTensor);
                loss += prob;
                end += GetClockSec() - start;
            }
            train_time += GetClockSec() - time;

            wordCount += ngramNum;
            wordCountTotal += ngramNum;

@@ -507,8 +532,19 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
            if (step % 100 == 0) {
                double elapsed = GetClockSec() - startT;
                startT = GetClockSec();
                XPRINT8(0, stderr, "[Time] mkinput=%.5lfs,mkgold=%.5lfs,train_time=%.5lfs,clearModel=%.5lfs,forward=%.5lfs, backward=%.5lf, update=%.5lf, end=%.5lf\n",
                        mkinput, mkgold, train_time, clearModel, forward, backward, update, end);
                XPRINT5(0, stderr, "[INFO] elapsed=%.1fs, step=%d, epoch=%d, ngram=%d, ppl=%.3f\n",
                        elapsed, step, epoch + 1, wordCountTotal, exp(loss / wordCount));
                mkinput = 0.0;
                mkgold = 0.0;
                train_time = 0.0;
                clearModel = 0.0;
                forward = 0.0;
                backward = 0.0;
                update = 0.0;
                end = 0.0;
            }
        }
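The FNNLM changes are pure instrumentation: every training phase is bracketed by a GetClockSec() pair, accumulated into a per-phase counter, and reported and reset every 100 steps. A stripped-down sketch of the pattern (RunForward and RunBackward are hypothetical stand-ins for the phases timed above):

    double forward = 0.0, backward = 0.0;
    for (int step = 1; step <= nStep; step++) {   // nStep is illustrative
        double start = GetClockSec();
        RunForward();                             // hypothetical phase
        forward += GetClockSec() - start;

        start = GetClockSec();
        RunBackward();                            // hypothetical phase
        backward += GetClockSec() - start;

        if (step % 100 == 0) {                    // report and reset, as above
            fprintf(stderr, "[Time] forward=%.5lfs, backward=%.5lfs\n", forward, backward);
            forward = backward = 0.0;
        }
    }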
source/sample/transformer/T2TTrainer.cpp (view file @ 1da50ae2)
@@ -148,6 +148,14 @@ void T2TTrainer::Train(const char * fn, const char * validFN, const char * model
    double startT = GetClockSec();

    double mkinput = 0.0;
    double train_time = 0.0;
    double forward = 0.0;
    double backward = 0.0;
    double update = 0.0;
    double start = 0.0;
    double time = 0.0;

    for (epoch = 1; epoch <= nepoch; epoch++){
#ifndef WIN32
        if (isShuffled)

@@ -177,17 +185,30 @@ void T2TTrainer::Train(const char * fn, const char * validFN, const char * model
        /* label smoothed gold standard (if needed) */
        XTensor goldSmoothed;

        //while (batchLoader.LoadBatch(file, model->isLM,
        //        &batchEnc, &paddingEnc, &batchDec, &paddingDec, &gold, &label,
        //        NULL, vSize, vSizeTgt,
        //        sBatchSize, wBatchSize, isLenSorted, ws, wc, devID, true))
        while (true)
        {
            start = GetClockSec();
            int batch = batchLoader.LoadBatch(file, model->isLM,
                    &batchEnc, &paddingEnc, &batchDec, &paddingDec, &gold, &label,
                    NULL, vSize, vSizeTgt,
                    sBatchSize, wBatchSize, isLenSorted, ws, wc, devID, true);
            mkinput += GetClockSec() - start;

            if (!batch) {
                break;
            }

            time = GetClockSec();
            CheckNTErrors(batchEnc.order == 2, "wrong tensor order of the sequence batch");

            /* output probabilities */
            XTensor output;

            start = GetClockSec();
            /* make the network */
            if (model->isLM)
                model->MakeLM(batchEnc, output, paddingEnc, true);

@@ -196,11 +217,12 @@ void T2TTrainer::Train(const char * fn, const char * validFN, const char * model
            else {
                ShowNTErrors("Illegal model type!");
            }
            forward += GetClockSec() - start;

            /* back-propagation for obtaining gradients */
            //if (labelSmoothingP > 0)
            //    LabelSmooth(&gold, &goldSmoothed, labelSmoothingP);

            start = GetClockSec();
            XTensor labelOnehot;
            labelOnehot = IndexToOnehot(label, vSizeTgt, labelSmoothingP);

@@ -229,7 +251,9 @@ void T2TTrainer::Train(const char * fn, const char * validFN, const char * model
                net.Backward(lossTensor);
                //net.Backward(output, labelOnehot, paddingDec, CROSSENTROPY);
                //net.Backward(output, label, labelSmoothingP, CROSSENTROPY);
                backward += GetClockSec() - start;

                start = GetClockSec();
                gradStep += 1;
                loss += prob;
                wordCount += wc;

@@ -248,10 +272,12 @@ void T2TTrainer::Train(const char * fn, const char * validFN, const char * model
                    gradStep = 0;
                    validStep++;
                    update += GetClockSec() - start;
                }
            }
            else
                nSkipped++;
            train_time += GetClockSec() - time;

            if (++step >= nstep){
                isEnd = true;

@@ -260,11 +286,19 @@ void T2TTrainer::Train(const char * fn, const char * validFN, const char * model
            if (step % 100 == 0) {
                double elapsed = GetClockSec() - startT;
                startT = GetClockSec();
                XPRINT6(0, stderr, "[Time] elapsed=%.5lfs,mkinput=%.5lfs,train_time=%.5lfs,forward=%.5lfs, backward=%.5lf, update=%.5lf\n",
                        elapsed, mkinput, train_time, forward, backward, update);
                XPRINT8(0, stderr, "[INFO] elapsed=%.1fs, step=%d, epoch=%d, tword=%d, sword=%d, loss=%.3f, ppl=%.3f, sppl=%.3f",
                        elapsed, step, epoch, wordCountTotal, wordCountBatch, loss / wordCount, exp(loss / wordCount), exp(prob / wc));
                if (!doUpdate)
                    XPRINT(0, stderr, " (no update)");
                XPRINT(0, stderr, "\n");
                mkinput = 0.0;
                train_time = 0.0;
                forward = 0.0;
                backward = 0.0;
                update = 0.0;
            }

            if (nStepCheckpoint > 0 && ++nStepCheck >= nStepCheckpoint){
source/tensor/XTensor.cpp (view file @ 1da50ae2)
@@ -25,6 +25,7 @@
 * $Update by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2017-11-18 bug fixes
 *
 */

#include "halfLib/half/half.hpp"
#include <stdio.h>
#include <stdlib.h>

@@ -50,6 +51,11 @@
#include "function/Identity.h"
#include "core/CHeader.h"

//#include "halfLib/HalfFloat/umHalf.h"

#ifdef USE_CUDA
// the CUDA stuff

@@ -376,6 +382,7 @@ XTensor& XTensor::operator= (const XTensor& tensor)
            XMemCopy(data, devID, tensor.data, tensor.devID, size);
            if (dataHost != NULL && tensor.dataHost != NULL)
                XMemCopy(dataHost, -1, tensor.dataHost, tensor.devID, size);
        }
        else {
            DestroyData();

@@ -1854,6 +1861,16 @@ void XTensor::Dump(FILE * file, const char * label, const int n, const int beg,
            }
        }
        else if (dataType == X_FLOAT16) {
            int end = MIN(n > 0 ? beg + n : beg + unitNum, unitNum);
            for (int i = beg; i < end; i++) {
                halfCPU f = ((halfCPU*)d)[i];
                if (i == beg)
                    fprintf(file, "%hx", f);
                else
                    fprintf(file, " %hx", f);
            }
        }
        else if (dataType == X_INT) {
            int end = MIN(n > 0 ? beg + n : beg + unitNum, unitNum);
            for (int i = beg; i < end; i++){

@@ -1900,9 +1917,22 @@ dump data to a file
*/
void XTensor::Dump(const XTensor * tensor, FILE * file, const char * label, const int n, const int beg, const int verbose)
{
    if (tensor->dataType == X_FLOAT)
    {
        XTensor a(tensor->order, tensor->dimSize, tensor->dataType, tensor->denseRatio, tensor->devID, tensor->mem);
        _CopyValues(tensor, &a);
        a.Dump(file, label, n, beg, verbose);
    }
    else if (tensor->dataType == X_FLOAT16)
    {
        XTensor a(tensor->order, tensor->dimSize, X_FLOAT, tensor->denseRatio, tensor->devID, tensor->mem);
        _ConvertDataType(tensor, &a);
        a.Dump(file, label, n, beg, verbose);
    }
    else
    {
        ShowNTErrors("TO DO!");
    }
}

@@ -1980,6 +2010,14 @@ void XTensor::Read(FILE * file, const char * label)
            }
        }
    }
    else if (dataType == X_FLOAT16){
        for (int i = 0; i < unitNum; i++) {
            halfCPU * f = ((halfCPU*)data) + i;
            if (fscanf(file, "%hx", f) < 1) {
                ShowNTErrors("Incorrect tensor format!");
            }
        }
    }
    else {
        ShowNTErrors("TODO!");
    }

@@ -2006,15 +2044,13 @@ void XTensor::Read(FILE * file, const char * label)
        }
    }

    do {
        c = fgetc(file);
    } while (c != '\n' && c != EOF);

    XMemCopy(dataBackup, devID, data, -1, GetDataSizeInChar());
    data = dataBackup;

    delete[](char*)dataBuf;
}
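With this change an FP16 tensor can be printed without manual conversion: the static Dump overload converts the tensor to FP32 via _ConvertDataType before printing, while the member Dump writes raw half bit patterns with %hx, which Read parses back. A usage sketch, mirroring the calls made in Main.cpp above:

    XTensor half;
    InitTensor2D(&half, 2, 2, X_FLOAT16, 0);  // FP16 tensor on device 0
    _SetDataRand(&half, -1.0F, 1.0F);
    half.Dump(&half, stderr, "half:");        // converted to FP32 internally, then printed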
source/tensor/core/utilities/FlushToMem.cu (view file @ 1da50ae2)
@@ -97,7 +97,7 @@ void CudaCPUToGPUFlush(TensorList * mList, int devID, XMem * GPUMem)
/* copy the data from GPU memory to CPU memory */
void CudaGPUToCPUFlush(XTensor * tensor)
{
    //CheckNTErrors((sizeof(DTYPE) == tensor->unitSize), "Unsupported data type.");

    if (tensor->dataHost != NULL)
        delete[](char*)tensor->dataHost;
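Commenting out this unit-size assertion presumably lets CudaGPUToCPUFlush handle tensors whose unitSize differs from sizeof(DTYPE), such as the FP16 tensors introduced in this commit; the diff itself does not state the motivation.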