linye / Tensor.LowPrecision · Commits

Commit 1da50ae2, authored Aug 05, 2019 by ltb
using cpu float16 and test fnn and t2t times
parent 29d2352b
Showing 5 changed files with 131 additions and 25 deletions
source/network/Main.cpp                      +0   −0
source/sample/fnnlm/FNNLM.cpp                +45  −9
source/sample/transformer/T2TTrainer.cpp     +43  −9
source/tensor/XTensor.cpp                    +42  −6
source/tensor/core/utilities/FlushToMem.cu   +1   −1
source/network/Main.cpp — view file @ 1da50ae2

Diff collapsed (no line changes).
source/sample/fnnlm/FNNLM.cpp — view file @ 1da50ae2
@@ -415,7 +415,19 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
    XNet autoDiffer;

    double startT = GetClockSec();
    double mkinput = 0.0;
    double mkgold = 0.0;
    double train_time = 0.0;
    double clearModel = 0.0;
    double forward = 0.0;
    double backward = 0.0;
    double update = 0.0;
    double end = 0.0;
    double start = 0.0;
    double time;

    /* iterate for a number of epochs */
    for(epoch = 0; epoch < nEpoch; epoch++){
@@ -426,7 +438,6 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
        wordCount = 0;
        loss = 0;
        ngramNum = 1;
        while(ngramNum > 0){

            /* load a minibatch of ngrams */
@@ -447,20 +458,25 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
            /* the loss tensor */
            XTensor lossTensor;

            start = GetClockSec();
            /* make the input tensor for position i */
            for(int i = 0; i < model.n - 1; i++)
                MakeWordBatch(inputs[i], ngrams, ngramNum, i, model.vSize, model.devID);
            mkinput += GetClockSec() - start;

            start = GetClockSec();
            /* make the gold tensor */
            MakeWordBatch(gold, ngrams, ngramNum, model.n - 1, model.vSize, model.devID);
            mkgold += GetClockSec() - start;

            time = GetClockSec();
            if(!autoDiff){
                /* prepare an empty network for building the fnn */
                FNNNet net;

                /* gradient = 0 */
                Clear(grad, false);

                /* forward computation */
                Forward(inputs, output, model, net);
@@ -475,40 +491,60 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
                loss -= prob;
            }
            else{
                start = GetClockSec();
                /* gradient = 0 */
                Clear(model, true);
                clearModel += GetClockSec() - start;

                start = GetClockSec();
                /* forward + backward process */
                /* this is implemented by gather function */
                ForwardAutoDiff(ngrams, ngramNum, output, model);
                forward += GetClockSec() - start;

                start = GetClockSec();
                /* this is implemented by multiply function */
                lossTensor = CrossEntropy(output, gold);

                /* automatic differentiation */
                autoDiffer.Backward(lossTensor);
                backward += GetClockSec() - start;

                start = GetClockSec();
                /* update model parameters */
                Update(model, grad, learningRate, true);
                update += GetClockSec() - start;

                start = GetClockSec();
                /* get probabilities */
                float prob = ReduceSumAll(lossTensor);
                loss += prob;
                end += GetClockSec() - start;
            }

            train_time += GetClockSec() - time;

            wordCount += ngramNum;
            wordCountTotal += ngramNum;

            if(++step >= nStep){
                isEnd = true;
                break;
            }

            if (step % 100 == 0) {
                double elapsed = GetClockSec() - startT;
                startT = GetClockSec();
                XPRINT8(0, stderr, "[Time] mkinput=%.5lfs,mkgold=%.5lfs,train_time=%.5lfs,clearModel=%.5lfs,forward=%.5lfs, backward=%.5lf, update=%.5lf, end=%.5lf\n",
                        mkinput, mkgold, train_time, clearModel, forward, backward, update, end);
                XPRINT5(0, stderr, "[INFO] elapsed=%.1fs, step=%d, epoch=%d, ngram=%d, ppl=%.3f\n",
                        elapsed, step, epoch + 1, wordCountTotal, exp(loss / wordCount));
                mkinput = 0.0;
                mkgold = 0.0;
                train_time = 0.0;
                clearModel = 0.0;
                forward = 0.0;
                backward = 0.0;
                update = 0.0;
                end = 0.0;
            }
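The FNNLM change is pure instrumentation: each phase of the inner training loop (building inputs, building the gold tensor, clearing gradients, forward, backward, update) is bracketed by GetClockSec() calls, accumulated into a per-phase counter, and printed and zeroed every 100 steps. Below is a minimal, self-contained sketch of that accumulate-and-reset pattern; GetClockSecDemo and the phase names are illustrative stand-ins, with std::chrono in place of NiuTensor's GetClockSec().

// minimal sketch of the per-phase accumulate-and-reset timing pattern
#include <chrono>
#include <cstdio>

static double GetClockSecDemo()
{
    using namespace std::chrono;
    return duration<double>(steady_clock::now().time_since_epoch()).count();
}

int main()
{
    double forward = 0.0, backward = 0.0;   /* per-phase accumulators */

    for (int step = 1; step <= 300; step++) {
        double start = GetClockSecDemo();
        /* ... forward pass would run here ... */
        forward += GetClockSecDemo() - start;

        start = GetClockSecDemo();
        /* ... backward pass would run here ... */
        backward += GetClockSecDemo() - start;

        if (step % 100 == 0) {
            fprintf(stderr, "[Time] forward=%.5lfs, backward=%.5lfs\n", forward, backward);
            forward = 0.0;   /* reset so each report covers only the last 100 steps */
            backward = 0.0;
        }
    }
    return 0;
}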
source/sample/transformer/T2TTrainer.cpp — view file @ 1da50ae2
@@ -148,6 +148,14 @@ void T2TTrainer::Train(const char * fn, const char * validFN, const char * model
    double startT = GetClockSec();
    double mkinput = 0.0;
    double train_time = 0.0;
    double forward = 0.0;
    double backward = 0.0;
    double update = 0.0;
    double start = 0.0;
    double time = 0.0;

    for(epoch = 1; epoch <= nepoch; epoch++){
#ifndef WIN32
        if(isShuffled)
@@ -176,18 +184,31 @@ void T2TTrainer::Train(const char * fn, const char * validFN, const char * model
            /* label smoothed gold standard (if needed) */
            XTensor goldSmoothed;

            //while (batchLoader.LoadBatch(file, model->isLM,
            //    &batchEnc, &paddingEnc, &batchDec, &paddingDec, &gold, &label,
            //    NULL, vSize, vSizeTgt,
            //    sBatchSize, wBatchSize, isLenSorted, ws, wc, devID, true))
            while(true)
            {
                start = GetClockSec();
                int batch = batchLoader.LoadBatch(file, model->isLM,
                                &batchEnc, &paddingEnc, &batchDec, &paddingDec, &gold, &label,
                                NULL, vSize, vSizeTgt,
                                sBatchSize, wBatchSize, isLenSorted, ws, wc, devID, true);
                mkinput += GetClockSec() - start;
                if(!batch)
                {
                    break;
                }

                time = GetClockSec();
                CheckNTErrors(batchEnc.order == 2, "wrong tensor order of the sequence batch");

                /* output probabilities */
                XTensor output;

                start = GetClockSec();
                /* make the network */
                if(model->isLM)
                    model->MakeLM(batchEnc, output, paddingEnc, true);
@@ -196,11 +217,12 @@ void T2TTrainer::Train(const char * fn, const char * validFN, const char * model
                else{
                    ShowNTErrors("Illegal model type!");
                }
                forward += GetClockSec() - start;

                /* back-propagation for obtaining gradients */
                //if (labelSmoothingP > 0)
                //    LabelSmooth(&gold, &goldSmoothed, labelSmoothingP);

                start = GetClockSec();
                XTensor labelOnehot;
                labelOnehot = IndexToOnehot(label, vSizeTgt, labelSmoothingP);
@@ -229,7 +251,9 @@ void T2TTrainer::Train(const char * fn, const char * validFN, const char * model
                    net.Backward(lossTensor);
                    //net.Backward(output, labelOnehot, paddingDec, CROSSENTROPY);
                    //net.Backward(output, label, labelSmoothingP, CROSSENTROPY);
                    backward += GetClockSec() - start;

                    start = GetClockSec();
                    gradStep += 1;
                    loss += prob;
                    wordCount += wc;
@@ -248,11 +272,13 @@ void T2TTrainer::Train(const char * fn, const char * validFN, const char * model
                        gradStep = 0;
                        validStep++;
                        update += GetClockSec() - start;
                    }
                }
                else
                    nSkipped++;

                train_time += GetClockSec() - time;

                if(++step >= nstep){
                    isEnd = true;
                    break;
@@ -260,11 +286,19 @@ void T2TTrainer::Train(const char * fn, const char * validFN, const char * model
                if(step % 100 == 0){
                    double elapsed = GetClockSec() - startT;
                    startT = GetClockSec();
                    XPRINT6(0, stderr, "[Time] elapsed=%.5lfs,mkinput=%.5lfs,train_time=%.5lfs,forward=%.5lfs, backward=%.5lf, update=%.5lf\n",
                            elapsed, mkinput, train_time, forward, backward, update);
                    XPRINT8(0, stderr, "[INFO] elapsed=%.1fs, step=%d, epoch=%d, tword=%d, sword=%d, loss=%.3f, ppl=%.3f, sppl=%.3f",
                            elapsed, step, epoch, wordCountTotal, wordCountBatch, loss / wordCount, exp(loss / wordCount), exp(prob / wc));
                    if(!doUpdate)
                        XPRINT(0, stderr, " (no update)");
                    XPRINT(0, stderr, "\n");
                    mkinput = 0.0;
                    train_time = 0.0;
                    forward = 0.0;
                    backward = 0.0;
                    update = 0.0;
                }
                if(nStepCheckpoint > 0 && ++nStepCheck >= nStepCheckpoint){
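The T2TTrainer change follows the same timing pattern, with one structural twist: the batch loader used to be called inside the while condition, where its cost was invisible, so the loop is rewritten as while(true) with an explicit, timed LoadBatch call and a break on an empty batch. A small sketch of that rewrite, assuming a hypothetical LoadBatchDemo that returns 0 when input is exhausted (mirroring how the diff treats a zero batch count):

// sketch: hoisting a call out of the loop condition so it can be timed
#include <chrono>
#include <cstdio>

static double Clock()
{
    using namespace std::chrono;
    return duration<double>(steady_clock::now().time_since_epoch()).count();
}

static int LoadBatchDemo()
{
    static int remaining = 5;       /* pretend there are five batches */
    return remaining-- > 0 ? 1 : 0; /* 0 signals "no more data" */
}

int main()
{
    double mkinput = 0.0;

    /* before: while (LoadBatchDemo()) { ... }  -- the load cost is unmeasured */
    while (true) {
        double start = Clock();
        int batch = LoadBatchDemo();
        mkinput += Clock() - start; /* load cost is now attributed explicitly */
        if (!batch)
            break;
        /* ... train on the batch ... */
    }
    fprintf(stderr, "[Time] mkinput=%.5lfs\n", mkinput);
    return 0;
}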
source/tensor/XTensor.cpp — view file @ 1da50ae2
@@ -25,6 +25,7 @@
 * $Update by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2017-11-18 bug fixes
 *
 */

#include "halfLib/half/half.hpp"
#include <stdio.h>
#include <stdlib.h>
@@ -50,6 +51,11 @@
#include "function/Identity.h"
#include "core/CHeader.h"
//#include "halfLib/HalfFloat/umHalf.h"

#ifdef USE_CUDA
// the CUDA stuff
@@ -376,6 +382,7 @@ XTensor& XTensor::operator= (const XTensor& tensor)
        XMemCopy(data, devID, tensor.data, tensor.devID, size);
        if(dataHost != NULL && tensor.dataHost != NULL)
            XMemCopy(dataHost, -1, tensor.dataHost, tensor.devID, size);
    }
    else{
        DestroyData();
@@ -1854,6 +1861,16 @@ void XTensor::Dump(FILE * file, const char * label, const int n, const int beg,
            }
        }
        else if(dataType == X_FLOAT16){
            int end = MIN(n > 0 ? beg + n : beg + unitNum, unitNum);
            for(int i = beg; i < end; i++){
                halfCPU f = ((halfCPU*)d)[i];
                if(i == beg)
                    fprintf(file, "%hx", f);
                else
                    fprintf(file, " %hx", f);
            }
        }
        else if(dataType == X_INT){
            int end = MIN(n > 0 ? beg + n : beg + unitNum, unitNum);
            for(int i = beg; i < end; i++){
@@ -1900,9 +1917,22 @@ dump data to a file
*/
void XTensor::Dump(const XTensor * tensor, FILE * file, const char * label, const int n, const int beg, const int verbose)
{
    if (tensor->dataType == X_FLOAT)
    {
        XTensor a(tensor->order, tensor->dimSize, tensor->dataType, tensor->denseRatio, tensor->devID, tensor->mem);
        _CopyValues(tensor, &a);
        a.Dump(file, label, n, beg, verbose);
    }
    else if (tensor->dataType == X_FLOAT16)
    {
        XTensor a(tensor->order, tensor->dimSize, X_FLOAT, tensor->denseRatio, tensor->devID, tensor->mem);
        _ConvertDataType(tensor, &a);
        a.Dump(file, label, n, beg, verbose);
    }
    else
    {
        ShowNTErrors("TO DO!");
    }
}
@@ -1980,6 +2010,14 @@ void XTensor::Read(FILE * file, const char * label)
            }
        }
    }
    else if(dataType == X_FLOAT16){
        for(int i = 0; i < unitNum; i++){
            halfCPU * f = ((halfCPU*)data) + i;
            if(fscanf(file, "%hx", f) < 1){
                ShowNTErrors("Incorrect tensor format!");
            }
        }
    }
    else{
        ShowNTErrors("TODO!");
    }
@@ -2006,15 +2044,13 @@ void XTensor::Read(FILE * file, const char * label)
        }
    }

    do{
        c = fgetc(file);
    }while(c != '\n' && c != EOF);

    XMemCopy(dataBackup, devID, data, -1, GetDataSizeInChar());
    data = dataBackup;

    delete[](char*)dataBuf;
}
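The new X_FLOAT16 branches in Dump and Read serialize half-precision values as whitespace-separated hexadecimal words via "%hx". Below is a sketch of that round trip using explicit 16-bit unsigned patterns, which is the type "%hx" actually expects; the commit instead passes halfCPU values (from halfLib/half/half.hpp) straight through fprintf's varargs, which works only insofar as halfCPU occupies exactly 16 bits.

// sketch: dump/read half-precision bit patterns as "%hx" hex words
#include <cstdio>

int main()
{
    /* IEEE-754 binary16 bit patterns: 1.0, -2.0, 0.5 */
    unsigned short out[3] = { 0x3C00, 0xC000, 0x3800 };

    FILE * file = fopen("half.txt", "w");
    for (int i = 0; i < 3; i++)
        fprintf(file, i == 0 ? "%hx" : " %hx", out[i]);   /* dump as hex words */
    fclose(file);

    unsigned short in[3] = { 0, 0, 0 };
    file = fopen("half.txt", "r");
    for (int i = 0; i < 3; i++) {
        if (fscanf(file, "%hx", &in[i]) < 1) {            /* read them back */
            fprintf(stderr, "Incorrect tensor format!\n");
            return 1;
        }
    }
    fclose(file);

    printf("round trip %s\n",
           (in[0] == out[0] && in[1] == out[1] && in[2] == out[2]) ? "ok" : "failed");
    return 0;
}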
source/tensor/core/utilities/FlushToMem.cu — view file @ 1da50ae2
@@ -97,7 +97,7 @@ void CudaCPUToGPUFlush(TensorList * mList, int devID, XMem * GPUMem)
/* copy the data from GPU memory to CPU memory */
void CudaGPUToCPUFlush(XTensor * tensor)
{
    //CheckNTErrors((sizeof(DTYPE) == tensor->unitSize), "Unsupported data type.");

    if (tensor->dataHost != NULL)
        delete[](char*)tensor->dataHost;
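Disabling this assertion removes the requirement that flushed tensors have sizeof(DTYPE) (4-byte float) elements, which would otherwise reject the 2-byte float16 tensors this commit introduces. A minimal sketch of unit-size-agnostic buffer sizing follows; TensorMeta is a hypothetical stand-in for the unitNum/unitSize fields that XTensor actually carries.

// sketch: size buffers from unitSize instead of assuming DTYPE elements
#include <cstddef>
#include <cstdio>

struct TensorMeta {
    int unitNum;   /* number of elements */
    int unitSize;  /* bytes per element: 4 for float, 2 for float16 */
};

/* byte count valid for any element width, not just sizeof(DTYPE) */
static size_t DataSizeInBytes(const TensorMeta & t)
{
    return (size_t)t.unitNum * (size_t)t.unitSize;
}

int main()
{
    TensorMeta fp32 = { 1024, 4 };
    TensorMeta fp16 = { 1024, 2 };
    printf("fp32: %zu bytes, fp16: %zu bytes\n",
           DataSizeInBytes(fp32), DataSizeInBytes(fp16));
    return 0;
}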