linye / Tensor.LowPrecision · Commits

Commit 1da50ae2 authored Aug 05, 2019 by ltb
using cpu float16 and test fnn and t2t times
parent 29d2352b
Showing 5 changed files with 131 additions and 25 deletions.
source/network/Main.cpp (+0, -0)
source/sample/fnnlm/FNNLM.cpp (+45, -9)
source/sample/transformer/T2TTrainer.cpp (+43, -9)
source/tensor/XTensor.cpp (+42, -6)
source/tensor/core/utilities/FlushToMem.cu (+1, -1)
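Editor's note: the FNNLM.cpp and T2TTrainer.cpp changes below all use the same wall-clock instrumentation pattern: take a timestamp before a phase, accumulate the elapsed seconds into a per-phase counter, and print and reset the counters every 100 steps. A minimal, self-contained sketch of that pattern follows; the Clock() helper is a stand-in for the library's GetClockSec(), and the phase names are illustrative only.

    #include <chrono>
    #include <cstdio>

    // Stand-in for GetClockSec(): wall-clock time in seconds.
    static double Clock()
    {
        using namespace std::chrono;
        return duration<double>(steady_clock::now().time_since_epoch()).count();
    }

    int main()
    {
        double forward = 0.0, backward = 0.0;   // per-phase accumulators
        for (int step = 1; step <= 300; step++) {
            double start = Clock();
            /* ... forward pass would run here ... */
            forward += Clock() - start;

            start = Clock();
            /* ... backward pass would run here ... */
            backward += Clock() - start;

            if (step % 100 == 0) {
                fprintf(stderr, "[Time] forward=%.5lfs, backward=%.5lfs\n", forward, backward);
                forward = backward = 0.0;       // reset after each report
            }
        }
        return 0;
    }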
source/network/Main.cpp
(diff collapsed)
source/sample/fnnlm/FNNLM.cpp
@@ -415,7 +415,19 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
     XNet autoDiffer;

     double startT = GetClockSec();
+    double mkinput = 0.0;
+    double mkgold = 0.0;
+    double train_time = 0.0;
+    double clearModel = 0.0;
+    double forward = 0.0;
+    double backward = 0.0;
+    double update = 0.0;
+    double end = 0.0;
+    double start = 0.0;
+    double time;

     /* iterate for a number of epochs */
     for(epoch = 0; epoch < nEpoch; epoch++){
@@ -426,7 +438,6 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
         wordCount = 0;
         loss = 0;
         ngramNum = 1;
         while(ngramNum > 0){

             /* load a minibatch of ngrams */
@@ -447,20 +458,25 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
             /* the loss tensor */
             XTensor lossTensor;

+            start = GetClockSec();
             /* make the input tensor for position i */
             for(int i = 0; i < model.n - 1; i++)
                 MakeWordBatch(inputs[i], ngrams, ngramNum, i, model.vSize, model.devID);
+            mkinput += GetClockSec() - start;

+            start = GetClockSec();
             /* make the gold tensor */
             MakeWordBatch(gold, ngrams, ngramNum, model.n - 1, model.vSize, model.devID);
+            mkgold += GetClockSec() - start;

+            time = GetClockSec();
             if(!autoDiff){
                 /* prepare an empty network for building the fnn */
                 FNNNet net;

                 /* gradident = 0 */
                 Clear(grad, false);

                 /* forward computation */
                 Forward(inputs, output, model, net);
@@ -475,40 +491,60 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
                 loss -= prob;
             }
             else{
+                start = GetClockSec();
                 /* gradient = 0 */
                 Clear(model, true);
+                clearModel += GetClockSec() - start;

+                start = GetClockSec();
                 /* forward + backward process */
                 /* this is implemented by gather function */
                 ForwardAutoDiff(ngrams, ngramNum, output, model);
+                forward += GetClockSec() - start;

+                start = GetClockSec();
                 /* this is implemented by multiply function */
                 lossTensor = CrossEntropy(output, gold);

                 /* automatic differentiation */
                 autoDiffer.Backward(lossTensor);
+                backward += GetClockSec() - start;

+                start = GetClockSec();
                 /* update model parameters */
                 Update(model, grad, learningRate, true);
+                update += GetClockSec() - start;

+                start = GetClockSec();
                 /* get probabilities */
                 float prob = ReduceSumAll(lossTensor);
                 loss += prob;
+                end += GetClockSec() - start;
             }
+            train_time += GetClockSec() - time;

             wordCount += ngramNum;
             wordCountTotal += ngramNum;

             if(++step >= nStep){
                 isEnd = true;
                 break;
             }

             if (step % 100 == 0) {
                 double elapsed = GetClockSec() - startT;
+                startT = GetClockSec();
+                XPRINT8(0, stderr, "[Time] mkinput=%.5lfs,mkgold=%.5lfs,train_time=%.5lfs,clearModel=%.5lfs,forward=%.5lfs, backward=%.5lf, update=%.5lf, end=%.5lf\n",
+                        mkinput, mkgold, train_time, clearModel, forward, backward, update, end);
                 XPRINT5(0, stderr, "[INFO] elapsed=%.1fs, step=%d, epoch=%d, ngram=%d, ppl=%.3f\n",
                         elapsed, step, epoch + 1, wordCountTotal, exp(loss / wordCount));
+                mkinput = 0.0;
+                mkgold = 0.0;
+                train_time = 0.0;
+                clearModel = 0.0;
+                forward = 0.0;
+                backward = 0.0;
+                update = 0.0;
+                end = 0.0;
             }
         }
source/sample/transformer/T2TTrainer.cpp
@@ -148,6 +148,14 @@ void T2TTrainer::Train(const char * fn, const char * validFN, const char * model
     double startT = GetClockSec();
+    double mkinput = 0.0;
+    double train_time = 0.0;
+    double forward = 0.0;
+    double backward = 0.0;
+    double update = 0.0;
+    double start = 0.0;
+    double time = 0.0;

     for(epoch = 1; epoch <= nepoch; epoch++){
 #ifndef WIN32
         if(isShuffled)
@@ -176,18 +184,31 @@ void T2TTrainer::Train(const char * fn, const char * validFN, const char * model
         /* label smoothed gold standard (if needed) */
         XTensor goldSmoothed;

-        while (batchLoader.LoadBatch(file, model->isLM, &batchEnc, &paddingEnc, &batchDec, &paddingDec, &gold, &label, NULL, vSize, vSizeTgt, sBatchSize, wBatchSize, isLenSorted, ws, wc, devID, true))
-        {
+        //while (batchLoader.LoadBatch(file, model->isLM,
+        //    &batchEnc, &paddingEnc, &batchDec, &paddingDec, &gold, &label,
+        //    NULL, vSize, vSizeTgt,
+        //    sBatchSize, wBatchSize, isLenSorted, ws, wc, devID, true))
+        while (true)
+        {
+            start = GetClockSec();
+            int batch = batchLoader.LoadBatch(file, model->isLM, &batchEnc, &paddingEnc, &batchDec, &paddingDec, &gold, &label, NULL, vSize, vSizeTgt, sBatchSize, wBatchSize, isLenSorted, ws, wc, devID, true);
+            mkinput += GetClockSec() - start;
+            if (!batch) {
+                break;
+            }

+            time = GetClockSec();
             CheckNTErrors(batchEnc.order == 2, "wrong tensor order of the sequence batch");

             /* output probabilities */
             XTensor output;

+            start = GetClockSec();
             /* make the network */
             if(model->isLM)
                 model->MakeLM(batchEnc, output, paddingEnc, true);
@@ -196,11 +217,12 @@ void T2TTrainer::Train(const char * fn, const char * validFN, const char * model
             else{
                 ShowNTErrors("Illegal model type!");
             }
+            forward += GetClockSec() - start;

             /* back-propagation for obtaining gradients */
             //if (labelSmoothingP > 0)
             //    LabelSmooth(&gold, &goldSmoothed, labelSmoothingP);
+            start = GetClockSec();
             XTensor labelOnehot;
             labelOnehot = IndexToOnehot(label, vSizeTgt, labelSmoothingP);
@@ -229,7 +251,9 @@ void T2TTrainer::Train(const char * fn, const char * validFN, const char * model
             net.Backward(lossTensor);
             //net.Backward(output, labelOnehot, paddingDec, CROSSENTROPY);
             //net.Backward(output, label, labelSmoothingP, CROSSENTROPY);
+            backward += GetClockSec() - start;

+            start = GetClockSec();
             gradStep += 1;
             loss += prob;
             wordCount += wc;
@@ -248,11 +272,13 @@ void T2TTrainer::Train(const char * fn, const char * validFN, const char * model
                     gradStep = 0;
                     validStep++;
+                    update += GetClockSec() - start;
                 }
             }
             else
                 nSkipped++;
+            train_time += GetClockSec() - time;

             if(++step >= nstep){
                 isEnd = true;
                 break;
@@ -260,11 +286,19 @@ void T2TTrainer::Train(const char * fn, const char * validFN, const char * model
             if (step % 100 == 0) {
                 double elapsed = GetClockSec() - startT;
+                startT = GetClockSec();
+                XPRINT6(0, stderr, "[Time] elapsed=%.5lfs,mkinput=%.5lfs,train_time=%.5lfs,forward=%.5lfs, backward=%.5lf, update=%.5lf\n",
+                        elapsed, mkinput, train_time, forward, backward, update);
                 XPRINT8(0, stderr, "[INFO] elapsed=%.1fs, step=%d, epoch=%d, tword=%d, sword=%d, loss=%.3f, ppl=%.3f, sppl=%.3f",
                         elapsed, step, epoch, wordCountTotal, wordCountBatch, loss / wordCount, exp(loss / wordCount), exp(prob / wc));
                 if (!doUpdate)
                     XPRINT(0, stderr, " (no update)");
                 XPRINT(0, stderr, "\n");
+                mkinput = 0.0;
+                train_time = 0.0;
+                forward = 0.0;
+                backward = 0.0;
+                update = 0.0;
             }

             if(nStepCheckpoint > 0 && ++nStepCheck >= nStepCheckpoint){
source/tensor/XTensor.cpp
@@ -25,6 +25,7 @@
  * $Update by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2017-11-18 bug fixes
  *
  */

+#include "halfLib/half/half.hpp"
 #include <stdio.h>
 #include <stdlib.h>
@@ -50,6 +51,11 @@
 #include "function/Identity.h"
 #include "core/CHeader.h"

+//#include "halfLib/HalfFloat/umHalf.h"

 #ifdef USE_CUDA

 // the CUDA stuff
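Editor's note: the new include brings in a CPU-side half-precision type (referred to as halfCPU elsewhere in this commit). Assuming halfLib/half/half.hpp is the common header-only half_float library, float values can be converted to 16-bit halves and back entirely on the CPU, roughly as sketched below; the include path and the halfCPU alias here are illustrative, not taken from the repository.

    #include <cstdio>
    #include "halfLib/half/half.hpp"   // assumed: header-only half_float library

    typedef half_float::half halfCPU;  // illustrative alias mirroring the commit's naming

    int main()
    {
        float x = 3.14159f;
        halfCPU h = halfCPU(x);        // float -> 16-bit half, no CUDA needed
        float back = float(h);         // half -> float
        printf("%f -> %f\n", x, back); // expect a small rounding difference
        return 0;
    }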
@@ -376,6 +382,7 @@ XTensor& XTensor::operator= (const XTensor& tensor)
             XMemCopy(data, devID, tensor.data, tensor.devID, size);
             if(dataHost != NULL && tensor.dataHost != NULL)
                 XMemCopy(dataHost, -1, tensor.dataHost, tensor.devID, size);
+                XMemCopy(dataHost, -1, tensor.dataHost, tensor.devID, size);
         }
         else{
             DestroyData();
@@ -1854,6 +1861,16 @@ void XTensor::Dump(FILE * file, const char * label, const int n, const int beg,
             }
         }
+        else if (dataType == X_FLOAT16) {
+            int end = MIN(n > 0 ? beg + n : beg + unitNum, unitNum);
+            for(int i = beg; i < end; i++){
+                halfCPU f = ((halfCPU*)d)[i];
+                if (i == beg)
+                    fprintf(file, "%hx", f);
+                else
+                    fprintf(file, " %hx", f);
+            }
+        }
         else if (dataType == X_INT) {
             int end = MIN(n > 0 ? beg + n : beg + unitNum, unitNum);
             for(int i = beg; i < end; i++){
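Editor's note: the new Dump branch above writes each 16-bit value as a bare hexadecimal field via "%hx", and the Read change later in this file parses the same fields back with fscanf("%hx", ...), i.e. the raw bit pattern is serialized rather than a decimal float. A small stand-alone sketch of that round trip, using unsigned short in place of halfCPU, is shown here.

    #include <cstdio>

    int main()
    {
        unsigned short bits[3] = { 0x3c00, 0x4000, 0xc000 };  // IEEE 754 half bit patterns: 1.0, 2.0, -2.0
        unsigned short back[3] = { 0, 0, 0 };

        FILE * f = tmpfile();
        if (f == NULL)
            return 1;

        // dump: first value without a leading space, the rest prefixed by one
        for (int i = 0; i < 3; i++)
            fprintf(f, i == 0 ? "%hx" : " %hx", bits[i]);

        // read: the same "%hx" conversion recovers the exact bit patterns
        rewind(f);
        for (int i = 0; i < 3; i++) {
            if (fscanf(f, "%hx", &back[i]) < 1) {
                fprintf(stderr, "Incorrect tensor format!\n");
                return 1;
            }
        }
        printf("%hx %hx %hx\n", back[0], back[1], back[2]);
        fclose(f);
        return 0;
    }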
@@ -1900,9 +1917,22 @@ dump data to a file
 */
 void XTensor::Dump(const XTensor * tensor, FILE * file, const char * label, const int n, const int beg, const int verbose)
 {
-    XTensor a(tensor->order, tensor->dimSize, tensor->dataType, tensor->denseRatio, tensor->devID, tensor->mem);
-    _CopyValues(tensor, &a);
-    a.Dump(file, label, n, beg, verbose);
+    if (tensor->dataType == X_FLOAT)
+    {
+        XTensor a(tensor->order, tensor->dimSize, tensor->dataType, tensor->denseRatio, tensor->devID, tensor->mem);
+        _CopyValues(tensor, &a);
+        a.Dump(file, label, n, beg, verbose);
+    }
+    else if (tensor->dataType == X_FLOAT16)
+    {
+        XTensor a(tensor->order, tensor->dimSize, X_FLOAT, tensor->denseRatio, tensor->devID, tensor->mem);
+        _ConvertDataType(tensor, &a);
+        a.Dump(file, label, n, beg, verbose);
+    }
+    else
+    {
+        ShowNTErrors("TO DO!");
+    }
 }

 /*
@@ -1980,6 +2010,14 @@ void XTensor::Read(FILE * file, const char * label)
             }
         }
     }
+    else if (dataType == X_FLOAT16){
+        for (int i = 0; i < unitNum; i++) {
+            halfCPU * f = ((halfCPU*)data) + i;
+            if (fscanf(file, "%hx", f) < 1) {
+                ShowNTErrors("Incorrect tensor format!");
+            }
+        }
+    }
     else {
         ShowNTErrors("TODO!");
     }
@@ -2006,15 +2044,13 @@ void XTensor::Read(FILE * file, const char * label)
             }
         }

         do{
             c = fgetc(file);
         }while(c != '\n' && c != EOF);

         XMemCopy(dataBackup, devID, data, -1, GetDataSizeInChar());
         data = dataBackup;

         delete[](char*)dataBuf;
     }

 /*
source/tensor/core/utilities/FlushToMem.cu
@@ -97,7 +97,7 @@ void CudaCPUToGPUFlush(TensorList * mList, int devID, XMem * GPUMem)
 /* copy the data from GPU memory to CPU memory */
 void CudaGPUToCPUFlush(XTensor * tensor)
 {
-    CheckNTErrors((sizeof(DTYPE) == tensor->unitSize), "Unsupported data type.");
+    //CheckNTErrors((sizeof(DTYPE) == tensor->unitSize), "Unsupported data type.");

     if (tensor->dataHost != NULL)
         delete[](char*)tensor->dataHost;
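Editor's note: commenting out the sizeof(DTYPE) == unitSize check lets tensors whose elements are not DTYPE (for example 2-byte float16 values) pass through the GPU-to-CPU flush, since the byte count can be derived from the per-element size stored with the tensor rather than the compile-time DTYPE size. The sketch below illustrates that idea generically; it is not NiuTensor's actual CudaGPUToCPUFlush, and the FlushToHost name and signature are hypothetical.

    #include <cuda_runtime.h>
    #include <cstdlib>

    // Generic device-to-host copy that relies only on the element size stored
    // with the tensor, so it works for float, float16, int, ... alike.
    void * FlushToHost(const void * deviceData, int unitNum, int unitSize)
    {
        size_t bytes = (size_t)unitNum * (size_t)unitSize;
        void * hostData = malloc(bytes);
        if (hostData != NULL)
            cudaMemcpy(hostData, deviceData, bytes, cudaMemcpyDeviceToHost);
        return hostData;
    }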