Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
T
Tensor.LowPrecision
概览
Overview
Details
Activity
Cycle Analytics
版本库
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
问题
0
Issues
0
列表
Board
标记
里程碑
合并请求
0
Merge Requests
0
CI / CD
CI / CD
流水线
作业
日程表
图表
维基
Wiki
代码片段
Snippets
成员
Collapse sidebar
Close sidebar
活动
图像
聊天
创建新问题
作业
提交
Issue Boards
Open sidebar
linye
Tensor.LowPrecision
Commits
1da50ae2
Commit
1da50ae2
authored
5 years ago
by
ltb
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
using cpu float16 and test fnn and t2t times
parent
29d2352b
全部展开
显示空白字符变更
内嵌
并排
正在显示
5 个修改的文件
包含
117 行增加
和
11 行删除
+117
-11
source/network/Main.cpp
+0
-0
source/sample/fnnlm/FNNLM.cpp
+39
-3
source/sample/transformer/T2TTrainer.cpp
+38
-4
source/tensor/XTensor.cpp
+39
-3
source/tensor/core/utilities/FlushToMem.cu
+1
-1
没有找到文件。
source/network/Main.cpp
查看文件 @
1da50ae2
差异被折叠。
点击展开。
source/sample/fnnlm/FNNLM.cpp
查看文件 @
1da50ae2
...
@@ -416,6 +416,18 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
...
@@ -416,6 +416,18 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
double
startT
=
GetClockSec
();
double
startT
=
GetClockSec
();
double
mkinput
=
0.0
;
double
mkgold
=
0.0
;
double
train_time
=
0.0
;
double
clearModel
=
0.0
;
double
forward
=
0.0
;
double
backward
=
0.0
;
double
update
=
0.0
;
double
end
=
0.0
;
double
start
=
0.0
;
double
time
;
/* iterate for a number of epochs */
/* iterate for a number of epochs */
for
(
epoch
=
0
;
epoch
<
nEpoch
;
epoch
++
){
for
(
epoch
=
0
;
epoch
<
nEpoch
;
epoch
++
){
...
@@ -426,7 +438,6 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
...
@@ -426,7 +438,6 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
wordCount
=
0
;
wordCount
=
0
;
loss
=
0
;
loss
=
0
;
ngramNum
=
1
;
ngramNum
=
1
;
while
(
ngramNum
>
0
){
while
(
ngramNum
>
0
){
/* load a minibatch of ngrams */
/* load a minibatch of ngrams */
...
@@ -447,13 +458,18 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
...
@@ -447,13 +458,18 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
/* the loss tensor */
/* the loss tensor */
XTensor
lossTensor
;
XTensor
lossTensor
;
start
=
GetClockSec
();
/* make the input tensor for position i */
/* make the input tensor for position i */
for
(
int
i
=
0
;
i
<
model
.
n
-
1
;
i
++
)
for
(
int
i
=
0
;
i
<
model
.
n
-
1
;
i
++
)
MakeWordBatch
(
inputs
[
i
],
ngrams
,
ngramNum
,
i
,
model
.
vSize
,
model
.
devID
);
MakeWordBatch
(
inputs
[
i
],
ngrams
,
ngramNum
,
i
,
model
.
vSize
,
model
.
devID
);
mkinput
+=
GetClockSec
()
-
start
;
start
=
GetClockSec
();
/* make the gold tensor */
/* make the gold tensor */
MakeWordBatch
(
gold
,
ngrams
,
ngramNum
,
model
.
n
-
1
,
model
.
vSize
,
model
.
devID
);
MakeWordBatch
(
gold
,
ngrams
,
ngramNum
,
model
.
n
-
1
,
model
.
vSize
,
model
.
devID
);
mkgold
+=
GetClockSec
()
-
start
;
time
=
GetClockSec
();
if
(
!
autoDiff
){
if
(
!
autoDiff
){
/* prepare an empty network for building the fnn */
/* prepare an empty network for building the fnn */
FNNNet
net
;
FNNNet
net
;
...
@@ -475,28 +491,37 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
...
@@ -475,28 +491,37 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
loss
-=
prob
;
loss
-=
prob
;
}
}
else
{
else
{
start
=
GetClockSec
();
/* gradient = 0 */
/* gradient = 0 */
Clear
(
model
,
true
);
Clear
(
model
,
true
);
clearModel
+=
GetClockSec
()
-
start
;
start
=
GetClockSec
();
/* forward + backward process */
/* forward + backward process */
/* this is implemented by gather function */
/* this is implemented by gather function */
ForwardAutoDiff
(
ngrams
,
ngramNum
,
output
,
model
);
ForwardAutoDiff
(
ngrams
,
ngramNum
,
output
,
model
);
forward
+=
GetClockSec
()
-
start
;
start
=
GetClockSec
();
/* this is implemented by multiply function */
/* this is implemented by multiply function */
lossTensor
=
CrossEntropy
(
output
,
gold
);
lossTensor
=
CrossEntropy
(
output
,
gold
);
/* automatic differentiation */
/* automatic differentiation */
autoDiffer
.
Backward
(
lossTensor
);
autoDiffer
.
Backward
(
lossTensor
);
backward
+=
GetClockSec
()
-
start
;
start
=
GetClockSec
();
/* update model parameters */
/* update model parameters */
Update
(
model
,
grad
,
learningRate
,
true
);
Update
(
model
,
grad
,
learningRate
,
true
);
update
+=
GetClockSec
()
-
start
;
start
=
GetClockSec
();
/* get probabilities */
/* get probabilities */
float
prob
=
ReduceSumAll
(
lossTensor
);
float
prob
=
ReduceSumAll
(
lossTensor
);
loss
+=
prob
;
loss
+=
prob
;
end
+=
GetClockSec
()
-
start
;
}
}
train_time
+=
GetClockSec
()
-
time
;
wordCount
+=
ngramNum
;
wordCount
+=
ngramNum
;
wordCountTotal
+=
ngramNum
;
wordCountTotal
+=
ngramNum
;
...
@@ -507,8 +532,19 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
...
@@ -507,8 +532,19 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
if
(
step
%
100
==
0
)
{
if
(
step
%
100
==
0
)
{
double
elapsed
=
GetClockSec
()
-
startT
;
double
elapsed
=
GetClockSec
()
-
startT
;
startT
=
GetClockSec
();
XPRINT8
(
0
,
stderr
,
"[Time] mkinput=%.5lfs,mkgold=%.5lfs,train_time=%.5lfs,clearModel=%.5lfs,forward=%.5lfs, backward=%.5lf, update=%.5lf, end=%.5lf
\n
"
,
mkinput
,
mkgold
,
train_time
,
clearModel
,
forward
,
backward
,
update
,
end
);
XPRINT5
(
0
,
stderr
,
"[INFO] elapsed=%.1fs, step=%d, epoch=%d, ngram=%d, ppl=%.3f
\n
"
,
XPRINT5
(
0
,
stderr
,
"[INFO] elapsed=%.1fs, step=%d, epoch=%d, ngram=%d, ppl=%.3f
\n
"
,
elapsed
,
step
,
epoch
+
1
,
wordCountTotal
,
exp
(
loss
/
wordCount
));
elapsed
,
step
,
epoch
+
1
,
wordCountTotal
,
exp
(
loss
/
wordCount
));
mkinput
=
0.0
;
mkgold
=
0.0
;
train_time
=
0.0
;
clearModel
=
0.0
;
forward
=
0.0
;
backward
=
0.0
;
update
=
0.0
;
end
=
0.0
;
}
}
}
}
...
...
This diff is collapsed.
Click to expand it.
source/sample/transformer/T2TTrainer.cpp
查看文件 @
1da50ae2
...
@@ -148,6 +148,14 @@ void T2TTrainer::Train(const char * fn, const char * validFN, const char * model
...
@@ -148,6 +148,14 @@ void T2TTrainer::Train(const char * fn, const char * validFN, const char * model
double
startT
=
GetClockSec
();
double
startT
=
GetClockSec
();
double
mkinput
=
0.0
;
double
train_time
=
0.0
;
double
forward
=
0.0
;
double
backward
=
0.0
;
double
update
=
0.0
;
double
start
=
0.0
;
double
time
=
0.0
;
for
(
epoch
=
1
;
epoch
<=
nepoch
;
epoch
++
){
for
(
epoch
=
1
;
epoch
<=
nepoch
;
epoch
++
){
#ifndef WIN32
#ifndef WIN32
if
(
isShuffled
)
if
(
isShuffled
)
...
@@ -177,17 +185,30 @@ void T2TTrainer::Train(const char * fn, const char * validFN, const char * model
...
@@ -177,17 +185,30 @@ void T2TTrainer::Train(const char * fn, const char * validFN, const char * model
/* label smoothed gold standard (if needed) */
/* label smoothed gold standard (if needed) */
XTensor
goldSmoothed
;
XTensor
goldSmoothed
;
while
(
batchLoader
.
LoadBatch
(
file
,
model
->
isLM
,
//while (batchLoader.LoadBatch(file, model->isLM,
// &batchEnc, &paddingEnc, &batchDec, &paddingDec, &gold, &label,
// NULL, vSize, vSizeTgt,
// sBatchSize, wBatchSize, isLenSorted, ws, wc, devID, true))
while
(
true
)
{
start
=
GetClockSec
();
int
batch
=
batchLoader
.
LoadBatch
(
file
,
model
->
isLM
,
&
batchEnc
,
&
paddingEnc
,
&
batchDec
,
&
paddingDec
,
&
gold
,
&
label
,
&
batchEnc
,
&
paddingEnc
,
&
batchDec
,
&
paddingDec
,
&
gold
,
&
label
,
NULL
,
vSize
,
vSizeTgt
,
NULL
,
vSize
,
vSizeTgt
,
sBatchSize
,
wBatchSize
,
isLenSorted
,
ws
,
wc
,
devID
,
true
))
sBatchSize
,
wBatchSize
,
isLenSorted
,
ws
,
wc
,
devID
,
true
);
{
mkinput
+=
GetClockSec
()
-
start
;
if
(
!
batch
)
{
break
;
}
time
=
GetClockSec
();
CheckNTErrors
(
batchEnc
.
order
==
2
,
"wrong tensor order of the sequence batch"
);
CheckNTErrors
(
batchEnc
.
order
==
2
,
"wrong tensor order of the sequence batch"
);
/* output probabilities */
/* output probabilities */
XTensor
output
;
XTensor
output
;
start
=
GetClockSec
();
/* make the network */
/* make the network */
if
(
model
->
isLM
)
if
(
model
->
isLM
)
model
->
MakeLM
(
batchEnc
,
output
,
paddingEnc
,
true
);
model
->
MakeLM
(
batchEnc
,
output
,
paddingEnc
,
true
);
...
@@ -196,11 +217,12 @@ void T2TTrainer::Train(const char * fn, const char * validFN, const char * model
...
@@ -196,11 +217,12 @@ void T2TTrainer::Train(const char * fn, const char * validFN, const char * model
else
{
else
{
ShowNTErrors
(
"Illegal model type!"
);
ShowNTErrors
(
"Illegal model type!"
);
}
}
forward
+=
GetClockSec
()
-
start
;
/* back-propagation for obtaining gradients */
/* back-propagation for obtaining gradients */
//if (labelSmoothingP > 0)
//if (labelSmoothingP > 0)
// LabelSmooth(&gold, &goldSmoothed, labelSmoothingP);
// LabelSmooth(&gold, &goldSmoothed, labelSmoothingP);
start
=
GetClockSec
();
XTensor
labelOnehot
;
XTensor
labelOnehot
;
labelOnehot
=
IndexToOnehot
(
label
,
vSizeTgt
,
labelSmoothingP
);
labelOnehot
=
IndexToOnehot
(
label
,
vSizeTgt
,
labelSmoothingP
);
...
@@ -229,7 +251,9 @@ void T2TTrainer::Train(const char * fn, const char * validFN, const char * model
...
@@ -229,7 +251,9 @@ void T2TTrainer::Train(const char * fn, const char * validFN, const char * model
net
.
Backward
(
lossTensor
);
net
.
Backward
(
lossTensor
);
//net.Backward(output, labelOnehot, paddingDec, CROSSENTROPY);
//net.Backward(output, labelOnehot, paddingDec, CROSSENTROPY);
//net.Backward(output, label, labelSmoothingP, CROSSENTROPY);
//net.Backward(output, label, labelSmoothingP, CROSSENTROPY);
backward
+=
GetClockSec
()
-
start
;
start
=
GetClockSec
();
gradStep
+=
1
;
gradStep
+=
1
;
loss
+=
prob
;
loss
+=
prob
;
wordCount
+=
wc
;
wordCount
+=
wc
;
...
@@ -248,10 +272,12 @@ void T2TTrainer::Train(const char * fn, const char * validFN, const char * model
...
@@ -248,10 +272,12 @@ void T2TTrainer::Train(const char * fn, const char * validFN, const char * model
gradStep
=
0
;
gradStep
=
0
;
validStep
++
;
validStep
++
;
update
+=
GetClockSec
()
-
start
;
}
}
}
}
else
else
nSkipped
++
;
nSkipped
++
;
train_time
+=
GetClockSec
()
-
time
;
if
(
++
step
>=
nstep
){
if
(
++
step
>=
nstep
){
isEnd
=
true
;
isEnd
=
true
;
...
@@ -260,11 +286,19 @@ void T2TTrainer::Train(const char * fn, const char * validFN, const char * model
...
@@ -260,11 +286,19 @@ void T2TTrainer::Train(const char * fn, const char * validFN, const char * model
if
(
step
%
100
==
0
)
{
if
(
step
%
100
==
0
)
{
double
elapsed
=
GetClockSec
()
-
startT
;
double
elapsed
=
GetClockSec
()
-
startT
;
startT
=
GetClockSec
();
XPRINT6
(
0
,
stderr
,
"[Time] elapsed=%.5lfs,mkinput=%.5lfs,train_time=%.5lfs,forward=%.5lfs, backward=%.5lf, update=%.5lf
\n
"
,
elapsed
,
mkinput
,
train_time
,
forward
,
backward
,
update
);
XPRINT8
(
0
,
stderr
,
"[INFO] elapsed=%.1fs, step=%d, epoch=%d, tword=%d, sword=%d, loss=%.3f, ppl=%.3f, sppl=%.3f"
,
XPRINT8
(
0
,
stderr
,
"[INFO] elapsed=%.1fs, step=%d, epoch=%d, tword=%d, sword=%d, loss=%.3f, ppl=%.3f, sppl=%.3f"
,
elapsed
,
step
,
epoch
,
wordCountTotal
,
wordCountBatch
,
loss
/
wordCount
,
exp
(
loss
/
wordCount
),
exp
(
prob
/
wc
));
elapsed
,
step
,
epoch
,
wordCountTotal
,
wordCountBatch
,
loss
/
wordCount
,
exp
(
loss
/
wordCount
),
exp
(
prob
/
wc
));
if
(
!
doUpdate
)
if
(
!
doUpdate
)
XPRINT
(
0
,
stderr
,
" (no update)"
);
XPRINT
(
0
,
stderr
,
" (no update)"
);
XPRINT
(
0
,
stderr
,
"
\n
"
);
XPRINT
(
0
,
stderr
,
"
\n
"
);
mkinput
=
0.0
;
train_time
=
0.0
;
forward
=
0.0
;
backward
=
0.0
;
update
=
0.0
;
}
}
if
(
nStepCheckpoint
>
0
&&
++
nStepCheck
>=
nStepCheckpoint
){
if
(
nStepCheckpoint
>
0
&&
++
nStepCheck
>=
nStepCheckpoint
){
...
...
This diff is collapsed.
Click to expand it.
source/tensor/XTensor.cpp
查看文件 @
1da50ae2
...
@@ -25,6 +25,7 @@
...
@@ -25,6 +25,7 @@
* $Update by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2017-11-18 bug fixes
* $Update by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2017-11-18 bug fixes
*
*
*/
*/
#include "halfLib/half/half.hpp"
#include <stdio.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdlib.h>
...
@@ -50,6 +51,11 @@
...
@@ -50,6 +51,11 @@
#include "function/Identity.h"
#include "function/Identity.h"
#include "core/CHeader.h"
#include "core/CHeader.h"
//#include "halfLib/HalfFloat/umHalf.h"
#ifdef USE_CUDA
#ifdef USE_CUDA
// the CUDA stuff
// the CUDA stuff
...
@@ -376,6 +382,7 @@ XTensor& XTensor::operator= (const XTensor& tensor)
...
@@ -376,6 +382,7 @@ XTensor& XTensor::operator= (const XTensor& tensor)
XMemCopy
(
data
,
devID
,
tensor
.
data
,
tensor
.
devID
,
size
);
XMemCopy
(
data
,
devID
,
tensor
.
data
,
tensor
.
devID
,
size
);
if
(
dataHost
!=
NULL
&&
tensor
.
dataHost
!=
NULL
)
if
(
dataHost
!=
NULL
&&
tensor
.
dataHost
!=
NULL
)
XMemCopy
(
dataHost
,
-
1
,
tensor
.
dataHost
,
tensor
.
devID
,
size
);
XMemCopy
(
dataHost
,
-
1
,
tensor
.
dataHost
,
tensor
.
devID
,
size
);
XMemCopy
(
dataHost
,
-
1
,
tensor
.
dataHost
,
tensor
.
devID
,
size
);
}
}
else
{
else
{
DestroyData
();
DestroyData
();
...
@@ -1854,6 +1861,16 @@ void XTensor::Dump(FILE * file, const char * label, const int n, const int beg,
...
@@ -1854,6 +1861,16 @@ void XTensor::Dump(FILE * file, const char * label, const int n, const int beg,
}
}
}
}
else
if
(
dataType
==
X_FLOAT16
)
{
int
end
=
MIN
(
n
>
0
?
beg
+
n
:
beg
+
unitNum
,
unitNum
);
for
(
int
i
=
beg
;
i
<
end
;
i
++
)
{
halfCPU
f
=
((
halfCPU
*
)
d
)[
i
];
if
(
i
==
beg
)
fprintf
(
file
,
"%hx"
,
f
);
else
fprintf
(
file
,
" %hx"
,
f
);
}
}
else
if
(
dataType
==
X_INT
)
{
else
if
(
dataType
==
X_INT
)
{
int
end
=
MIN
(
n
>
0
?
beg
+
n
:
beg
+
unitNum
,
unitNum
);
int
end
=
MIN
(
n
>
0
?
beg
+
n
:
beg
+
unitNum
,
unitNum
);
for
(
int
i
=
beg
;
i
<
end
;
i
++
){
for
(
int
i
=
beg
;
i
<
end
;
i
++
){
...
@@ -1900,9 +1917,22 @@ dump data to a file
...
@@ -1900,9 +1917,22 @@ dump data to a file
*/
*/
void
XTensor
::
Dump
(
const
XTensor
*
tensor
,
FILE
*
file
,
const
char
*
label
,
const
int
n
,
const
int
beg
,
const
int
verbose
)
void
XTensor
::
Dump
(
const
XTensor
*
tensor
,
FILE
*
file
,
const
char
*
label
,
const
int
n
,
const
int
beg
,
const
int
verbose
)
{
{
if
(
tensor
->
dataType
==
X_FLOAT
)
{
XTensor
a
(
tensor
->
order
,
tensor
->
dimSize
,
tensor
->
dataType
,
tensor
->
denseRatio
,
tensor
->
devID
,
tensor
->
mem
);
XTensor
a
(
tensor
->
order
,
tensor
->
dimSize
,
tensor
->
dataType
,
tensor
->
denseRatio
,
tensor
->
devID
,
tensor
->
mem
);
_CopyValues
(
tensor
,
&
a
);
_CopyValues
(
tensor
,
&
a
);
a
.
Dump
(
file
,
label
,
n
,
beg
,
verbose
);
a
.
Dump
(
file
,
label
,
n
,
beg
,
verbose
);
}
else
if
(
tensor
->
dataType
==
X_FLOAT16
)
{
XTensor
a
(
tensor
->
order
,
tensor
->
dimSize
,
X_FLOAT
,
tensor
->
denseRatio
,
tensor
->
devID
,
tensor
->
mem
);
_ConvertDataType
(
tensor
,
&
a
);
a
.
Dump
(
file
,
label
,
n
,
beg
,
verbose
);
}
else
{
ShowNTErrors
(
"TO DO!"
);
}
}
}
/*
/*
...
@@ -1980,6 +2010,14 @@ void XTensor::Read(FILE * file, const char * label)
...
@@ -1980,6 +2010,14 @@ void XTensor::Read(FILE * file, const char * label)
}
}
}
}
}
}
else
if
(
dataType
==
X_FLOAT16
){
for
(
int
i
=
0
;
i
<
unitNum
;
i
++
)
{
halfCPU
*
f
=
((
halfCPU
*
)
data
)
+
i
;
if
(
fscanf
(
file
,
"%hx"
,
f
)
<
1
)
{
ShowNTErrors
(
"Incorrect tensor format!"
);
}
}
}
else
{
else
{
ShowNTErrors
(
"TODO!"
);
ShowNTErrors
(
"TODO!"
);
}
}
...
@@ -2006,15 +2044,13 @@ void XTensor::Read(FILE * file, const char * label)
...
@@ -2006,15 +2044,13 @@ void XTensor::Read(FILE * file, const char * label)
}
}
}
}
do
{
do
{
c
=
fgetc
(
file
);
c
=
fgetc
(
file
);
}
while
(
c
!=
'\n'
&&
c
!=
EOF
);
}
while
(
c
!=
'\n'
&&
c
!=
EOF
);
XMemCopy
(
dataBackup
,
devID
,
data
,
-
1
,
GetDataSizeInChar
());
XMemCopy
(
dataBackup
,
devID
,
data
,
-
1
,
GetDataSizeInChar
());
data
=
dataBackup
;
data
=
dataBackup
;
delete
[](
char
*
)
dataBuf
;
delete
[](
char
*
)
dataBuf
;
}
}
/*
/*
...
...
This diff is collapsed.
Click to expand it.
source/tensor/core/utilities/FlushToMem.cu
查看文件 @
1da50ae2
...
@@ -97,7 +97,7 @@ void CudaCPUToGPUFlush(TensorList * mList, int devID, XMem * GPUMem)
...
@@ -97,7 +97,7 @@ void CudaCPUToGPUFlush(TensorList * mList, int devID, XMem * GPUMem)
/* copy the data from GPU memory to CPU memory */
/* copy the data from GPU memory to CPU memory */
void CudaGPUToCPUFlush(XTensor * tensor)
void CudaGPUToCPUFlush(XTensor * tensor)
{
{
CheckNTErrors((sizeof(DTYPE) == tensor->unitSize), "Unsupported data type.");
//
CheckNTErrors((sizeof(DTYPE) == tensor->unitSize), "Unsupported data type.");
if (tensor->dataHost != NULL)
if (tensor->dataHost != NULL)
delete[](char*)tensor->dataHost;
delete[](char*)tensor->dataHost;
...
...
This diff is collapsed.
Click to expand it.
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论