linye / Tensor.LowPrecision / Commits / 00e5e46d

Commit 00e5e46d authored Aug 05, 2019 by linye
fnnlm float16 training supported; there remain some bugs and liutengbo needs to fix them
parent d2c7e39a

Showing 1 changed file with 184 additions and 4 deletions.

source/sample/fnnlm/FNNLM.cpp  (+184 −4, view file @ 00e5e46d)
@@ -77,6 +77,7 @@ void Backward(XTensor inputs[], XTensor &output, XTensor &gold, LOSS_FUNCTION_NA
               FNNModel &model, FNNModel &grad, FNNNet &net);
 void ForwardAutoDiff(XTensor inputs[], XTensor &output, FNNModel &model);
 void ForwardAutoDiff(NGram * ngrams, int batch, XTensor &output, FNNModel &model);
+void ForwardAutoDiffLin(NGram * ngrams, int batch, XTensor &output, FNNModel &model);
 
 /*
 entry of the program
@@ -123,6 +124,8 @@ int FNNLMMain(int argc, const char ** argv)
 
     /* load arguments */
     LoadArgs(argc, argv, model);
 
+    srand(1);
+
     /* check the setting */
     Check(model);
@@ -543,11 +546,34 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
 
             /* forward + backward process */
             /* this is implemented by gather function */
-            ForwardAutoDiff(ngrams, ngramNum, output, model);
+            //ForwardAutoDiff(ngrams, ngramNum, output, model);
+            ForwardAutoDiffLin(ngrams, ngramNum, output, model);
+
+            //XNet net;
+            //net.ShowNetwork(stdout, &output);
+
+            //FILE* fOut1 = fopen("test-output", "w");
+            //output.Dump(&output, fOut1, "output");
+            //fclose(fOut1);
+            //fflush(fOut1);
+
+            //if (step==216)
+            //{
+            //  exit(1);
+            //}
 
             /* this is implemented by multiply function */
             lossTensor = CrossEntropy(output, gold);
+
+            //FILE* fOut1 = fopen("test3", "a");
+            //fprintf(fOut1, "step=%d ", step);
+            //lossTensor.Dump(&lossTensor, fOut1, "lossTensor:");
+            //fclose(fOut1);
+            //fflush(fOut1);
+
+            int stepTmp = step + 1;
 
             /* automatic differentiation */
             autoDiffer.Backward(lossTensor);
@@ -567,7 +593,7 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
                 break;
             }
 
-            if (step % 100 == 0) {
+            if (step % 1 == 0) {
                 double elapsed = GetClockSec() - startT;
                 XPRINT5(0, stderr, "[INFO] elapsed=%.1fs, step=%d, epoch=%d, ngram=%d, ppl=%.3f\n",
                         elapsed, step, epoch + 1, wordCountTotal, exp(loss / wordCount));
@@ -637,13 +663,21 @@ void Update(FNNModel &model, FNNModel &grad, float epsilon, bool isNodeGrad)
         gradList.Add(model.embeddingW.grad);
     }
 
+    //FILE* fOut1 = fopen("test-2", "a");
     for (int i = 0; i < paraList.count; i++) {
         XTensor * para = (XTensor*)paraList.GetItem(i);
         XTensor * paraGrad = (XTensor*)gradList.GetItem(i);
 
+        //fprintf(fOut1, "id=%d ", para->id);
+        //para->Dump(para, fOut1, "para:", 50);
+        //paraGrad->Dump(paraGrad, fOut1, "paraGrad:", 50);
+
         /* the delta rule */
         _Sum(para, paraGrad, para, -epsilon);
     }
+    //fprintf(fOut1, "\n");
+    //fclose(fOut1);
+    //fflush(fOut1);
 }
 
 /*
@@ -792,8 +826,16 @@ void InitZeroOneTensor2DFp16(XTensor &tensor, int rowNum, int colNum, int * rows
 {
     InitTensor2DV2(&tensor, rowNum, colNum, X_FLOAT16, devID);
+    XTensor tensor1;
+    InitTensor2DV2(&tensor1, rowNum, colNum, X_FLOAT, devID);
+    tensor1.SetZeroAll();
 
     /* set none-zero cells */
-    _SetDataFixed(&tensor, 1.0);
+    for (int i = 0; i < itemNum; i++)
+        tensor1.Set2D(1.0F, rows[i], cols[i]);
+
+    _ConvertDataType(&tensor1, &tensor);
 }
 
 /*
@@ -1086,14 +1128,152 @@ void ForwardAutoDiff(NGram * ngrams, int batch, XTensor &output, FNNModel &model
     hidden = Reshape(embeddingBig, embeddingBig.order, dimSize);
 
     /* hidden layers */
     for (int i = 0; i < depth; i++)
+    {
+        //XTensor hiddenBefore;
+        //hiddenBefore = MMul(hidden, model.hiddenW[i]) + model.hiddenB[i];
+        //if (hiddenBefore.dataType == X_FLOAT16) {
+        //    XTensor hiddenBeforeFp32;
+        //    hiddenBeforeFp32 = ConvertDataType(hiddenBefore, X_FLOAT);
+        //    XTensor hiddenFp32;
+        //    hiddenFp32 = HardTanH(hiddenBeforeFp32);
+        //    hidden = ConvertDataType(hiddenFp32, X_FLOAT16);
+        //}
+        //else {
+        //    hidden = HardTanH(hiddenBefore);
+        //}
         hidden = HardTanH(MMul(hidden, model.hiddenW[i]) + model.hiddenB[i]);
+    }
 
     /* output layer */
     //output = LogSoftmax(MMul(hidden, model.outputW) + model.outputB, 1);
+    //XTensor softmaxBefore;
+    //softmaxBefore = MMul(hidden, model.outputW) + model.outputB;
+    //if (softmaxBefore.dataType == X_FLOAT16) {
+    //    XTensor softmaxBeforeFp32;
+    //    softmaxBeforeFp32 = ConvertDataType(softmaxBefore, X_FLOAT);
+    //    XTensor outputeFp32;
+    //    outputeFp32 = Softmax(softmaxBeforeFp32, 1);
+    //    output = ConvertDataType(outputeFp32, X_FLOAT16);
+    //}
+    //else {
+    //    output = Softmax(softmaxBefore, 1);
+    //}
     output = Softmax(MMul(hidden, model.outputW) + model.outputB, 1);
 }
 
+void ForwardAutoDiffLin(NGram * ngrams, int batch, XTensor &output, FNNModel &model)
+{
+    int n = model.n;
+    int depth = model.hDepth;
+
+    XTensor words;
+    XTensor embeddingBig;
+    XTensor hidden;
+    XTensor b;
+
+    int size = batch * (n - 1);
+    int * index = new int[size];
+
+    for (int i = 0; i < batch; i++) {
+        for (int j = 0; j < n - 1; j++) {
+            int a = i * (n - 1) + j;
+            index[a] = ngrams[i].words[j];
+        }
+    }
+
+    InitTensor1DV2(&words, size, X_INT, model.devID);
+    words.SetData(index, size);
+
+    /*test for Gather float16 datatype backward*/
+    //XTensor embeddingW16;
+    //XTensor embeddingBig16;
+    //embeddingW16 = ConvertDataType(model.embeddingW, X_FLOAT16);
+    //embeddingBig16 = Gather(embeddingW16, words);
+    //embeddingBig = ConvertDataType(embeddingBig16, X_FLOAT);
+    embeddingBig = Gather(model.embeddingW, words);
+
+    delete[] index;
+
+    int dimSize[2];
+    dimSize[0] = embeddingBig.GetDim(0) / (n - 1);
+    dimSize[1] = embeddingBig.GetDim(1) * (n - 1);
+
+    /*test for Reshape float16 datatype backward*/
+    //XTensor embeddingBig16;
+    //XTensor hidden16;
+    //embeddingBig16 = ConvertDataType(embeddingBig, X_FLOAT16);
+    //hidden16 = Reshape(embeddingBig16, embeddingBig16.order, dimSize);
+    //hidden = ConvertDataType(hidden16, X_FLOAT);
+    hidden = Reshape(embeddingBig, embeddingBig.order, dimSize);
+
+    /* hidden layers */
+    for (int i = 0; i < depth; i++) {
+        /*test for MMul float16 backward*/
+        //XTensor hiddenW16;
+        //XTensor hidden16;
+        //XTensor hiddenBefore16;
+        //XTensor hiddenBefore;
+        //hiddenW16 = ConvertDataType(model.hiddenW[i], X_FLOAT16);
+        //hidden16 = ConvertDataType(hidden, X_FLOAT16);
+        //hiddenBefore16 = MMul(hidden16, hiddenW16);
+        //hiddenBefore = ConvertDataType(hiddenBefore16, X_FLOAT);
+        //hidden = HardTanH(hiddenBefore + model.hiddenB[i]);
+
+        /*test for HardTanH and Sum float16 backward*/
+        //XTensor hiddenBefore;
+        //XTensor hiddenBefore16;
+        //XTensor hiddenB16;
+        //XTensor hidden16;
+        //hiddenBefore = MMul(hidden, model.hiddenW[i]);
+        //hiddenBefore16 = ConvertDataType(hiddenBefore,X_FLOAT16);
+        //hiddenB16 = ConvertDataType(model.hiddenB[i], X_FLOAT16);
+        //hidden16 = HardTanH(hiddenBefore16 + hiddenB16);
+        //hidden = ConvertDataType(hidden16, X_FLOAT);
+
+        hidden = HardTanH(MMul(hidden, model.hiddenW[i]) + model.hiddenB[i]);
+    }
+
+    /* output layer */
+    /*test for MMul float16 backward*/
+    //XTensor outputW16;
+    //XTensor hidden16;
+    //XTensor outputBefore16;
+    //XTensor outputBefore;
+    //outputW16 = ConvertDataType(model.outputW, X_FLOAT16);
+    //hidden16 = ConvertDataType(hidden, X_FLOAT16);
+    //outputBefore16 = MMul(hidden16, outputW16);
+    //outputBefore = ConvertDataType(outputBefore16, X_FLOAT);
+    //output = Softmax(outputBefore + model.outputB, 1);
+
+    /*test for and Sum float16 backward*/
+    //XTensor outputBefore;
+    //XTensor outputBefore16;
+    //XTensor outputB16;
+    //XTensor output16;
+    //XTensor softmaxBefore16;
+    //XTensor softmaxBefore;
+    //outputBefore = MMul(hidden, model.outputW);
+    //outputBefore16 = ConvertDataType(outputBefore, X_FLOAT16);
+    //outputB16 = ConvertDataType(model.outputB, X_FLOAT16);
+    //softmaxBefore16 = outputBefore16 + outputB16;
+    //softmaxBefore = ConvertDataType(softmaxBefore16, X_FLOAT);
+    //output = Softmax(softmaxBefore, 1);
+
+    /*test for Softmax and Sum float16 backward*/
+    XTensor softmaxBefore;
+    XTensor softmaxBefore16;
+    XTensor output16;
+    softmaxBefore = MMul(hidden, model.outputW) + model.outputB;
+    softmaxBefore16 = ConvertDataType(softmaxBefore, X_FLOAT16);
+    output16 = Softmax(softmaxBefore16, 1);
+    output = ConvertDataType(output16, X_FLOAT);
+    //output = Softmax(MMul(hidden, model.outputW) + model.outputB, 1);
+}
+
 /*
 forward process (with tensor connections) (this is implemented by multiply function)
 >> inputs - input word representations
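The float16 support exercised in ForwardAutoDiffLin follows a single bracketing pattern throughout: compute a value in float32, convert it to X_FLOAT16 just before the operator under test, run that operator in half precision, then convert the result back to X_FLOAT so that CrossEntropy and the backward pass keep running in full precision. The sketch below isolates that pattern for the active Softmax test path, using only calls that already appear in the diff (MMul, ConvertDataType, Softmax); the helper name OutputLayerFp16 is hypothetical and not part of the commit, and the surrounding XTensor and FNNModel declarations are assumed to come from the FNNLM sample.

    /* minimal sketch (assumption: compiled inside the FNNLM sample, which
       provides XTensor, FNNModel, MMul, Softmax and ConvertDataType) */
    void OutputLayerFp16(XTensor &hidden, FNNModel &model, XTensor &output)
    {
        XTensor softmaxBefore;
        XTensor softmaxBefore16;
        XTensor output16;

        /* pre-softmax activations computed in float32 */
        softmaxBefore = MMul(hidden, model.outputW) + model.outputB;

        /* cast to half precision and run Softmax in fp16 (the operator under test) */
        softmaxBefore16 = ConvertDataType(softmaxBefore, X_FLOAT16);
        output16 = Softmax(softmaxBefore16, 1);

        /* cast back to float32 so CrossEntropy and autoDiffer.Backward stay in full precision */
        output = ConvertDataType(output16, X_FLOAT);
    }

The commented-out blocks in the diff apply the same bracketing to Gather, Reshape, MMul, HardTanH and Sum one operator at a time, which appears to be how the remaining bugs mentioned in the commit message are being isolated.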