Emmay / NiuTrans.Tensor / Commits

Commit b405b50e authored Aug 04, 2018 by xiaotong
bug fixes
parent 2a7e0de5
Showing 10 changed files with 56 additions and 28 deletions.
source/network/XBackwardMath.cpp            +0  -1
source/sample/transformer/T2TAttention.cpp  +1  -1
source/sample/transformer/T2TEmbedding.cpp  +2  -2
source/sample/transformer/T2TEncoder.cpp    +2  -8
source/sample/transformer/T2TFNN.cpp        +9  -5
source/sample/transformer/T2TModel.cpp      +2  -1
source/sample/transformer/T2TOutput.cpp     +4  -1
source/sample/transformer/T2TTrainer.cpp    +28 -7
source/sample/transformer/T2TTrainer.h      +7  -1
source/tensor/XTensor.cpp                   +1  -1
source/network/XBackwardMath.cpp  (view file @ b405b50e)

@@ -381,7 +381,6 @@ void XMathGrad::GradMatrixMulBatched(XTensor * node)
     XNoder::MakeGrad(a);
     XNoder::MakeGrad(b);
-    XTensor * c = node;
     XTensor * dedc = node->grad;
     XTensor * deda = a->grad;
     XTensor * dedb = b->grad;
source/sample/transformer/T2TAttention.cpp  (view file @ b405b50e)

@@ -60,7 +60,7 @@ void T2TAttention::InitModel(int argc, const char ** argv, int myDevID, XMem * m
     LoadParamInt(argc, argv, "d", &dk, DEFAULT_BEDDING_SIZE);
     LoadParamInt(argc, argv, "d", &dv, DEFAULT_BEDDING_SIZE);
     LoadParamInt(argc, argv, "d", &d, DEFAULT_BEDDING_SIZE);
-    LoadParamFloat(argc, argv, "attminmax", &minmax, 0.08F);
+    LoadParamFloat(argc, argv, "attminmax", &minmax, 0.1F);

     InitTensor2D(&wk, d, dk, X_FLOAT, devID, mem);
     InitTensor2D(&wq, d, dk, X_FLOAT, devID, mem);
source/sample/transformer/T2TEmbedding.cpp  (view file @ b405b50e)

@@ -62,7 +62,7 @@ void T2TEmbedder::InitModel(int argc, const char ** argv, int myDevID, XMem * my
     InitTensor2D(&w, vSize, eSize, X_FLOAT, devID, mem);

-    w.SetDataRandn(0, sqrt((float)eSize));
+    w.SetDataRandn(0, 1/sqrt((float)eSize));

     /* create the positional embedding matrix */
     MakePosEmbedding(eSize, d, maxLength);

@@ -135,7 +135,7 @@ XTensor T2TEmbedder::Make(XTensor &input)
     XTensor wordEmbedding;

     /* then we make word embeddings */
-    wordEmbedding = MMul(&input, w);
+    wordEmbedding = MMul(input, w);

     /* we sum over the two embeddings */
     return wordEmbedding + posEmbedding;
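Assuming SetDataRandn(mean, stddev) fills the tensor from a normal distribution, the first hunk above shrinks the spread of the word-embedding weights from sqrt(eSize) to 1/sqrt(eSize), which keeps activations roughly unit-scale as eSize grows. A minimal standalone sketch of the new behavior; the helper name and the use of std::normal_distribution are illustrative, not the NiuTrans.Tensor API:

    #include <cmath>
    #include <cstdio>
    #include <random>
    #include <vector>

    /* fill a vSize x eSize embedding table from N(0, (1/sqrt(eSize))^2),
       mirroring w.SetDataRandn(0, 1/sqrt((float)eSize)) above */
    std::vector<float> MakeEmbeddingWeights(int vSize, int eSize, unsigned seed)
    {
        std::mt19937 gen(seed);
        std::normal_distribution<float> dist(0.0F, 1.0F / std::sqrt((float)eSize));
        std::vector<float> w((size_t)vSize * (size_t)eSize);
        for (float &x : w)
            x = dist(gen);
        return w;
    }

    int main()
    {
        std::vector<float> w = MakeEmbeddingWeights(100, 512, 1);
        std::printf("target stddev = %f, w[0] = %f\n",
                    1.0F / std::sqrt(512.0F), w[0]);
        return 0;
    }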
source/sample/transformer/T2TEncoder.cpp  (view file @ b405b50e)

@@ -103,10 +103,7 @@ XTensor AttEncoder::Make(XTensor &input)
     /* TODO: dropout */

     /* layer normalization */
-    ln = layerNorms[i].Make(res);
-
-    /* input of next layer */
-    x = ln;
+    x = layerNorms[i].Make(res);

     /* fnn */
     fnn = fnns[i].Make(x);

@@ -117,10 +114,7 @@ XTensor AttEncoder::Make(XTensor &input)
     /* TODO: dropout */

     /* layer normalization */
-    ln = layerNorms[i].Make(res);
-
-    /* input of next layer */
-    x = ln;
+    x = layerNorms[i].Make(res);
 }

 return x;
source/sample/transformer/T2TFNN.cpp  (view file @ b405b50e)

@@ -19,6 +19,7 @@
  * $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-31
  */

+#include <math.h>
 #include "T2TFNN.h"
 #include "T2TUtility.h"
 #include "T2TEmbedding.h"

@@ -58,7 +59,7 @@ void T2TFNN::InitModel(int argc, const char ** argv, int myDevID, XMem * myMem)
     LoadParamInt(argc, argv, "d", &inSize, DEFAULT_BEDDING_SIZE);
     LoadParamInt(argc, argv, "d", &outSize, DEFAULT_BEDDING_SIZE);
     LoadParamInt(argc, argv, "fnnh", &hSize, DEFAULT_BEDDING_SIZE);
-    LoadParamFloat(argc, argv, "fnnminmax", &minmax, 0.08F);
+    LoadParamFloat(argc, argv, "fnnminmax", &minmax, 0.1F);

     InitTensor2D(&w1, inSize, hSize, X_FLOAT, devID, mem);
     InitTensor1D(&b1, hSize, X_FLOAT, devID, mem);

@@ -66,10 +67,13 @@ void T2TFNN::InitModel(int argc, const char ** argv, int myDevID, XMem * myMem)
     InitTensor2D(&w2, hSize, outSize, X_FLOAT, devID, mem);
     InitTensor1D(&b2, outSize, X_FLOAT, devID, mem);

-    w1.SetDataRand(-minmax, minmax);
-    b1.SetDataRand(-minmax, minmax);
-    w2.SetDataRand(-minmax, minmax);
-    b2.SetDataRand(-minmax, minmax);
+    float finfout1 = sqrt(6/(inSize + hSize));
+    float finfout2 = sqrt(6/(hSize + outSize));
+
+    w1.SetDataRand(-finfout1, finfout1);
+    b1.SetZeroAll();
+    w2.SetDataRand(-finfout2, finfout2);
+    b2.SetZeroAll();
 }

/*
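The T2TFNN (and T2TOutput) hunks replace the uniform [-minmax, minmax] initialization with the Xavier/Glorot bound sqrt(6/(fan_in + fan_out)) and zeroed biases. A hedged sketch of that bound follows; GlorotBound is a hypothetical helper, and the cast is deliberate, since dividing two ints as in the committed sqrt(6/(inSize + hSize)) truncates to 0 whenever the fans sum past 6:

    #include <cmath>
    #include <cstdio>

    /* Xavier/Glorot uniform bound: sqrt(6 / (fan_in + fan_out)).
       Casting before the division matters: with int operands,
       6 / (fanIn + fanOut) truncates to 0 for any realistic layer sizes. */
    static float GlorotBound(int fanIn, int fanOut)
    {
        return std::sqrt(6.0F / (float)(fanIn + fanOut));
    }

    int main()
    {
        /* 512 stands in for DEFAULT_BEDDING_SIZE here (an assumption) */
        std::printf("w1 bound = %f\n", GlorotBound(512, 512)); /* about 0.076 */
        return 0;
    }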
source/sample/transformer/T2TModel.cpp  (view file @ b405b50e)

@@ -22,6 +22,7 @@
 #include "T2TModel.h"
 #include "T2TUtility.h"
 #include "../../tensor/core/CHeader.h"
+
 namespace transformer
 {

@@ -81,9 +82,9 @@ make the entire network (with the output softmax layer)
 */
 void T2TModel::Make(XTensor &input, XTensor &output)
 {
-    if(isLM){
     XTensor encoding;
+    if(isLM){
         encoding = MakeEncoding(input);
         outputLayer.Make(encoding, output);
     }
source/sample/transformer/T2TOutput.cpp  (view file @ b405b50e)

@@ -19,6 +19,7 @@
  * $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-31
  */

+#include <math.h>
 #include "T2TOutput.h"
 #include "T2TUtility.h"
 #include "T2TEmbedding.h"

@@ -61,7 +62,9 @@ void T2TOutput::InitModel(int argc, const char ** argv, int myDevID, XMem * myMe
     LoadParamFloat(argc, argv, "outputminmax", &minmax, 0.08F);

     InitTensor2D(&w, hSize, vSize, X_FLOAT, devID, mem);

-    w.SetDataRand(-minmax, minmax);
+    float finfout = sqrt(6/(hSize + vSize));
+    w.SetDataRand(-finfout, finfout);
 }

/*
source/sample/transformer/T2TTrainer.cpp  (view file @ b405b50e)

@@ -59,6 +59,8 @@ void T2TTrainer::Init(int argc, const char ** argv)
     LoadParamInt(argc, argv, "wbatch", &wBatchSize, 1);
     LoadParamInt(argc, argv, "nepoch", &nepoch, 1);
     LoadParamInt(argc, argv, "nstep", &nstep, 1);
+    LoadParamInt(argc, argv, "d", &d, 512);
+    LoadParamInt(argc, argv, "nwarmup", &nwarmup, 4000);
     LoadParamInt(argc, argv, "vsize", &vSize, 1);
     LoadParamBool(argc, argv, "sorted", &isLenSorted, false);
     LoadParamInt(argc, argv, "bufsize", &bufSize, 50000);

@@ -82,6 +84,7 @@ void T2TTrainer::Train(const char * fn, T2TModel * model)
     int wordCountTotal = 0;
     bool isEnd = false;
     float loss = 0;
+    float lr = 0;

     XNet net;

@@ -108,8 +111,12 @@ void T2TTrainer::Train(const char * fn, T2TModel * model)
     /* back-propagation for obtaining gradients */
     net.Backward(output, batch, CROSSENTROPY);

+    /* learning rate */
+    lr = (1/sqrt((float)d)) * MIN(pow(step + 1, -0.5), (step + 1) * pow(nwarmup, -1.5));
+    lr = 0.000005F;
+
     /* update the parameters */
-    Update(model);
+    Update(model, lr);

     /* get probabilities */
     float prob = GetProb(&output, &batch, NULL);

@@ -125,8 +132,8 @@ void T2TTrainer::Train(const char * fn, T2TModel * model)
     if(step % 1 == 0) {
         double elapsed = GetClockSec() - startT;
-        XPRINT5(0, stderr, "[INFO] elapsed=%.1fs, step=%d, epoch=%d, ngram=%d, ppl=%.3f\n",
-                elapsed, step, epoch + 1, wordCountTotal, exp(loss/wordCount));
+        XPRINT6(0, stderr, "[INFO] lr=%e, elapsed=%.1fs, step=%d, epoch=%d, ngram=%d, ppl=%.3f\n",
+                lr, elapsed, step, epoch + 1, wordCountTotal, exp(loss/wordCount));
     }
 }

@@ -135,8 +142,8 @@ void T2TTrainer::Train(const char * fn, T2TModel * model)
     double elapsed = GetClockSec() - startT;
-    XPRINT5(0, stderr, "[INFO] elapsed=%.1fs, step=%d, epoch=%d, ngram=%d, ppl=%.3f\n",
-            elapsed, step, epoch, wordCountTotal, exp(loss/wordCount));
+    XPRINT6(0, stderr, "[INFO] lr=%e, elapsed=%.1fs, step=%d, epoch=%d, ngram=%d, ppl=%.3f\n",
+            lr, elapsed, step, epoch, wordCountTotal, exp(loss/wordCount));
     XPRINT3(0, stderr, "[INFO] training finished (took %.1fs, step=%d and epoch=%d)\n", elapsed, step, epoch);
 }

@@ -318,9 +325,13 @@ float T2TTrainer::GetProb(XTensor * output, XTensor * gold, XTensor * wordProbs)
 /*
 update the model by delta rule
 \theta_new = \theta - \lrate * grad
+where
+\lrate = d^-0.5 * min(stepNum^-0.5, stepNum * warmupStepNum^-1.5)
+
 >> model - the t2t model
+>> lr - learning rate
 */
-void T2TTrainer::Update(T2TModel * model)
+void T2TTrainer::Update(T2TModel * model, const float lr)
 {
     XList ws(100);

@@ -342,8 +353,18 @@ void T2TTrainer::Update(T2TModel * model)
     CheckNTErrors(para != NULL, "NULL parameter tensor!");
     CheckNTErrors(paraGrad != NULL, "NULL gradient tensor!");

+    /*DTYPE * d = (DTYPE*)paraGrad->data;
+    for(int i = 0; i < paraGrad->unitNum; i++){
+        if(IsINF(d[i])){
+            fprintf(stderr, "isinf %d\n", i);
+        }
+        if(IsNAN(d[i])){
+            fprintf(stderr, "isnan %d\n", i);
+        }
+    }*/
+
     /* the delta rule */
-    _Sum(para, paraGrad, para, -lrate);
+    _Sum(para, paraGrad, para, -lr);
 }
 }
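The comment added to Update() documents the Transformer warm-up schedule, \lrate = d^-0.5 * min(stepNum^-0.5, stepNum * warmupStepNum^-1.5). Note that the training loop in this commit computes it and then immediately overrides lr with the constant 0.000005F, so the constant is what Update() actually receives. A self-contained sketch of the schedule itself (WarmupLR is a hypothetical helper):

    #include <algorithm>
    #include <cmath>
    #include <cstdio>
    #include <initializer_list>

    /* Transformer warm-up schedule, matching the lr line in Train():
       lr = d^-0.5 * min((step+1)^-0.5, (step+1) * nwarmup^-1.5) */
    static float WarmupLR(int step, int d, int nwarmup)
    {
        float s = (float)(step + 1);
        return (1.0F / std::sqrt((float)d)) *
               std::min(std::pow(s, -0.5F), s * std::pow((float)nwarmup, -1.5F));
    }

    int main()
    {
        for (int step : {0, 1000, 4000, 20000})
            std::printf("step=%5d  lr=%e\n", step, WarmupLR(step, 512, 4000));
        return 0;
    }

The two branches of min cross exactly at step = nwarmup: the rate climbs linearly during warm-up, peaks, then decays as 1/sqrt(step).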
source/sample/transformer/T2TTrainer.h  (view file @ b405b50e)

@@ -64,6 +64,12 @@ public:
     /* indicates whether the sequence is sorted by length */
     bool isLenSorted;

+    /* dimension size of each inner layer */
+    int d;
+
+    /* step number of warm-up for training */
+    int nwarmup;
+
     /* vocabulary size of the source side */
     int vSize;

@@ -105,7 +111,7 @@ public:
     float GetProb(XTensor * output, XTensor * gold, XTensor * wordProbs);

     /* update the model by delta rule */
-    void Update(T2TModel * model);
+    void Update(T2TModel * model, const float lr);
};
source/tensor/XTensor.cpp  (view file @ b405b50e)

@@ -1046,7 +1046,7 @@ bool XTensor::Set3D(DTYPE value, int d0, int d1, int d2)
     CheckNTErrors(d2 >= 0 && d2 < dimSize[2], "dimension 1 is out of range!");
     CheckNTErrors(dataType == DEFAULT_DTYPE, "The tensor is not in default type.");

-    int dims[3] = {d0, d1, d1};
+    int dims[3] = {d0, d1, d2};

     return SetToDevice(devID, GetCell(dims, 3), value);
 }