NiuTrans.Tensor commit 55dfe49b
authored Apr 08, 2019 by xiaotong

model score with length penalty
parent 1f041016
Showing 6 changed files with 169 additions and 15 deletions (+169 -15):
source/sample/transformer/T2TLengthPenalty.cpp  +41  -0
source/sample/transformer/T2TLengthPenalty.h    +49  -0
source/sample/transformer/T2TPredictor.cpp      +8   -2
source/sample/transformer/T2TPredictor.h        +21  -8
source/sample/transformer/T2TSearch.cpp         +44  -5
source/sample/transformer/T2TSearch.h           +6   -0
source/sample/transformer/T2TLengthPenalty.cpp (new file, 0 → 100644)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2019, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "../../tensor/core/CHeader.h"
#include "T2TLengthPenalty.h"
using
namespace
nts
;
namespace
transformer
{
/*
GNMT-like length penalty: pl = ((5 + n)/(5 + 1))^\alpha
where n = length of the sequence
>> length - length of the sequence (for each entry)
>> lp - length penalty of the sequence (for each entry)
>> alpha - the parameter that controls the length preference
*/
void T2TLengthPenalizer::GNMT(const XTensor &length, XTensor &lp, float alpha)
{
    XTensor base;

    /* base = (5 + n)/(5 + 1) */
    base = ScaleAndShift(ScaleAndShift(length, 1.0F, 5.0F), 1.0F/(5 + 1));

    /* lp = base^\alpha */
    lp = Power(base, alpha);
}

}
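As a sanity check on the formula, here is a standalone scalar version (a hypothetical helper for experimentation, not part of the commit):

#include <cmath>
#include <cstdio>

/* scalar form of the GNMT-like penalty: lp = ((5 + n)/(5 + 1))^alpha */
float GNMTLengthPenalty(float n, float alpha)
{
    return powf((5.0F + n) / 6.0F, alpha);
}

int main()
{
    /* with alpha = 0.2F: n = 1 gives lp = 1 and lp grows slowly with n,
       so dividing a (negative) log-prob by lp favors longer hypotheses */
    for(int n = 1; n <= 16; n *= 2)
        printf("n = %2d -> lp = %.4f\n", n, GNMTLengthPenalty((float)n, 0.2F));
    return 0;
}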
source/sample/transformer/T2TLengthPenalty.h (new file, 0 → 100644)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2019, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2019-04-08
* Start of a new week - I just finished several documents.
* Writing document is harder than writing code :)
*/
#ifndef __T2TLENGTHPENALTY_H__
#define __T2TLENGTHPENALTY_H__

#include "../../tensor/XTensor.h"

using namespace nts;

namespace transformer
{

/* We intend to penalize short sequences because they have a higher score
   in a product of probability-like terms and thus more chances to beat
   others in search. */
class T2TLengthPenalizer
{
public:
    /* GNMT-like length penalty: pl = ((5 + n)/(5 + 1))^\alpha
       where n = length of the sequence */
    static void GNMT(const XTensor &length, XTensor &lp, float alpha);
};

}

#endif
\ No newline at end of file
source/sample/transformer/T2TPredictor.cpp
...
@@ -53,8 +53,14 @@ void T2TStateBundle::MakeStates(int num)
     states = new T2TState[num];
 
-    for(int i = 0; i < num; i++)
+    for(int i = 0; i < num; i++){
+        states[i].prediction = -1;
+        states[i].prob = 0;
+        states[i].probPath = 0;
+        states[i].modelScore = 0;
+        states[i].nstep = 0;
         states[i].last = NULL;
+    }
 }
 
 /* constructor */
...
@@ -124,7 +130,7 @@ void T2TPredictor::Predict(T2TStateBundle * next, XTensor * encoding, XTensor *
     inputDec = Concatenate(*inputLast, s->prediction, inputLast->GetDim(-1));
 
     /* prediction probabilities */
-    XTensor &output = next->score;
+    XTensor &output = next->prob;
 
     XTensor paddingDec;
     InitTensor3D(&paddingDec, inputDec.GetDim(0), inputDec.GetDim(1),
                  m->outputLayer->vSize, X_INT);
...
source/sample/transformer/T2TPredictor.h
...
@@ -24,6 +24,7 @@
 #define __T2TPREDICTOR_H__
 
 #include "T2TModel.h"
+#include "T2TLengthPenalty.h"
 
 namespace transformer
 {
...
@@ -36,11 +37,17 @@ public:
     /* we assume that the prediction is an integer */
     int prediction;
 
-    /* score of the prediction */
-    float score;
+    /* probability of every prediction (last state of the path) */
+    float prob;
 
-    /* score of the path */
-    float scorePath;
+    /* probability of every path */
+    float probPath;
+
+    /* model score of every path */
+    float modelScore;
+
+    /* number of steps we go over so far */
+    int nstep;
 
     /* pointer to the previous state */
     T2TState * last;
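Together with prediction, the last pointer is what keeps every hypothesis recoverable once search finishes; a minimal backtracking sketch (hypothetical helper, not part of this commit):

#include <algorithm>
#include <vector>

/* walk a finished hypothesis backwards through the T2TState chain
   and return its token ids in left-to-right order */
std::vector<int> BacktrackPredictions(T2TState * end)
{
    std::vector<int> tokens;
    for(T2TState * s = end; s != NULL; s = s->last)
        tokens.push_back(s->prediction);
    std::reverse(tokens.begin(), tokens.end());
    return tokens;
}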
...
@@ -56,11 +63,17 @@ public:
     /* id of the previous state that generates the current one */
     XTensor preID;
 
-    /* score of every prediction (last state of the path) */
-    XTensor score;
+    /* probability of every prediction (last state of the path) */
+    XTensor prob;
+
+    /* probability of every path */
+    XTensor probPath;
+
+    /* model score of every path */
+    XTensor modelScore;
 
-    /* score of every path */
-    XTensor scorePath;
+    /* step number of each hypothesis */
+    XTensor nstep;
 
     /* layers on the encoder side. We actually use the encoder output instead
        of all hidden layers. */
...
source/sample/transformer/T2TSearch.cpp
...
@@ -36,6 +36,7 @@ initialize the model
 void T2TSearch::InitModel(int argc, char ** argv)
 {
     LoadParamInt(argc, argv, "beamsize", &beamSize, 1);
+    LoadParamFloat(argc, argv, "lenalpha", &alpha, 0.2F);
 }
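Assuming LoadParamFloat follows the usual NiuTrans convention of reading a "-name value" pair from argv (an assumption here; the parser itself is not part of this diff), the new parameter would be set on the command line, e.g.:

<decoder binary> ... -beamsize 4 -lenalpha 0.2

Larger alpha values give a stronger preference for longer translations; alpha = 0 turns the penalty off, since lp = 1 for every length.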
/*
...
@@ -75,6 +76,9 @@ void T2TSearch::Search(T2TModel * model, XTensor * input, XTensor * padding, XTensor * output)
         /* predict the next state */
         predictor.Predict(next, &encoding, input, padding);
 
+        /* compute the model score (given the prediction probability) */
+        Score(cur, next);
+
         /* beam pruning */
         Generate(next);
     }
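Taken together, one decoding step now chains three calls; a schematic sketch of the loop (the scaffolding, such as maxLength and the cur/next hand-off, is hypothetical; only the three calls appear in this diff):

for(int l = 0; l < maxLength; l++){
    /* distribution over the next token for every live hypothesis */
    predictor.Predict(next, &encoding, input, padding);

    /* length-normalized model score for every candidate extension */
    Score(cur, next);

    /* prune the candidates down to the beam (top-k) */
    Generate(next);

    /* ... the bundle "next" then serves as "cur" for the following step ... */
}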
...
@@ -83,6 +87,37 @@ void T2TSearch::Search(T2TModel * model, XTensor * input, XTensor * padding, XTensor * output)
 }
 
 /*
+compute the model score for each hypothesis
+>> prev - the beam of the previous state
+>> beam - the beam that keeps a number of states
+*/
+void T2TSearch::Score(T2TStateBundle * prev, T2TStateBundle * beam)
+{
+    XTensor &score = beam->modelScore;
+    XTensor &prob = beam->prob;
+    XTensor &probPath = beam->probPath;
+    XTensor &lenPrev = prev->nstep;
+    XTensor &len = beam->nstep;
+    XTensor lp;
+
+    InitTensor(&score, &prob);
+
+    /* the log-scale probability of the entire sequence */
+    _Sum(&prob, &probPath, &score);
+
+    InitTensor(&len, &lenPrev);
+    InitTensor(&lp, &lenPrev);
+
+    /* len = lenPrev + 1 */
+    _ScaleAndShift(&lenPrev, &len, 1.0F, 1.0F);
+
+    /* the GNMT-like length penalty */
+    T2TLengthPenalizer::GNMT(len, lp, alpha);
+
+    /* score = log-prob/lp */
+    _Div(&score, &lp, &score);
+}
+
+/*
 generate tokens for the next state via beam pruning
 >> beam - the beam that keeps a number of states
 */
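Element-wise, the tensor calls above reduce to a few scalar operations; a minimal sketch (plain C++, hypothetical helper name, not part of the commit):

#include <cmath>

/* what Score computes for a single hypothesis */
float ModelScore(float prob, float probPath, float lenPrev, float alpha)
{
    float logProb = prob + probPath;                   /* _Sum: log-prob of the whole path */
    float len     = lenPrev + 1.0F;                    /* _ScaleAndShift(..., 1.0F, 1.0F)  */
    float lp      = powf((5.0F + len) / 6.0F, alpha);  /* T2TLengthPenalizer::GNMT         */
    return logProb / lp;                               /* _Div: score = log-prob/lp        */
}

Since log-probabilities are negative, dividing by a penalty that grows with length makes longer hypotheses less negative, which is exactly the correction for the short-sequence bias described in T2TLengthPenalty.h.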
...
@@ -93,7 +128,7 @@ void T2TSearch::Generate(T2TStateBundle * beam)
     int dimsTopK[MAX_TENSOR_DIM_NUM];
 
     XTensor scoreTopK;
-    XTensor &score = beam->score;
+    XTensor &score = beam->modelScore;
     XTensor &index = beam->prediction;
     XTensor &preID = beam->preID;
     int order = score.order;
...
@@ -131,16 +166,20 @@ void T2TSearch::Generate(T2TStateBundle * beam)
     /* "preID" represents the id (or the offset) of previous state used to make the current
        hypothesis. Note that we reshape the "score" tensor into a matrix where each
-       row means a previous state. The column number is size-of-beam * vocab-size. We,
-       therefore, divide the top-k index by vocab-size to compute the id of previous state for
-       each hypothesis in the top-k list. */
+       row means a previous state. The column number is size-of-beam * vocab-size. We,
+       therefore, divide entries of the top-k index by vocab-size to compute the id of
+       previous state for each hypothesis in the top-k list. */
     Descale(preID, sizeVocab);
 
-    /* Then we are going to do something similar to "preID". For the top-k predictions, we need
-       to know their indices in the vocabulary. We compute the offset of each prediction
+    /* Then, we do something similar to "preID". For the top-k predictions, we need
+       to know their indices in the vocabulary. We compute the offset of each prediction
        in the vocabulary by dividing it by vocab-size and taking the remainder. */
     Mod(index, sizeVocab);
 
+    score.Reshape(order, dims);
+
     /* we keep the top-k scores */
+    InitTensor(&score, &scoreTopK);
     CopyValues(scoreTopK, score);
 }
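Descale and Mod are just integer division and remainder over the flattened beam-by-vocab score matrix; a worked example with hypothetical numbers (not from this diff):

/* beamSize = 4, sizeVocab = 32000: a flattened top-k index k addresses
   row = previous state and column = word id of the score matrix */
int k         = 70003;
int sizeVocab = 32000;
int preID     = k / sizeVocab;   /* = 2     (what Descale computes) */
int wordID    = k % sizeVocab;   /* = 6003  (what Mod computes)     */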
/*
...
source/sample/transformer/T2TSearch.h
...
@@ -35,6 +35,9 @@ namespace transformer
 class T2TSearch
 {
 private:
+    /* the alpha parameter controls the length preference */
+    float alpha;
+
     /* predictor */
     T2TPredictor predictor;
...
@@ -57,6 +60,9 @@ public:
     /* search for the most promising states */
     void Search(T2TModel * model, XTensor * input, XTensor * padding, XTensor * output);
 
+    /* compute the model score for each hypothesis */
+    void Score(T2TStateBundle * prev, T2TStateBundle * beam);
+
     /* generate token indices via beam pruning */
     void Generate(T2TStateBundle * beam);
...