Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
N
NiuTrans.Tensor
概览
Overview
Details
Activity
Cycle Analytics
版本库
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
问题
0
Issues
0
列表
Board
标记
里程碑
合并请求
0
Merge Requests
0
CI / CD
CI / CD
流水线
作业
日程表
图表
维基
Wiki
代码片段
Snippets
成员
Collapse sidebar
Close sidebar
活动
图像
聊天
创建新问题
作业
提交
Issue Boards
Open sidebar
Emmay
NiuTrans.Tensor
Commits
de548dd3
Commit
de548dd3
authored
Aug 04, 2018
by
xiaotong
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
bu fixes
parent
51b4da42
显示空白字符变更
内嵌
并排
正在显示
29 个修改的文件
包含
175 行增加
和
126 行删除
+175
-126
source/network/Main.cpp
+1
-37
source/network/XNet.cpp
+5
-0
source/network/XNet.h
+1
-0
source/sample/fnnlm/FNNLM.cpp
+10
-3
source/sample/transformer/T2TAttention.cpp
+5
-9
source/sample/transformer/T2TAttention.h
+1
-1
source/sample/transformer/T2TEmbedding.cpp
+11
-15
source/sample/transformer/T2TEmbedding.h
+1
-1
source/sample/transformer/T2TEncoder.cpp
+11
-9
source/sample/transformer/T2TEncoder.h
+3
-3
source/sample/transformer/T2TFNN.cpp
+3
-6
source/sample/transformer/T2TFNN.h
+1
-1
source/sample/transformer/T2TLayerNormal.cpp
+3
-6
source/sample/transformer/T2TLayerNormal.h
+1
-1
source/sample/transformer/T2TModel.cpp
+5
-3
source/sample/transformer/T2TModel.h
+2
-2
source/sample/transformer/T2TOutput.cpp
+11
-9
source/sample/transformer/T2TOutput.h
+2
-2
source/sample/transformer/T2TTrainer.cpp
+50
-9
source/sample/transformer/T2TTrainer.h
+3
-0
source/sample/transformer/Transformer.cpp
+1
-0
source/tensor/XTensor.cpp
+5
-5
source/tensor/core/arithmetic/MatrixMul.cpp
+0
-0
source/tensor/core/arithmetic/MatrixMulBatched.cpp
+5
-0
source/tensor/core/arithmetic/Sum.cpp
+6
-1
source/tensor/core/math/Normalize.cu
+0
-0
source/tensor/core/shape/MergeBlockLists.cu
+5
-0
source/tensor/function/LogSoftmax.cpp
+2
-3
source/tensor/function/Softmax.cu
+21
-0
没有找到文件。
source/network/Main.cpp
查看文件 @
de548dd3
...
...
@@ -40,12 +40,7 @@ using namespace transformer;
int
main
(
int
argc
,
const
char
**
argv
)
{
//TransposeTest();
//return 0;
//SumDimTest();
//return 0;
//_CrtSetBreakAlloc(896);
if
(
argc
>
1
&&
!
strcmp
(
argv
[
1
],
"-fnnlm"
))
FNNLMMain
(
argc
-
1
,
argv
+
1
);
...
...
@@ -58,37 +53,6 @@ int main( int argc, const char ** argv )
fprintf
(
stderr
,
"Or run this program with
\"
-fnnlm
\"
for sample FNNLM!
\n
"
);
}
return
0
;
XNet
net
;
XTensor
a
;
XTensor
b
;
XTensor
c
;
InitTensor2D
(
&
a
,
2
,
2
);
InitTensor2D
(
&
b
,
2
,
4
);
InitTensor2D
(
&
c
,
2
,
4
);
a
.
SetZeroAll
();
b
.
SetZeroAll
();
c
.
SetZeroAll
();
SetDataFixed
(
a
,
0.1
F
);
a
.
Set2D
(
0.3
F
,
1
,
0
);
a
.
Set2D
(
0.4
F
,
1
,
1
);
b
=
Merge
(
a
,
a
,
1
);
c
=
HTanH
(
MMul
(
a
,
b
));
a
.
Dump
(
stderr
,
"a:"
);
b
.
Dump
(
stderr
,
"b:"
);
c
.
Dump
(
stderr
,
"c:"
);
XLink
::
ShowNetwork
(
stderr
,
&
c
);
net
.
Backward
(
c
);
net
.
Dump
(
stderr
);
//_CrtDumpMemoryLeaks();
return
0
;
...
...
source/network/XNet.cpp
查看文件 @
de548dd3
...
...
@@ -46,6 +46,11 @@ unsigned int MakeNetID()
return
id
;
}
void
XNetClearAll
()
{
MUTEX_DELE
(
netMutex
);
}
/* constructor */
XNet
::
XNet
()
{
...
...
source/network/XNet.h
查看文件 @
de548dd3
...
...
@@ -95,6 +95,7 @@ struct XNet
extern
unsigned
int
netIDGlobal
;
extern
MUTEX_HANDLE
netMutex
;
extern
unsigned
int
MakeNetID
();
extern
void
XNetClearAll
();
}
...
...
source/sample/fnnlm/FNNLM.cpp
查看文件 @
de548dd3
...
...
@@ -240,6 +240,7 @@ void Check(FNNModel &model)
{
CheckErrors
(
model
.
n
>
0
&&
model
.
n
<=
MAX_N_GRAM
,
"The LM order is out of range (use -n)!"
);
CheckErrors
(
model
.
vSize
>
0
,
"no vocabulary size found (use -vsize)!"
);
CheckErrors
(
model
.
eSize
>
0
,
"no embedding size found (use -esize)!"
);
}
/* make a hard copy of the fnn model */
...
...
@@ -632,8 +633,10 @@ int LoadNGrams(FILE * file, int n, NGram * ngrams, int sentNum, int wordNum)
if
(
pin
<=
0
){
int
len
=
(
int
)
strlen
(
lineBuf
);
if
(
lineBuf
[
len
-
1
]
==
'\r'
)
while
(
lineBuf
[
len
-
1
]
==
'\r'
||
lineBuf
[
len
-
1
]
==
'\n'
){
lineBuf
[
len
-
1
]
=
0
;
len
--
;
}
len
=
(
int
)
strlen
(
lineBuf
);
if
(
len
==
0
)
...
...
@@ -644,10 +647,11 @@ int LoadNGrams(FILE * file, int n, NGram * ngrams, int sentNum, int wordNum)
/* how many words are in the sentence */
int
wNum
=
0
;
int
i
=
0
;
for
(
i
nt
i
=
pin
;
i
<
len
;
i
++
){
for
(
i
=
pin
;
i
<
len
;
i
++
){
/* load word (id) seperated by space or tab */
if
((
lineBuf
[
i
]
==
' '
||
lineBuf
[
i
]
==
'\t'
||
i
==
len
-
1
)
&&
wSize
>
0
){
if
((
lineBuf
[
i
]
==
' '
||
lineBuf
[
i
]
==
'\t'
)
&&
wSize
>
0
){
lineBuf
[
i
]
=
0
;
wordBuf
[
wNum
++
]
=
atoi
(
lineBuf
+
i
-
wSize
);
wSize
=
0
;
...
...
@@ -656,6 +660,9 @@ int LoadNGrams(FILE * file, int n, NGram * ngrams, int sentNum, int wordNum)
wSize
++
;
}
if
(
wSize
>
0
)
wordBuf
[
wNum
++
]
=
atoi
(
lineBuf
+
i
-
wSize
);
wordBufCount
=
wNum
;
lineNum
++
;
}
...
...
source/sample/transformer/T2TAttention.cpp
查看文件 @
de548dd3
...
...
@@ -80,16 +80,16 @@ make the network
>> v - values
<< return - multi-attention result
*/
XTensor
*
T2TAttention
::
Make
(
XTensor
*
k
,
XTensor
*
q
,
XTensor
*
v
)
XTensor
T2TAttention
::
Make
(
XTensor
&
k
,
XTensor
&
q
,
XTensor
&
v
)
{
XTensor
k2
;
XTensor
q2
;
XTensor
v2
;
/* linear transofmration before self-attention */
k2
=
MMul
(
*
k
,
wk
);
q2
=
MMul
(
*
q
,
wq
);
v2
=
MMul
(
*
v
,
wv
);
k2
=
MMul
(
k
,
wk
);
q2
=
MMul
(
q
,
wq
);
v2
=
MMul
(
v
,
wv
);
XTensor
kheads
;
XTensor
qheads
;
...
...
@@ -107,12 +107,8 @@ XTensor * T2TAttention::Make(XTensor * k, XTensor * q, XTensor * v)
scalar
=
Softmax
(
Linear
(
BMMul
(
qheads
,
X_NOTRANS
,
kheads
,
X_TRANS
),
1
/
sqrt
((
float
)
dk
)),
-
1
);
att
=
BMMul
(
scalar
,
vheads
);
XTensor
*
result
=
new
XTensor
();
/* concatenate the heads */
*
result
=
Merge
(
att
,
att
.
order
-
1
);
return
result
;
return
Merge
(
att
,
att
.
order
-
1
);
}
}
source/sample/transformer/T2TAttention.h
查看文件 @
de548dd3
...
...
@@ -77,7 +77,7 @@ public:
void
InitModel
(
int
argc
,
const
char
**
argv
,
int
myDevID
=
-
1
,
XMem
*
myMem
=
NULL
);
/* make the network */
XTensor
*
Make
(
XTensor
*
k
,
XTensor
*
q
,
XTensor
*
v
);
XTensor
Make
(
XTensor
&
k
,
XTensor
&
q
,
XTensor
&
v
);
};
}
...
...
source/sample/transformer/T2TEmbedding.cpp
查看文件 @
de548dd3
...
...
@@ -101,21 +101,21 @@ void T2TEmbedder::MakePosEmbedding(int eSize, int d, int length)
/*
make the network
*/
XTensor
*
T2TEmbedder
::
Make
(
XTensor
*
input
)
XTensor
T2TEmbedder
::
Make
(
XTensor
&
input
)
{
CheckNTErrors
(
input
->
GetDim
(
-
1
)
==
vSize
,
"Wrong vocabulary size!"
);
CheckNTErrors
(
input
->
order
>
1
,
"Wrong input tensor size!"
);
CheckNTErrors
(
input
->
dimSize
[
input
->
order
-
2
]
<
maxLength
,
"The sequence is too long!"
);
CheckNTErrors
(
input
.
GetDim
(
-
1
)
==
vSize
,
"Wrong vocabulary size!"
);
CheckNTErrors
(
input
.
order
>
1
,
"Wrong input tensor size!"
);
CheckNTErrors
(
input
.
dimSize
[
input
.
order
-
2
]
<
maxLength
,
"The sequence is too long!"
);
CheckNTErrors
(
vSize
>
0
,
"set vocabulary size by
\"
-vsize
\"
"
);
CheckNTErrors
(
eSize
>
0
,
"set embedding size by
\"
-esize
\"
"
);
int
dims
[
MAX_TENSOR_DIM_NUM
];
memcpy
(
dims
,
input
->
dimSize
,
input
->
order
*
sizeof
(
int
));
dims
[
input
->
order
-
1
]
=
eSize
;
memcpy
(
dims
,
input
.
dimSize
,
input
.
order
*
sizeof
(
int
));
dims
[
input
.
order
-
1
]
=
eSize
;
bool
match
=
(
posEmbedding
.
order
==
input
->
order
);
bool
match
=
(
posEmbedding
.
order
==
input
.
order
);
if
(
match
){
for
(
int
i
=
0
;
i
<
input
->
order
;
i
++
){
for
(
int
i
=
0
;
i
<
input
.
order
;
i
++
){
if
(
dims
[
i
]
!=
posEmbedding
.
GetDim
(
i
))
match
=
false
;
}
...
...
@@ -123,7 +123,7 @@ XTensor * T2TEmbedder::Make(XTensor * input)
/* we make positional embeddings first */
if
(
!
match
){
InitTensor
(
&
posEmbedding
,
input
->
order
,
dims
,
X_FLOAT
,
1.0
F
,
devID
,
mem
);
InitTensor
(
&
posEmbedding
,
input
.
order
,
dims
,
X_FLOAT
,
1.0
F
,
devID
,
mem
);
XTensor
*
posTMP
=
NewTensorBuf
(
2
,
dims
+
1
,
X_FLOAT
,
1.0
F
,
devID
,
mem
);
_CopyValues
(
&
posEmbeddingBase
,
0
,
posTMP
->
unitNum
,
posTMP
,
0
);
...
...
@@ -135,14 +135,10 @@ XTensor * T2TEmbedder::Make(XTensor * input)
XTensor
wordEmbedding
;
/* then we make word embeddings */
wordEmbedding
=
MMul
(
*
input
,
w
);
XTensor
*
result
=
new
XTensor
();
wordEmbedding
=
MMul
(
&
input
,
w
);
/* we sum over the two embeddings */
*
result
=
wordEmbedding
+
posEmbedding
;
return
result
;
return
wordEmbedding
+
posEmbedding
;
}
}
source/sample/transformer/T2TEmbedding.h
查看文件 @
de548dd3
...
...
@@ -77,7 +77,7 @@ public:
void
MakePosEmbedding
(
int
eSize
,
int
d
,
int
length
);
/* make the network */
XTensor
*
Make
(
XTensor
*
input
);
XTensor
Make
(
XTensor
&
input
);
};
}
...
...
source/sample/transformer/T2TEncoder.cpp
查看文件 @
de548dd3
...
...
@@ -82,26 +82,28 @@ make the encoding network
>> input - the input tensor of the encoder
<< return - the output tensor of the encoder
*/
XTensor
*
AttEncoder
::
Make
(
XTensor
*
input
)
XTensor
AttEncoder
::
Make
(
XTensor
&
input
)
{
XTensor
*
x
=
embedder
.
Make
(
input
);
XTensor
x
;
x
=
embedder
.
Make
(
input
);
for
(
int
i
=
0
;
i
<
nlayer
;
i
++
){
XTensor
*
att
;
XTensor
*
ln
;
XTensor
*
fnn
;
XTensor
att
;
XTensor
ln
;
XTensor
fnn
;
XTensor
res
;
/* self attention */
att
=
attentions
[
i
].
Make
(
x
,
x
,
x
);
/* residual connection */
res
=
Sum
(
*
att
,
*
x
);
res
=
Sum
(
att
,
x
);
/* TODO: dropout */
/* layer normalization */
ln
=
layerNorms
[
i
].
Make
(
&
res
);
ln
=
layerNorms
[
i
].
Make
(
res
);
/* input of next layer */
x
=
ln
;
...
...
@@ -110,12 +112,12 @@ XTensor * AttEncoder::Make(XTensor * input)
fnn
=
fnns
[
i
].
Make
(
x
);
/* residual connection */
res
=
Sum
(
*
fnn
,
*
x
);
res
=
Sum
(
fnn
,
x
);
/* TODO: dropout */
/* layer normalization */
ln
=
layerNorms
[
i
].
Make
(
&
res
);
ln
=
layerNorms
[
i
].
Make
(
res
);
/* input of next layer */
x
=
ln
;
...
...
source/sample/transformer/T2TEncoder.h
查看文件 @
de548dd3
...
...
@@ -40,7 +40,7 @@ class T2TEncoder
{
public
:
virtual
XTensor
*
Make
(
XTensor
*
input
)
=
0
;
XTensor
Make
(
XTensor
&
input
)
=
0
;
};
/*
...
...
@@ -49,7 +49,7 @@ the encoder based on RNN
class
RNNEncoder
:
T2TEncoder
{
public
:
XTensor
*
Make
(
XTensor
*
input
);
XTensor
Make
(
XTensor
&
input
);
};
...
...
@@ -106,7 +106,7 @@ public:
void
InitModel
(
int
argc
,
const
char
**
argv
,
int
myDevID
=
-
1
,
XMem
*
myMem
=
NULL
);
/* make the encoding network */
XTensor
*
Make
(
XTensor
*
input
);
XTensor
Make
(
XTensor
&
input
);
};
...
...
source/sample/transformer/T2TFNN.cpp
查看文件 @
de548dd3
...
...
@@ -78,18 +78,15 @@ y = max(0, x * w1 + b1) * w2 + b2
>> input - the input tensor
>> return - the output tensor
*/
XTensor
*
T2TFNN
::
Make
(
XTensor
*
input
)
XTensor
T2TFNN
::
Make
(
XTensor
&
input
)
{
XTensor
t1
;
XTensor
*
result
=
new
XTensor
();
/* t1 = max(0, x * w1 + b1) */
t1
=
Rectify
(
MMul
(
*
input
,
X_NOTRANS
,
w1
,
X_NOTRANS
)
+
b1
);
t1
=
Rectify
(
MMul
(
input
,
X_NOTRANS
,
w1
,
X_NOTRANS
)
+
b1
);
/* result = t1 * w2 + b2 */
*
result
=
MMul
(
t1
,
X_NOTRANS
,
w2
,
X_NOTRANS
)
+
b2
;
return
result
;
return
MMul
(
t1
,
X_NOTRANS
,
w2
,
X_NOTRANS
)
+
b2
;
}
...
...
source/sample/transformer/T2TFNN.h
查看文件 @
de548dd3
...
...
@@ -72,7 +72,7 @@ public:
void
InitModel
(
int
argc
,
const
char
**
argv
,
int
myDevID
=
-
1
,
XMem
*
myMem
=
NULL
);
/* make the network */
XTensor
*
Make
(
XTensor
*
input
);
XTensor
Make
(
XTensor
&
input
);
};
...
...
source/sample/transformer/T2TLayerNormal.cpp
查看文件 @
de548dd3
...
...
@@ -57,15 +57,14 @@ y =
>> input - the input tensor
>> return - layer normalization output
*/
XTensor
*
T2TLN
::
Make
(
XTensor
*
input
)
XTensor
T2TLN
::
Make
(
XTensor
&
input
)
{
XTensor
&
x
=
*
input
;
XTensor
&
x
=
input
;
XTensor
mean
;
XTensor
variance
;
XTensor
standard
;
XTensor
meanFilled
;
XTensor
standardFilled
;
XTensor
*
result
=
new
XTensor
();
/* \mu = (sum_i x_i)/m */
mean
=
ReduceSum
(
x
,
x
.
order
-
1
);
...
...
@@ -82,9 +81,7 @@ XTensor * T2TLN::Make(XTensor * input)
standardFilled
=
Unsqueeze
(
standard
,
x
.
order
-
1
,
x
.
GetDim
(
-
1
));
/* x' = (x - \mu)/standard */
*
result
=
(
x
-
meanFilled
)
/
standardFilled
;
return
result
;
return
(
x
-
meanFilled
)
/
standardFilled
;
}
}
source/sample/transformer/T2TLayerNormal.h
查看文件 @
de548dd3
...
...
@@ -49,7 +49,7 @@ public:
void
InitModel
(
int
argc
,
const
char
**
argv
,
int
myDevID
=
-
1
,
XMem
*
myMem
=
NULL
);
/* make the network */
XTensor
*
Make
(
XTensor
*
input
);
XTensor
Make
(
XTensor
&
input
);
};
}
...
...
source/sample/transformer/T2TModel.cpp
查看文件 @
de548dd3
...
...
@@ -69,7 +69,7 @@ make the encoding network
>> input - input tensor
<< return - encoding result
*/
XTensor
*
T2TModel
::
MakeEncoding
(
XTensor
*
input
)
XTensor
T2TModel
::
MakeEncoding
(
XTensor
&
input
)
{
return
encoder
.
Make
(
input
);
}
...
...
@@ -79,10 +79,12 @@ make the entire network (with the output softmax layer)
>> input - input tensor
>> output - output tensor (distribution)
*/
void
T2TModel
::
Make
(
XTensor
*
input
,
XTensor
*
output
)
void
T2TModel
::
Make
(
XTensor
&
input
,
XTensor
&
output
)
{
if
(
isLM
){
XTensor
*
encoding
=
MakeEncoding
(
input
);
XTensor
encoding
;
encoding
=
MakeEncoding
(
input
);
outputLayer
.
Make
(
encoding
,
output
);
}
else
{
...
...
source/sample/transformer/T2TModel.h
查看文件 @
de548dd3
...
...
@@ -66,10 +66,10 @@ public:
void
InitModel
(
int
argc
,
const
char
**
argv
);
/* make the encoding network */
XTensor
*
MakeEncoding
(
XTensor
*
input
);
XTensor
MakeEncoding
(
XTensor
&
input
);
/* make the entire network (with the output softmax layer) */
void
Make
(
XTensor
*
input
,
XTensor
*
output
);
void
Make
(
XTensor
&
input
,
XTensor
&
output
);
};
}
...
...
source/sample/transformer/T2TOutput.cpp
查看文件 @
de548dd3
...
...
@@ -53,11 +53,15 @@ void T2TOutput::InitModel(int argc, const char ** argv, int myDevID, XMem * myMe
devID
=
myDevID
;
mem
=
myMem
;
float
minmax
=
0
;
LoadParamInt
(
argc
,
argv
,
"vsize"
,
&
vSize
,
-
1
);
LoadParamInt
(
argc
,
argv
,
"d"
,
&
inSize
,
DEFAULT_BEDDING_SIZE
);
LoadParamInt
(
argc
,
argv
,
"d"
,
&
hSize
,
DEFAULT_BEDDING_SIZE
);
LoadParamFloat
(
argc
,
argv
,
"outputminmax"
,
&
minmax
,
0.08
F
);
InitTensor2D
(
&
w
,
hSize
,
vSize
,
X_FLOAT
,
devID
,
mem
);
w
.
SetDataRand
(
-
minmax
,
minmax
);
}
/*
...
...
@@ -66,14 +70,11 @@ y = softmax(x * w)
>> input - input tensor
<< return - output tensor
*/
XTensor
*
T2TOutput
::
Make
(
XTensor
*
input
)
XTensor
T2TOutput
::
Make
(
XTensor
&
input
)
{
XTensor
&
x
=
*
input
;
XTensor
*
result
=
new
XTensor
();
*
result
=
LogSoftmax
(
MMul
(
x
,
w
),
-
1
);
XTensor
&
x
=
input
;
return
result
;
return
LogSoftmax
(
MMul
(
x
,
w
),
-
1
)
;
}
/*
...
...
@@ -81,11 +82,11 @@ make the network (redefined output tensor)
>> input - input tensor
>> output - output tensor
*/
void
T2TOutput
::
Make
(
XTensor
*
input
,
XTensor
*
output
)
void
T2TOutput
::
Make
(
XTensor
&
input
,
XTensor
&
output
)
{
XTensor
&
x
=
*
input
;
XTensor
&
x
=
input
;
*
output
=
LogSoftmax
(
MMul
(
x
,
w
),
-
1
);
output
=
LogSoftmax
(
MMul
(
x
,
w
),
-
1
);
}
}
\ No newline at end of file
source/sample/transformer/T2TOutput.h
查看文件 @
de548dd3
...
...
@@ -62,10 +62,10 @@ public:
void
InitModel
(
int
argc
,
const
char
**
argv
,
int
myDevID
=
-
1
,
XMem
*
myMem
=
NULL
);
/* make the network */
XTensor
*
Make
(
XTensor
*
input
);
XTensor
Make
(
XTensor
&
input
);
/* make the network (redefined output tensor) */
void
Make
(
XTensor
*
input
,
XTensor
*
output
);
void
Make
(
XTensor
&
input
,
XTensor
&
output
);
};
...
...
source/sample/transformer/T2TTrainer.cpp
查看文件 @
de548dd3
...
...
@@ -43,6 +43,7 @@ T2TTrainer::~T2TTrainer()
{
delete
[]
buf
;
delete
[]
seqLen
;
delete
[]
seqOffset
;
}
/*
...
...
@@ -96,18 +97,19 @@ void T2TTrainer::Train(const char * fn, T2TModel * model)
/* batch of input sequences */
XTensor
batch
;
while
(
LoadBatch
(
file
,
&
batch
,
1
,
vSize
,
sBatchSize
,
wBatchSize
,
isLenSorted
,
wc
)){
/* output probabilities */
XTensor
output
;
while
(
LoadBatch
(
file
,
&
batch
,
1
,
vSize
,
sBatchSize
,
wBatchSize
,
isLenSorted
,
wc
)){
/* make the network */
model
->
Make
(
&
batch
,
&
output
);
model
->
Make
(
batch
,
output
);
/* back-propagation for obtaining gradients */
net
.
Backward
(
output
,
batch
,
CROSSENTROPY
);
/* TODO: update the model!!!! */
/* update the parameters */
Update
(
model
);
/* get probabilities */
float
prob
=
GetProb
(
&
output
,
&
batch
,
NULL
);
...
...
@@ -121,7 +123,7 @@ void T2TTrainer::Train(const char * fn, T2TModel * model)
break
;
}
if
(
step
%
1
00
==
0
)
{
if
(
step
%
1
==
0
)
{
double
elapsed
=
GetClockSec
()
-
startT
;
XPRINT5
(
0
,
stderr
,
"[INFO] elapsed=%.1fs, step=%d, epoch=%d, ngram=%d, ppl=%.3f
\n
"
,
elapsed
,
step
,
epoch
+
1
,
wordCountTotal
,
exp
(
loss
/
wordCount
));
...
...
@@ -153,8 +155,10 @@ int T2TTrainer::LoadBuf(FILE * file)
while
(
fgets
(
line
,
MAX_SEQUENCE_LENGTH
-
1
,
file
)){
int
len
=
(
int
)
strlen
(
line
);
if
(
line
[
len
-
1
]
==
'\r'
)
while
(
line
[
len
-
1
]
==
'\r'
||
line
[
len
-
1
]
==
'\n'
){
line
[
len
-
1
]
=
0
;
len
--
;
}
len
=
(
int
)
strlen
(
line
);
if
(
len
==
0
)
...
...
@@ -166,10 +170,11 @@ int T2TTrainer::LoadBuf(FILE * file)
/* how many words are in the sentence */
int
wNum
=
0
;
int
wNumLocal
=
0
;
int
i
=
0
;
for
(
i
nt
i
=
0
;
i
<
len
;
i
++
){
for
(
i
=
0
;
i
<
len
;
i
++
){
/* load word (id) seperated by space or tab */
if
((
line
[
i
]
==
' '
||
line
[
i
]
==
'\t'
||
i
==
len
-
1
)
&&
wSize
>
0
){
if
((
line
[
i
]
==
' '
||
line
[
i
]
==
'\t'
)
&&
wSize
>
0
){
line
[
i
]
=
0
;
if
(
wSize
==
3
&&
line
[
i
-
1
]
==
'|'
&&
line
[
i
-
2
]
==
'|'
&&
line
[
i
-
3
]
==
'|'
){
...
...
@@ -179,7 +184,7 @@ int T2TTrainer::LoadBuf(FILE * file)
wNumLocal
=
0
;
}
else
{
buf
[
wNum
++
]
=
atoi
(
line
+
i
-
wSize
);
buf
[
w
ordCount
+
w
Num
++
]
=
atoi
(
line
+
i
-
wSize
);
wNumLocal
++
;
}
...
...
@@ -189,6 +194,11 @@ int T2TTrainer::LoadBuf(FILE * file)
wSize
++
;
}
if
(
wSize
>
0
){
buf
[
wordCount
+
wNum
++
]
=
atoi
(
line
+
i
-
wSize
);
wNumLocal
++
;
}
seqLen
[
seqCount
]
=
wNumLocal
;
seqOffset
[
seqCount
]
=
wordCount
+
wNum
-
wNumLocal
;
seqCount
++
;
...
...
@@ -305,4 +315,35 @@ float T2TTrainer::GetProb(XTensor * output, XTensor * gold, XTensor * wordProbs)
return
result
.
Get1D
(
0
);
}
/*
update the model by delta rule
>> model - the t2t model
*/
void
T2TTrainer
::
Update
(
T2TModel
*
model
)
{
XList
ws
(
100
);
ws
.
Add
(
&
model
->
outputLayer
.
w
);
for
(
int
i
=
0
;
i
<
model
->
encoder
.
nlayer
;
i
++
){
ws
.
Add
(
&
model
->
encoder
.
fnns
[
i
].
w1
);
ws
.
Add
(
&
model
->
encoder
.
fnns
[
i
].
b1
);
ws
.
Add
(
&
model
->
encoder
.
fnns
[
i
].
w2
);
ws
.
Add
(
&
model
->
encoder
.
fnns
[
i
].
b2
);
}
ws
.
Add
(
&
model
->
encoder
.
embedder
.
w
);
for
(
int
i
=
0
;
i
<
ws
.
count
;
i
++
){
XTensor
*
para
=
(
XTensor
*
)
ws
.
Get
(
i
);
XTensor
*
paraGrad
=
para
->
grad
;
CheckNTErrors
(
para
!=
NULL
,
"NULL parameter tensor!"
);
CheckNTErrors
(
paraGrad
!=
NULL
,
"NULL gradient tensor!"
);
/* the delta rule */
_Sum
(
para
,
paraGrad
,
para
,
-
lrate
);
}
}
}
source/sample/transformer/T2TTrainer.h
查看文件 @
de548dd3
...
...
@@ -103,6 +103,9 @@ public:
/* get word probabilities for a batch of sequences */
float
GetProb
(
XTensor
*
output
,
XTensor
*
gold
,
XTensor
*
wordProbs
);
/* update the model by delta rule */
void
Update
(
T2TModel
*
model
);
};
...
...
source/sample/transformer/Transformer.cpp
查看文件 @
de548dd3
...
...
@@ -23,6 +23,7 @@
#include "T2TModel.h"
#include "T2TUtility.h"
#include "T2TTrainer.h"
#include "../../tensor/XDevice.h"
namespace
transformer
{
...
...
source/tensor/XTensor.cpp
查看文件 @
de548dd3
...
...
@@ -1042,11 +1042,11 @@ set the value of a cell in a 3d tensor in default type
*/
bool
XTensor
::
Set3D
(
DTYPE
value
,
int
d0
,
int
d1
,
int
d2
)
{
CheckNTErrors
(
(
order
==
3
)
,
"Cannot get a 2d cell for a tensor whose order is not 2!"
);
CheckNTErrors
(
(
d0
>=
0
&&
d0
<
dimSize
[
0
])
,
"dimension 0 is out of range!"
);
CheckNTErrors
(
(
d2
>=
0
&&
d1
<
dimSize
[
1
])
,
"dimension 1 is out of range!"
);
CheckNTErrors
(
(
d2
>=
0
&&
d2
<
dimSize
[
2
])
,
"dimension 1 is out of range!"
);
CheckNTErrors
(
(
dataType
==
DEFAULT_DTYPE
)
,
"The tensor is not in default type."
);
CheckNTErrors
(
order
==
3
,
"Cannot get a 2d cell for a tensor whose order is not 2!"
);
CheckNTErrors
(
d0
>=
0
&&
d0
<
dimSize
[
0
]
,
"dimension 0 is out of range!"
);
CheckNTErrors
(
d1
>=
0
&&
d1
<
dimSize
[
1
]
,
"dimension 1 is out of range!"
);
CheckNTErrors
(
d2
>=
0
&&
d2
<
dimSize
[
2
]
,
"dimension 1 is out of range!"
);
CheckNTErrors
(
dataType
==
DEFAULT_DTYPE
,
"The tensor is not in default type."
);
int
dims
[
3
]
=
{
d0
,
d1
,
d1
};
...
...
source/tensor/core/arithmetic/MatrixMul.cpp
查看文件 @
de548dd3
source/tensor/core/arithmetic/MatrixMulBatched.cpp
查看文件 @
de548dd3
...
...
@@ -117,6 +117,9 @@ void _MatrixMulBatchedGPU(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
blockNum
*=
a
->
dimSizeRDI
[
i
];
}
int
devIDBackup
=
0
;
ProtectCudaDev
(
a
->
devID
,
devIDBackup
);
cublasHandle_t
*
handle
=
a
->
mem
!=
NULL
?
a
->
mem
->
GetCublasHandle
()
:
GDevs
.
GetCudaHandle
(
a
->
devID
);
_CudaBLASMatrixMULBatchedStrided
(
handle
,
a
->
data
,
transposedA
,
a
->
dataType
,
aBlockSize
,
...
...
@@ -125,6 +128,8 @@ void _MatrixMulBatchedGPU(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
a
->
dimSizeRDI
[
1
],
a
->
dimSizeRDI
[
0
],
b
->
dimSizeRDI
[
1
],
b
->
dimSizeRDI
[
0
],
c
->
dimSizeRDI
[
1
],
c
->
dimSizeRDI
[
0
],
alpha
,
beta
);
BacktoCudaDev
(
a
->
devID
,
devIDBackup
);
#endif
}
...
...
source/tensor/core/arithmetic/Sum.cpp
查看文件 @
de548dd3
...
...
@@ -22,6 +22,7 @@
#include "../../XTensor.h"
#include "../../XName.h"
#include "../../XUtility.h"
#include "../movement/CopyValues.h"
#include "Sum.h"
#include "Sum.cuh"
#include "SumDim.h"
...
...
@@ -44,8 +45,12 @@ void _Sum(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta)
CheckNTErrors
(
a
->
dataType
==
b
->
dataType
&&
a
->
dataType
==
c
->
dataType
,
"Unmatched tensors in addition!"
);
if
(
a
->
devID
>=
0
||
b
->
devID
>=
0
||
c
->
devID
>=
0
)
{
if
(
beta
==
0
){
_CopyValues
(
a
,
c
);
return
;
}
if
(
a
->
devID
>=
0
||
b
->
devID
>=
0
||
c
->
devID
>=
0
)
{
#ifdef USE_CUDA
if
(
a
==
c
)
{
int
P2PAccesible
=
0
;
...
...
source/tensor/core/math/Normalize.cu
查看文件 @
de548dd3
source/tensor/core/shape/MergeBlockLists.cu
查看文件 @
de548dd3
...
...
@@ -109,6 +109,9 @@ void _CudaMergeBlockLists(const XList * sourceList, int * blockSizes, int blockN
CheckNTErrors((maxBlockSize % sizeof(DTYPE) == 0), "Unsupported block size!");
realMaxBlockSize = maxBlockSize / sizeof(DTYPE);
int devIDBackup;
ProtectCudaDev(myMem->devID, devIDBackup);
int cudaGridSizes[3];
int cudaBlockSizes[3];
...
...
@@ -135,6 +138,8 @@ void _CudaMergeBlockLists(const XList * sourceList, int * blockSizes, int blockN
delete[] targetArrays;
delete[] sizes;
delete[] offsets;
BacktoCudaDev(myMem->devID, devIDBackup);
}
#endif // USE_CUDA
...
...
source/tensor/function/LogSoftmax.cpp
查看文件 @
de548dd3
...
...
@@ -150,11 +150,10 @@ void _LogSoftmax(const XTensor * x, XTensor * y, int leadDim)
}
}
if
(
x
->
devID
<
0
)
{
DelTensorBuf
(
max
);
DelTensorBuf
(
sum
);
}
else
{
if
(
x
->
devID
>=
0
)
{
delete
blockx
;
delete
blocky
;
delete
blockMax
;
...
...
source/tensor/function/Softmax.cu
查看文件 @
de548dd3
...
...
@@ -239,6 +239,9 @@ void _CudaSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
CheckNTErrors((x->devID == y->devID), "Matrices used in log softmax are not on the same GPU.");
CheckNTErrors((y->order >= 1), "Empty tensor!");
int devIDBackup;
ProtectCudaDev(x->devID, devIDBackup);
if(x->dataType == DEFAULT_DTYPE && y->dataType == DEFAULT_DTYPE){
CheckNTErrors((lossName == CROSSENTROPY ||
...
...
@@ -284,8 +287,14 @@ void _CudaSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
/* make a matrix to keep \beta */
XTensor * beta = new XTensor(y->order - 1, dimSize, y->dataType, y->denseRatio, y->devID, mem);
if(mem != NULL){
ytmp->data = mem->AllocBuf(mem->devID, y->unitNum * y->unitSize);
beta->data = mem->AllocBuf(mem->devID, beta->unitNum * beta->unitSize);
}
else{
ytmp->data = XMemAlloc(y->devID, y->unitNum * y->unitSize);
beta->data = XMemAlloc(y->devID, beta->unitNum * beta->unitSize);
}
/* \beta = \sum_i (dE/dy_i * y_i) */
_Multiply(dedy, y, ytmp, 0, 0);
...
...
@@ -298,8 +307,18 @@ void _CudaSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
/* dE/ds_j = y_j * ytmp = y_j * (dE/dy_j - \beta) */
_Multiply(y, ytmp, dedx, 0, 0);
if(mem != NULL){
mem->ReleaseBuf(mem->devID, y->unitNum * y->unitSize);
mem->ReleaseBuf(mem->devID, beta->unitNum * beta->unitSize);
}
else{
XMemFree(y->devID, ytmp->data);
XMemFree(y->devID, beta->data);
}
ytmp->data = NULL;
beta->data = NULL;
delete[] dimSize;
delete ytmp;
...
...
@@ -311,6 +330,8 @@ void _CudaSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
}
else
ShowNTErrors("TODO!");
BacktoCudaDev(x->devID, devIDBackup);
}
#endif
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论