Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
N
NiuTrans.Tensor
概览
Overview
Details
Activity
Cycle Analytics
版本库
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
问题
0
Issues
0
列表
Board
标记
里程碑
合并请求
0
Merge Requests
0
CI / CD
CI / CD
流水线
作业
日程表
图表
维基
Wiki
代码片段
Snippets
成员
Collapse sidebar
Close sidebar
活动
图像
聊天
创建新问题
作业
提交
Issue Boards
Open sidebar
杨迪
NiuTrans.Tensor
Commits
0b43acf6
Commit
0b43acf6
authored
Feb 03, 2019
by
姜雨帆
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
update
parent
896e5231
隐藏空白字符变更
内嵌
并排
正在显示
21 个修改的文件
包含
441 行增加
和
82 行删除
+441
-82
source/network/XBackwardMath.cpp
+2
-0
source/sample/transformer/T2TAttention.cpp
+33
-6
source/sample/transformer/T2TAttention.h
+3
-2
source/sample/transformer/T2TDecoder.cpp
+38
-9
source/sample/transformer/T2TDecoder.h
+49
-3
source/sample/transformer/T2TEmbedding.cpp
+8
-2
source/sample/transformer/T2TEmbedding.h
+1
-1
source/sample/transformer/T2TEncoder.cpp
+5
-4
source/sample/transformer/T2TModel.cpp
+10
-7
source/sample/transformer/T2TOutput.cpp
+1
-1
source/sample/transformer/T2TTrainer.cpp
+126
-25
source/sample/transformer/T2TTrainer.h
+31
-0
source/sample/transformer/Transformer.cpp
+10
-5
source/tensor/XDevice.cpp
+5
-0
source/tensor/XDevice.h
+3
-0
source/tensor/XTensor.cpp
+6
-0
source/tensor/core/getandset/SetData.cpp
+34
-1
source/tensor/core/getandset/SetData.cu
+58
-9
source/tensor/core/getandset/SetData.cuh
+5
-1
source/tensor/core/getandset/SetData.h
+5
-1
source/tensor/function/Dropout.cpp
+8
-5
没有找到文件。
source/network/XBackwardMath.cpp
查看文件 @
0b43acf6
...
...
@@ -71,6 +71,8 @@ void XMathGrad::MakeGrad(XTensor * node, bool isEfficient)
GradMultiply
(
node
,
isEfficient
);
else
if
(
operID
==
MATH_MULTIPLYDIM
)
GradMultiplyDim
(
node
,
isEfficient
);
else
if
(
operID
==
MATH_MULTIPLYBROADCAST
)
GradMultiplyBroadcast
(
node
,
isEfficient
);
else
if
(
operID
==
MATH_NEGATE
)
GradNegate
(
node
,
isEfficient
);
else
if
(
operID
==
MATH_NORMALIZE
)
...
...
source/sample/transformer/T2TAttention.cpp
查看文件 @
0b43acf6
...
...
@@ -75,16 +75,19 @@ void T2TAttention::InitModel(int argc, char ** argv,
InitTensor2D
(
&
wq
,
d
,
dk
,
X_FLOAT
,
devID
,
mem
);
InitTensor2D
(
&
wv
,
d
,
dv
,
X_FLOAT
,
devID
,
mem
);
InitTensor2D
(
&
wa
,
d
,
d
,
X_FLOAT
,
devID
,
mem
);
InitTensor2D
(
&
wbig
,
d
,
3
*
d
,
X_FLOAT
,
devID
,
mem
);
float
scale
=
1.0
F
;
float
finfoutk
=
(
float
)
sqrt
(
6.0
F
*
scale
/
(
d
+
dk
));
float
finfoutv
=
(
float
)
sqrt
(
6.0
F
*
scale
/
(
d
+
dv
));
float
finfouta
=
(
float
)
sqrt
(
6.0
F
*
scale
/
(
d
+
d
));
float
finfoutbig
=
(
float
)
sqrt
(
6.0
F
*
scale
/
(
d
+
3
*
d
));
wk
.
SetDataRand
(
-
finfoutk
,
finfoutk
);
wq
.
SetDataRand
(
-
finfoutk
,
finfoutk
);
wv
.
SetDataRand
(
-
finfoutv
,
finfoutv
);
wa
.
SetDataRand
(
-
finfouta
,
finfouta
);
wbig
.
SetDataRand
(
-
finfoutbig
,
finfoutbig
);
}
/*
...
...
@@ -98,16 +101,40 @@ make the network
>> isTraining - indicates whether the model is used for training
<< return - multi-attention result
*/
XTensor
T2TAttention
::
Make
(
XTensor
&
k
,
XTensor
&
q
,
XTensor
&
v
,
XTensor
&
mask
,
bool
isTraining
)
XTensor
T2TAttention
::
Make
(
XTensor
&
k
,
XTensor
&
q
,
XTensor
&
v
,
XTensor
&
mask
,
bool
isTraining
,
bool
selfatt
)
{
XTensor
k2
;
XTensor
q2
;
XTensor
v2
;
/* linear transformation before self-attention */
k2
=
MMul
(
k
,
wk
);
q2
=
MMul
(
q
,
wq
);
v2
=
MMul
(
v
,
wv
);
if
(
selfatt
){
XTensor
con
;
XList
split
;
con
=
MMul
(
k
,
wbig
);
int
d1
=
con
.
GetDim
(
0
);
int
d2
=
con
.
GetDim
(
1
);
int
d3
=
con
.
GetDim
(
2
)
/
3
;
InitTensor3D
(
&
k2
,
d1
,
d2
,
d3
,
X_FLOAT
,
devID
,
mem
);
InitTensor3D
(
&
q2
,
d1
,
d2
,
d3
,
X_FLOAT
,
devID
,
mem
);
InitTensor3D
(
&
v2
,
d1
,
d2
,
d3
,
X_FLOAT
,
devID
,
mem
);
split
.
Add
(
&
q2
);
split
.
Add
(
&
k2
);
split
.
Add
(
&
v2
);
Split
(
con
,
split
,
2
,
3
);
}
else
{
/* linear transformation before self-attention */
k2
=
MMul
(
k
,
wk
);
q2
=
MMul
(
q
,
wq
);
v2
=
MMul
(
v
,
wv
);
}
XTensor
kheads
;
XTensor
qheads
;
...
...
source/sample/transformer/T2TAttention.h
查看文件 @
0b43acf6
...
...
@@ -59,7 +59,8 @@ public:
/* transformation after dot-product attention */
XTensor
wa
;
XTensor
wbig
;
/* size of transformed Q and K */
int
dk
;
...
...
@@ -95,7 +96,7 @@ public:
int
myDevID
=
-
1
,
XMem
*
myMem
=
NULL
);
/* make the network */
XTensor
Make
(
XTensor
&
k
,
XTensor
&
q
,
XTensor
&
v
,
XTensor
&
mask
,
bool
isTraining
);
XTensor
Make
(
XTensor
&
k
,
XTensor
&
q
,
XTensor
&
v
,
XTensor
&
mask
,
bool
isTraining
,
bool
selfatt
);
};
}
...
...
source/sample/transformer/T2TDecoder.cpp
查看文件 @
0b43acf6
...
...
@@ -21,6 +21,8 @@
#include <math.h>
#include "T2TDecoder.h"
#include "T2TUtility.h"
#include "T2TLayerNormal.h"
#include "../../tensor/core/CHeader.h"
namespace
transformer
...
...
@@ -53,16 +55,43 @@ void AttDecoder::InitModel(int argc, char ** argv,
bool
myIsMasked
,
int
myIgnored
,
int
myDevID
,
XMem
*
myMem
)
{
AttEncoder
::
InitModel
(
argc
,
argv
,
myIsMasked
,
myIgnored
,
myDevID
,
myMem
);
//
AttEncoder::InitModel(argc, argv, myIsMasked, myIgnored, myDevID, myMem);
devID
=
myDevID
;
mem
=
myMem
;
ignored
=
myIgnored
;
LoadParamInt
(
argc
,
argv
,
"nlayer"
,
&
nlayer
,
6
);
LoadParamInt
(
argc
,
argv
,
"hsize"
,
&
hSize
,
DEFAULT_EMBEDDING_SIZE
);
LoadParamInt
(
argc
,
argv
,
"esize"
,
&
eSize
,
DEFAULT_EMBEDDING_SIZE
);
LoadParamInt
(
argc
,
argv
,
"vsizetgt"
,
&
vSize
,
-
1
);
LoadParamFloat
(
argc
,
argv
,
"dropout"
,
&
dropoutP
,
0
);
CheckNTErrors
(
nlayer
>=
1
,
"We have one encoding layer at least!"
);
CheckNTErrors
(
vSize
>
1
,
"set vocabulary size by
\"
-vsize
\"
"
);
/* embedding model */
embedder
.
InitModel
(
argc
,
argv
,
devID
,
mem
,
false
);
attentions
=
new
T2TAttention
[
nlayer
];
fnns
=
new
T2TFNN
[
nlayer
];
attLayerNorms
=
new
T2TLN
[
nlayer
];
fnnLayerNorms
=
new
T2TLN
[
nlayer
];
attentionsEnde
=
new
T2TAttention
[
nlayer
];
attEndeLayerNorms
=
new
T2TLN
[
nlayer
];
/* initialize the stacked layers */
for
(
int
i
=
0
;
i
<
nlayer
;
i
++
){
attentionsEnde
[
i
].
InitModel
(
argc
,
argv
,
myIsMasked
,
myIgnored
,
myDevID
,
myMem
);
for
(
int
i
=
0
;
i
<
nlayer
;
i
++
)
{
attentions
[
i
].
InitModel
(
argc
,
argv
,
myIsMasked
,
myIgnored
,
myDevID
,
myMem
);
fnns
[
i
].
InitModel
(
argc
,
argv
,
myDevID
,
myMem
);
attLayerNorms
[
i
].
InitModel
(
argc
,
argv
,
myDevID
,
myMem
);
fnnLayerNorms
[
i
].
InitModel
(
argc
,
argv
,
myDevID
,
myMem
);
attentionsEnde
[
i
].
InitModel
(
argc
,
argv
,
true
,
myIgnored
,
myDevID
,
myMem
);
attEndeLayerNorms
[
i
].
InitModel
(
argc
,
argv
,
myDevID
,
myMem
);
}
}
/*
...
...
@@ -82,7 +111,7 @@ XTensor AttDecoder::Make(XTensor &inputDec, XTensor &outputEnc, XTensor &mask, X
/* dropout */
if
(
isTraining
&&
dropoutP
>
0
)
x
=
Dropout
(
x
,
dropoutP
,
2
);
x
=
Dropout
(
x
,
dropoutP
);
for
(
int
i
=
0
;
i
<
nlayer
;
i
++
){
XTensor
att
;
...
...
@@ -93,11 +122,11 @@ XTensor AttDecoder::Make(XTensor &inputDec, XTensor &outputEnc, XTensor &mask, X
/******************/
/* self attention */
att
=
attentions
[
i
].
Make
(
x
,
x
,
x
,
mask
,
isTraining
);
att
=
attentions
[
i
].
Make
(
x
,
x
,
x
,
mask
,
isTraining
,
true
);
/* dropout */
if
(
isTraining
&&
dropoutP
>
0
)
att
=
Dropout
(
att
,
dropoutP
,
2
);
att
=
Dropout
(
att
,
dropoutP
);
/* residual connection */
res
=
Sum
(
att
,
x
);
...
...
@@ -107,11 +136,11 @@ XTensor AttDecoder::Make(XTensor &inputDec, XTensor &outputEnc, XTensor &mask, X
/*****************************/
/* encoder-decoder attention */
ende
=
attentionsEnde
[
i
].
Make
(
outputEnc
,
x
,
outputEnc
,
maskEncDec
,
isTraining
);
ende
=
attentionsEnde
[
i
].
Make
(
outputEnc
,
x
,
outputEnc
,
maskEncDec
,
isTraining
,
false
);
/* dropout */
if
(
isTraining
&&
dropoutP
>
0
)
ende
=
Dropout
(
ende
,
dropoutP
,
2
);
ende
=
Dropout
(
ende
,
dropoutP
);
/* residual connection */
res
=
Sum
(
ende
,
x
);
...
...
@@ -125,7 +154,7 @@ XTensor AttDecoder::Make(XTensor &inputDec, XTensor &outputEnc, XTensor &mask, X
/* dropout */
if
(
isTraining
&&
dropoutP
>
0
)
fnn
=
Dropout
(
fnn
,
dropoutP
,
2
);
fnn
=
Dropout
(
fnn
,
dropoutP
);
/* residual connection */
res
=
Sum
(
fnn
,
x
);
...
...
source/sample/transformer/T2TDecoder.h
查看文件 @
0b43acf6
...
...
@@ -27,9 +27,56 @@
namespace
transformer
{
class
AttDecoder
:
public
AttEncoder
class
AttDecoder
{
public
:
/* device id */
int
devID
;
/* memory pool */
XMem
*
mem
;
/* layer number */
int
nlayer
;
/* hidden layer size of the FNN layer */
int
hSize
;
/* embedding size */
int
eSize
;
/* vocabulary size */
int
vSize
;
/* dropout probability */
DTYPE
dropoutP
;
/* some positions can be ignored in attention. this is useful in lm where the first position needs
* special design for the attention model. */
int
ignored
;
/* embedding of word at each position */
T2TEmbedder
embedder
;
/* FNN model of each layer */
T2TFNN
*
fnns
;
/* attention model of each layer */
T2TAttention
*
attentions
;
/* layer normalization for fnn */
T2TLN
*
fnnLayerNorms
;
/* layer normalization for attention */
T2TLN
*
attLayerNorms
;
/* input tensor of the encoder */
XTensor
*
input
;
/* output tensor of the encoder */
XTensor
*
output
;
/* encoder-decoder attention model of each layer */
T2TAttention
*
attentionsEnde
;
...
...
@@ -53,4 +100,4 @@ public:
}
#endif
\ No newline at end of file
#endif
source/sample/transformer/T2TEmbedding.cpp
查看文件 @
0b43acf6
...
...
@@ -48,12 +48,18 @@ initialize the model
>> myDevID - device id
>> myMem - the memory pool
*/
void
T2TEmbedder
::
InitModel
(
int
argc
,
char
**
argv
,
int
myDevID
,
XMem
*
myMem
)
void
T2TEmbedder
::
InitModel
(
int
argc
,
char
**
argv
,
int
myDevID
,
XMem
*
myMem
,
bool
isEnc
)
{
devID
=
myDevID
;
mem
=
myMem
;
LoadParamInt
(
argc
,
argv
,
"vsize"
,
&
vSize
,
-
1
);
if
(
isEnc
){
LoadParamInt
(
argc
,
argv
,
"vsize"
,
&
vSize
,
-
1
);
}
else
{
LoadParamInt
(
argc
,
argv
,
"vsizetgt"
,
&
vSize
,
-
1
);
}
//LoadParamInt(argc, argv, "vsize", &vSize, -1);
LoadParamInt
(
argc
,
argv
,
"maxlen"
,
&
maxLength
,
512
);
LoadParamInt
(
argc
,
argv
,
"d"
,
&
eSize
,
DEFAULT_EMBEDDING_SIZE
);
LoadParamInt
(
argc
,
argv
,
"d"
,
&
d
,
DEFAULT_EMBEDDING_SIZE
);
...
...
source/sample/transformer/T2TEmbedding.h
查看文件 @
0b43acf6
...
...
@@ -71,7 +71,7 @@ public:
~
T2TEmbedder
();
/* initialize the model */
void
InitModel
(
int
argc
,
char
**
argv
,
int
myDevID
=
-
1
,
XMem
*
myMem
=
NULL
);
void
InitModel
(
int
argc
,
char
**
argv
,
int
myDevID
=
-
1
,
XMem
*
myMem
=
NULL
,
bool
isEnc
=
true
);
/* make positional embeddings */
void
MakePosEmbedding
(
int
eSize
,
int
d
,
int
length
);
...
...
source/sample/transformer/T2TEncoder.cpp
查看文件 @
0b43acf6
...
...
@@ -107,7 +107,7 @@ XTensor AttEncoder::Make(XTensor &input, XTensor &mask, XTensor &maskEncDec, boo
/* dropout */
if
(
isTraining
&&
dropoutP
>
0
)
x
=
Dropout
(
x
,
dropoutP
,
2
);
x
=
Dropout
(
x
,
dropoutP
);
for
(
int
i
=
0
;
i
<
nlayer
;
i
++
){
XTensor
att
;
...
...
@@ -116,11 +116,11 @@ XTensor AttEncoder::Make(XTensor &input, XTensor &mask, XTensor &maskEncDec, boo
XTensor
res
;
/* self attention */
att
=
attentions
[
i
].
Make
(
x
,
x
,
x
,
mask
,
isTraining
);
att
=
attentions
[
i
].
Make
(
x
,
x
,
x
,
mask
,
isTraining
,
true
);
/* dropout */
if
(
isTraining
&&
dropoutP
>
0
)
att
=
Dropout
(
att
,
dropoutP
,
2
);
att
=
Dropout
(
att
,
dropoutP
);
/* residual connection */
res
=
Sum
(
att
,
x
);
...
...
@@ -133,7 +133,7 @@ XTensor AttEncoder::Make(XTensor &input, XTensor &mask, XTensor &maskEncDec, boo
/* dropout */
if
(
isTraining
&&
dropoutP
>
0
)
fnn
=
Dropout
(
fnn
,
dropoutP
,
2
);
fnn
=
Dropout
(
fnn
,
dropoutP
);
/* residual connection */
res
=
Sum
(
fnn
,
x
);
...
...
@@ -160,3 +160,4 @@ XTensor AttEncoder::Make(XTensor &input, XTensor &mask, bool isTraining)
}
}
source/sample/transformer/T2TModel.cpp
查看文件 @
0b43acf6
...
...
@@ -274,9 +274,10 @@ void T2TModel::MakeMT(XTensor &inputEnc, XTensor &inputDec, XTensor &output, XTe
_Sum
(
&
maskEnc
,
padding3
,
&
maskEnc
);
encoding
=
MakeEncoder
(
inputEnc
,
maskEnc
,
isTraining
);
//encoding.Dump(stderr, "encoding",10);
decoding
=
MakeDecoder
(
inputDec
,
encoding
,
maskDec
,
maskEncDec
,
isTraining
);
//decoding.Dump(stderr, "decoding", 10);
outputLayer
->
Make
(
decoding
,
output
);
delete
[]
dims
;
...
...
@@ -300,9 +301,10 @@ void T2TModel::GetParams(XList &list)
list
.
Add
(
&
encoder
->
fnns
[
i
].
b1
);
list
.
Add
(
&
encoder
->
fnns
[
i
].
w2
);
list
.
Add
(
&
encoder
->
fnns
[
i
].
b2
);
list
.
Add
(
&
encoder
->
attentions
[
i
].
wk
);
list
.
Add
(
&
encoder
->
attentions
[
i
].
wq
);
list
.
Add
(
&
encoder
->
attentions
[
i
].
wv
);
//list.Add(&encoder->attentions[i].wk);
//list.Add(&encoder->attentions[i].wq);
//list.Add(&encoder->attentions[i].wv);
list
.
Add
(
&
encoder
->
attentions
[
i
].
wbig
);
list
.
Add
(
&
encoder
->
attentions
[
i
].
wa
);
list
.
Add
(
&
encoder
->
fnnLayerNorms
[
i
].
w
);
list
.
Add
(
&
encoder
->
fnnLayerNorms
[
i
].
b
);
...
...
@@ -324,9 +326,10 @@ void T2TModel::GetParams(XList &list)
list
.
Add
(
&
decoder
->
attentionsEnde
[
i
].
wa
);
list
.
Add
(
&
decoder
->
attEndeLayerNorms
[
i
].
w
);
list
.
Add
(
&
decoder
->
attEndeLayerNorms
[
i
].
b
);
list
.
Add
(
&
decoder
->
attentions
[
i
].
wk
);
list
.
Add
(
&
decoder
->
attentions
[
i
].
wq
);
list
.
Add
(
&
decoder
->
attentions
[
i
].
wv
);
//list.Add(&decoder->attentions[i].wk);
//list.Add(&decoder->attentions[i].wq);
//list.Add(&decoder->attentions[i].wv);
list
.
Add
(
&
decoder
->
attentions
[
i
].
wbig
);
list
.
Add
(
&
decoder
->
attentions
[
i
].
wa
);
list
.
Add
(
&
decoder
->
fnnLayerNorms
[
i
].
w
);
list
.
Add
(
&
decoder
->
fnnLayerNorms
[
i
].
b
);
...
...
source/sample/transformer/T2TOutput.cpp
查看文件 @
0b43acf6
...
...
@@ -56,7 +56,7 @@ void T2TOutput::InitModel(int argc, char ** argv, int myDevID, XMem * myMem)
float
minmax
=
0
;
LoadParamInt
(
argc
,
argv
,
"vsize"
,
&
vSize
,
-
1
);
LoadParamInt
(
argc
,
argv
,
"vsize
tgt
"
,
&
vSize
,
-
1
);
LoadParamInt
(
argc
,
argv
,
"d"
,
&
inSize
,
DEFAULT_EMBEDDING_SIZE
);
LoadParamInt
(
argc
,
argv
,
"d"
,
&
hSize
,
DEFAULT_EMBEDDING_SIZE
);
LoadParamFloat
(
argc
,
argv
,
"outputminmax"
,
&
minmax
,
0.08
F
);
...
...
source/sample/transformer/T2TTrainer.cpp
查看文件 @
0b43acf6
...
...
@@ -41,12 +41,15 @@ T2TTrainer::T2TTrainer()
seqLen2
=
NULL
;
nseqBuf
=
0
;
nextSeq
=
-
1
;
nextBatch
=
-
1
;
argNum
=
0
;
argArray
=
NULL
;
buf
=
NULL
;
buf2
=
NULL
;
bufBatch
=
NULL
;
bufSize
=
0
;
bufBatchSize
=
0
;
seqOffset
=
NULL
;
}
...
...
@@ -55,6 +58,7 @@ T2TTrainer::~T2TTrainer()
{
delete
[]
buf
;
delete
[]
buf2
;
delete
[]
bufBatch
;
delete
[]
seqLen
;
delete
[]
seqLen2
;
delete
[]
seqOffset
;
...
...
@@ -117,9 +121,11 @@ void T2TTrainer::Init(int argc, char ** argv)
LoadParamBool
(
argc
,
argv
,
"smallbatch"
,
&
isSmallBatch
,
true
);
LoadParamBool
(
argc
,
argv
,
"bigbatch"
,
&
isBigBatch
,
false
);
LoadParamBool
(
argc
,
argv
,
"debug"
,
&
isDebugged
,
false
);
LoadParamBool
(
argc
,
argv
,
"randbatch"
,
&
isRandomBatch
,
false
);
buf
=
new
int
[
bufSize
];
buf2
=
new
int
[
bufSize
];
bufBatch
=
new
BatchNode
[
bufSize
];
seqLen
=
new
int
[
bufSize
];
seqLen2
=
new
int
[
bufSize
];
seqOffset
=
new
int
[
bufSize
];
...
...
@@ -171,6 +177,9 @@ void T2TTrainer::Train(const char * fn, const char * validFN, const char * model
PrepareModel
(
model
);
double
startT
=
GetClockSec
();
FILE
*
fileen
=
fopen
(
"enc.txt"
,
"w"
);
FILE
*
filede
=
fopen
(
"dec.txt"
,
"w"
);
for
(
epoch
=
1
;
epoch
<=
nepoch
;
epoch
++
){
#ifndef WIN32
...
...
@@ -205,6 +214,10 @@ void T2TTrainer::Train(const char * fn, const char * validFN, const char * model
CheckNTErrors
(
batchEnc
.
order
==
2
,
"wrong tensor order of the sequence batch"
);
//batchEnc.Dump(stderr, "enc",1);
//batchDec.Dump(stderr, "dec",1);
//paddingDec.Dump(stderr, "paddec");
/* output probabilities */
XTensor
output
;
...
...
@@ -222,17 +235,18 @@ void T2TTrainer::Train(const char * fn, const char * validFN, const char * model
LabelSmooth
(
&
gold
,
&
goldSmoothed
,
labelSmoothingP
);
/* make paddings for the output */
if
(
output
.
GetDim
(
0
)
>
1
)
if
(
output
.
GetDim
(
0
)
>
0
)
PadOutput
(
&
output
,
&
gold
,
&
paddingDec
);
/* get probabilities */
float
prob
=
GetProb
(
&
output
,
&
gold
,
NULL
);
//printf("%f\n", prob);
//float prob = 0;
DTYPE
lossLocal
=
-
prob
/
wc
;
bool
doUpdate
=
(
!
IsNAN
(
lossLocal
)
&&
!
IsINF
(
lossLocal
)
&&
lossLocal
<
1e3
F
);
XTensor
&
g
=
labelSmoothingP
>
0
?
goldSmoothed
:
gold
;
//doUpdate = false;
if
(
doUpdate
)
{
/* rescale the output for normalized loss */
...
...
@@ -292,6 +306,9 @@ void T2TTrainer::Train(const char * fn, const char * validFN, const char * model
MakeCheckpoint
(
model
,
validFN
,
modelFN
,
"epoch"
,
epoch
);
}
fclose
(
fileen
);
fclose
(
filede
);
double
elapsed
=
GetClockSec
()
-
startT
;
epoch
=
MIN
(
epoch
,
nepoch
);
...
...
@@ -434,11 +451,11 @@ void T2TTrainer::MakeCheckpoint(T2TModel * model, const char * validFN, const ch
sprintf
(
fn2
,
"%s.%s.%03d.output"
,
modelFN
,
label
,
id
);
model
->
Dump
(
fn
);
if
(
validFN
!=
NULL
){
T2TTrainer
trainer
;
trainer
.
Init
(
argNum
,
argArray
);
trainer
.
Test
(
validFN
,
fn2
,
model
);
}
//
if(validFN != NULL){
//
T2TTrainer trainer;
//
trainer.Init(argNum, argArray);
//
trainer.Test(validFN, fn2, model);
//
}
delete
[]
fn
;
delete
[]
fn2
;
...
...
@@ -473,7 +490,8 @@ int T2TTrainer::LoadBuf(FILE * file, bool isSorted, int step)
int
wordCount
=
0
;
while
(
fgets
(
line
,
MAX_SEQUENCE_LENGTH
-
1
,
file
)){
int
len
=
(
int
)
strlen
(
line
);
if
(
line
[
0
]
==
'b'
)
break
;
while
(
line
[
len
-
1
]
==
'\r'
||
line
[
len
-
1
]
==
'\n'
){
line
[
len
-
1
]
=
0
;
len
--
;
...
...
@@ -544,9 +562,14 @@ int T2TTrainer::LoadBuf(FILE * file, bool isSorted, int step)
node
.
offset
=
i
;
node
.
p
=
buf
+
offset
;
node
.
size
=
0
;
for
(
int
j
=
0
;
j
<
step
;
j
++
)
int
max
=
0
;
for
(
int
j
=
0
;
j
<
step
;
j
++
){
node
.
size
+=
seqLen
[
i
+
j
];
node
.
value
=
seqLen
[
i
];
max
=
MAX
(
max
,
seqLen
[
i
+
j
]);
}
//node.value = seqLen[i+1]+seqLen[i];
//node.value = MAX(seqLen[i+1],seqLen[i]);
node
.
value
=
max
;
count
++
;
offset
+=
node
.
size
;
}
...
...
@@ -768,6 +791,12 @@ int T2TTrainer::LoadBatchLM(FILE * file,
return
sc
;
}
int
CompareBatchNode
(
const
void
*
a
,
const
void
*
b
)
{
return
((
BatchNode
*
)
b
)
->
key
-
((
BatchNode
*
)
a
)
->
key
;
}
/*
load a batch of sequences (for MT)
>> file - the handle to the data file
...
...
@@ -797,10 +826,70 @@ int T2TTrainer::LoadBatchMT(FILE * file,
int
devID
,
XMem
*
mem
,
bool
isTraining
)
{
if
(
nextSeq
<
0
||
nextSeq
>=
nseqBuf
)
//if (nextSeq < 0 || nextSeq >= nseqBuf)
// LoadBuf(file, isSorted, 2);
if
(
nextBatch
<
0
||
nextBatch
>=
bufBatchSize
)
{
LoadBuf
(
file
,
isSorted
,
2
);
int
seq
=
MAX
(
nextSeq
,
0
);
int
seq
=
0
;
bufBatchSize
=
0
;
nextBatch
=
0
;
/* we segment the buffer into batches */
while
(
seq
<
nseqBuf
)
{
int
wcEnc
=
0
;
int
wcDec
=
0
;
int
wnEnc
=
0
;
int
wnDec
=
0
;
int
maxEnc
=
0
;
int
maxDec
=
0
;
int
sc
=
0
;
while
(
seq
+
sc
<
nseqBuf
)
{
/* source-side sequence */
wnEnc
=
seqLen
[
seq
+
sc
];
/* target-side sequence */
wnDec
=
isDoubledEnd
?
seqLen
[
seq
+
sc
+
1
]
:
seqLen
[
seq
+
sc
+
1
]
-
1
;
int
tcEnc
=
isBigBatch
?
(
wcEnc
+
wnEnc
)
:
MAX
(
maxEnc
,
wnEnc
)
*
(
sc
+
2
)
/
2
;
int
tcDec
=
isBigBatch
?
(
wcDec
+
wnDec
)
:
MAX
(
maxDec
,
wnDec
)
*
(
sc
+
2
)
/
2
;
if
(
sc
!=
0
&&
sc
>
sBatch
*
2
&&
(
tcEnc
>
wBatch
||
tcDec
>
wBatch
))
break
;
wcEnc
+=
wnEnc
;
sc
+=
1
;
if
(
maxEnc
<
wnEnc
)
maxEnc
=
wnEnc
;
wcDec
+=
wnDec
;
sc
+=
1
;
if
(
maxDec
<
wnDec
)
maxDec
=
wnDec
;
}
BatchNode
&
batch
=
bufBatch
[
bufBatchSize
];
batch
.
beg
=
seq
;
batch
.
end
=
seq
+
sc
;
batch
.
maxEnc
=
maxEnc
;
batch
.
maxDec
=
maxDec
;
batch
.
key
=
rand
();
bufBatchSize
++
;
seq
=
seq
+
sc
;
}
if
(
isRandomBatch
)
qsort
(
bufBatch
,
bufBatchSize
,
sizeof
(
BatchNode
),
CompareBatchNode
);
}
/*int seq = MAX(nextSeq, 0);
int wcEnc = 0;
int wcDec = 0;
int wnEnc = 0;
...
...
@@ -813,10 +902,8 @@ int T2TTrainer::LoadBatchMT(FILE * file,
while(seq + sc < nseqBuf){
/* source-side sequence */
wnEnc = seqLen[seq + sc];
/* target-side sequence */
wnDec = isDoubledEnd ? seqLen[seq + sc + 1] : seqLen[seq + sc + 1] - 1;
int tcEnc = isBigBatch ? (wcEnc + wnEnc): MAX(maxEnc, wnEnc) * (sc + 2) / 2;
...
...
@@ -841,8 +928,18 @@ int T2TTrainer::LoadBatchMT(FILE * file,
nextSeq = seq + sc;
if(sc <= 0)
return 0;*/
if
(
bufBatchSize
<=
0
)
return
0
;
BatchNode
&
batch
=
bufBatch
[
nextBatch
++
];
int
seq
=
batch
.
beg
;
int
sc
=
batch
.
end
-
batch
.
beg
;
int
maxEnc
=
batch
.
maxEnc
;
int
maxDec
=
batch
.
maxDec
;
CheckNTErrors
(
sc
%
2
==
0
,
"The input samples must be paired"
);
int
sCount
=
sc
/
2
;
int
seqSize
=
0
;
int
dimsDec
[
3
]
=
{
sCount
,
maxDec
,
vsDec
};
...
...
@@ -861,13 +958,14 @@ int T2TTrainer::LoadBatchMT(FILE * file,
int
wCountEnc
=
0
;
int
wCountDec
=
0
;
int
wCountPad
=
0
;
int
wGold
=
0
;
wCount
=
0
;
int
*
batchEncValues
=
new
int
[
batchEnc
->
unitNum
];
int
*
batchDecValues
=
new
int
[
batchDec
->
unitNum
];
//MTYPE * paddingEncOffsets = new MTYPE[sc * maxEnc / 2];
//
MTYPE * paddingDecOffsets = new MTYPE[sc * maxDec / 2];
MTYPE
*
paddingDecOffsets
=
new
MTYPE
[
sc
*
maxDec
/
2
];
MTYPE
*
goldOffsets
=
new
MTYPE
[
sc
*
maxDec
/
2
];
memset
(
batchEncValues
,
0
,
sizeof
(
int
)
*
batchEnc
->
unitNum
);
...
...
@@ -901,7 +999,10 @@ int T2TTrainer::LoadBatchMT(FILE * file,
int
num
=
buf
[
seqOffset
[
s
]
+
w
];
batchDecValues
[
batchDec
->
GetOffset2D
(
sent
,
w
)]
=
num
;
//paddingDecOffsets[wCountDec] = paddingDec->GetOffset2D(sent, w);
if
(
w
<
len
-
1
){
paddingDecOffsets
[
wCountPad
++
]
=
paddingDec
->
GetOffset2D
(
sent
,
w
);
wCount
++
;
}
if
(
w
>
0
)
goldOffsets
[
wGold
++
]
=
gold
->
GetOffset3D
(
sent
,
w
-
1
,
buf
[
seqOffset
[
s
]
+
w
]);
...
...
@@ -911,7 +1012,7 @@ int T2TTrainer::LoadBatchMT(FILE * file,
else
goldOffsets
[
wGold
++
]
=
gold
->
GetOffset3D
(
sent
,
w
,
buf
[
seqOffset
[
s
]
+
w
+
1
]);
}
wCount
++
;
//
wCount++;
wCountDec
++
;
if
(
seqs
!=
NULL
)
seqs
[
seqSize
++
]
=
buf
[
seqOffset
[
s
]
+
w
];
...
...
@@ -924,19 +1025,19 @@ int T2TTrainer::LoadBatchMT(FILE * file,
}
batchDec
->
SetData
(
batchDecValues
,
batchDec
->
unitNum
);
//paddingDec->SetDataBatched(paddingDecOffsets, 1.0F, wCountDec
);
paddingDec
->
SetDataBatched
(
paddingDecOffsets
,
1.0
F
,
wCountPad
);
XTensor
*
tmp2
=
NewTensorBuf
(
paddingDec
,
devID
,
mem
);
_ConvertDataType
(
batchDec
,
tmp2
);
_NotEqual
(
tmp2
,
paddingDec
,
0
);
DelTensorBuf
(
tmp2
);
//
XTensor * tmp2 = NewTensorBuf(paddingDec, devID, mem);
//
_ConvertDataType(batchDec, tmp2);
//
_NotEqual(tmp2, paddingDec, 0);
//
DelTensorBuf(tmp2);
gold
->
SetDataBatched
(
goldOffsets
,
1.0
F
,
wGold
);
delete
[]
batchEncValues
;
delete
[]
batchDecValues
;
//delete[] paddingEncOffsets;
//
delete[] paddingDecOffsets;
delete
[]
paddingDecOffsets
;
delete
[]
goldOffsets
;
return
sc
;
...
...
source/sample/transformer/T2TTrainer.h
查看文件 @
0b43acf6
...
...
@@ -33,6 +33,25 @@ using namespace nts;
namespace
transformer
{
/* record that describes one batch cut out of the sequence buffer */
struct BatchNode
{
    /* beginning position of the batch */
    int beg;

    /* end position of the batch */
    int end;

    /* maximum number of words on the encoder side */
    int maxEnc;

    /* maximum number of words on the decoder side */
    int maxDec;

    /* key used when the batches are sorted */
    int key;
};
/* trainer of the T2T model */
class
T2TTrainer
{
...
...
@@ -49,9 +68,15 @@ public:
/* another buffer */
int
*
buf2
;
/* batch buf */
BatchNode
*
bufBatch
;
/* buffer size */
int
bufSize
;
/* size of batch buffer */
int
bufBatchSize
;
/* length of each sequence */
int
*
seqLen
;
...
...
@@ -66,6 +91,9 @@ public:
/* offset for next sequence in the buffer */
int
nextSeq
;
/* offset for next batch */
int
nextBatch
;
/* indicates whether the sequence is sorted by length */
bool
isLenSorted
;
...
...
@@ -142,6 +170,9 @@ public:
/* counterpart of "isSmallBatch" */
bool
isBigBatch
;
/* randomize batches */
bool
isRandomBatch
;
/* indicates whether we intend to debug the net */
bool
isDebugged
;
...
...
source/sample/transformer/Transformer.cpp
查看文件 @
0b43acf6
...
...
@@ -59,23 +59,28 @@ int TransformerMain(int argc, const char ** argv)
LoadParamString
(
argc
,
args
,
"test"
,
testFN
,
""
);
LoadParamString
(
argc
,
args
,
"output"
,
outputFN
,
""
);
srand
((
unsigned
int
)
time
(
NULL
));
T2TTrainer
trainer
;
trainer
.
Init
(
argc
,
args
);
T2TModel
model
;
model
.
InitModel
(
argc
,
args
);
//if(strcmp(modelFN, ""))
//model.Read(modelFN);
/* learn model parameters */
if
(
strcmp
(
trainFN
,
""
))
trainer
.
Train
(
trainFN
,
testFN
,
strcmp
(
modelFN
,
""
)
?
modelFN
:
"checkpoint.model"
,
&
model
);
/* save the final model */
if
(
strcmp
(
modelFN
,
""
)
&&
strcmp
(
trainFN
,
""
))
model
.
Dump
(
modelFN
);
//
if(strcmp(modelFN, "") && strcmp(trainFN, ""))
//
model.Dump(modelFN);
/* load the model if necessary */
if
(
strcmp
(
modelFN
,
""
))
model
.
Read
(
modelFN
);
//
if(strcmp(modelFN, ""))
//
model.Read(modelFN);
T2TTrainer
tester
;
tester
.
Init
(
argc
,
args
);
...
...
source/tensor/XDevice.cpp
查看文件 @
0b43acf6
...
...
@@ -60,6 +60,7 @@ XDevice::~XDevice()
cublasDestroy
(
cublasHandle
);
if
(
stream
!=
NULL
)
delete
stream
;
curandDestroyGenerator
(
gen
);
#endif
}
...
...
@@ -82,6 +83,10 @@ void XDevice::Init(int myDevID)
cudaDeviceProp
prop
;
cudaSetDevice
(
myDevID
);
curandCreateGenerator
(
&
gen
,
CURAND_RNG_PSEUDO_DEFAULT
);
curandSetPseudoRandomGeneratorSeed
(
gen
,
seed
);
if
(
cudaGetDeviceProperties
(
&
prop
,
devID
)
!=
cudaSuccess
){
XPRINT1
(
0
,
stderr
,
"cannot get GPU(%d) information."
,
devID
);
exit
(
1
);
...
...
source/tensor/XDevice.h
查看文件 @
0b43acf6
...
...
@@ -112,6 +112,9 @@ public:
/* specify if the handle is initialized */
bool
isHandleReady
;
/* generator of random numbers */
curandGenerator_t
gen
;
#endif
...
...
source/tensor/XTensor.cpp
查看文件 @
0b43acf6
...
...
@@ -1614,11 +1614,17 @@ void XTensor::Dump(FILE * file, const char * label, const int n, const int beg,
else
if
(
dataType
==
X_INT
)
{
int
end
=
MIN
(
n
>
0
?
beg
+
n
:
beg
+
unitNum
,
unitNum
);
for
(
int
i
=
beg
;
i
<
end
;
i
++
){
if
((
i
%
(
dimSize
[
1
])
==
0
)
&&
(
i
!=
0
))
{
fprintf
(
file
,
"
\n
"
);
}
int
f
=
((
int
*
)
d
)[
i
];
if
(
i
==
beg
)
fprintf
(
file
,
"%d"
,
f
);
else
fprintf
(
file
,
" %d"
,
f
);
//if((i%(dimSize[1]-1) == 0)&&(i!=0)) {
//fprintf(file, " \n");
//}
}
}
else
...
...
source/tensor/core/getandset/SetData.cpp
查看文件 @
0b43acf6
...
...
@@ -387,7 +387,7 @@ generate data items with a uniform distribution in [lower, upper]
>> lower - lower value of the range
>> upper - upper value of the range
*/
void
_SetDataRand
(
XTensor
*
tensor
,
DTYPE
lower
,
DTYPE
upper
)
void
_SetDataRand
(
const
XTensor
*
tensor
,
DTYPE
lower
,
DTYPE
upper
)
{
CheckNTErrors
(
upper
>
lower
,
"the high value must be greater than low value!"
);
...
...
@@ -430,6 +430,39 @@ void _SetDataRand(XTensor * tensor, DTYPE lower, DTYPE upper)
//delete t2;
}
}
/*
generate data items with a uniform distribution in [lower, upper] and set
the item to a pre-defined value if the item >= p, set the item to 0 otherwise
>> tensor - the tensor whose data array would be initialized
>> lower - lower value of the range
>> upper - upper value of the range
>> p - the threshold
>> value - the value we intend to assign to the item
*/
void
_SetDataRandP
(
const
XTensor
*
tensor
,
DTYPE
lower
,
DTYPE
upper
,
DTYPE
p
,
DTYPE
value
)
{
CheckNTErrors
(
tensor
->
dataType
==
DEFAULT_DTYPE
,
"TODO"
);
if
(
tensor
->
devID
<
0
)
{
_SetDataRand
(
tensor
,
lower
,
upper
);
DTYPE
*
data
=
(
DTYPE
*
)
tensor
->
data
;
for
(
int
i
=
0
;
i
<
tensor
->
unitNum
;
i
++
)
{
if
(
data
[
i
]
>=
p
)
data
[
i
]
=
value
;
else
data
[
i
]
=
0
;
}
}
else
{
#ifdef USE_CUDA
_CudaSetDataRandP
(
tensor
,
lower
,
upper
,
p
,
value
);
#else
ShowNTErrors
(
"Please recompile the code by specifying USE_CUDA"
);
#endif // USE_CUDA
}
}
/*
generate data items with a normal distribution with specified mean and standard deviation
...
...
source/tensor/core/getandset/SetData.cu
查看文件 @
0b43acf6
...
...
@@ -185,6 +185,26 @@ void KernelSetDataRandDouble(double * d, int size, DTYPE lower, DTYPE variance)
}
}
/*
set data items to a pre-defined value if its value >= p, set it to 0 otherwise
>> d - pointer to the data array
>> size - size of the array
>> p - the threshold
>> value - the value we intend to assign to the item
*/
__global__
void KernelSetDataPCut(DTYPE * d, int size, DTYPE p, DTYPE value)
{
    /* index of the item handled by this thread */
    int i = blockDim.x * blockIdx.x + threadIdx.x;

    /* threads that fall behind the end of the array have nothing to do */
    if (i >= size)
        return;

    d[i] = (d[i] >= p) ? value : (DTYPE)0;
}
/*
set data items along with a given dimension (and keep the remaining items unchanged) - kernel version
>> tensor - the tensor whose data array would be initialized
...
...
@@ -437,7 +457,7 @@ generate data items with a uniform distribution in [lower, upper]
>> lower - lower value of the range
>> upper - upper value of the range
*/
void _CudaSetDataRand(XTensor * tensor, DTYPE lower, DTYPE upper)
void _CudaSetDataRand(
const
XTensor * tensor, DTYPE lower, DTYPE upper)
{
CheckNTErrors(upper > lower, "the high value must be greater than low value!");
...
...
@@ -452,17 +472,46 @@ void _CudaSetDataRand(XTensor * tensor, DTYPE lower, DTYPE upper)
int devIDBackup;
ProtectCudaDev(tensor->devID, devIDBackup);
curandGenerator_t gen;
curandCreateGenerator (&gen, CURAND_RNG_PSEUDO_DEFAULT);
curandSetPseudoRandomGeneratorSeed(gen, time(NULL));
curandGenerator_t & gen = GDevs.GPUs[tensor->devID].gen;
curandGenerateUniform(gen , (float*)tensor->data , tensor->unitNum);
curandDestroyGenerator(gen);
DTYPE variance = upper - lower;
if (tensor->dataType == X_FLOAT)
KernelSetDataRandFloat <<<blocks, threads >>>((float*) tensor->data, tensor->unitNum, lower, variance);
else if (tensor->dataType == X_DOUBLE)
KernelSetDataRandDouble <<<blocks, threads >>>((double*)tensor->data, tensor->unitNum, lower, variance);
if(variance != 1.0F || lower != 0){
if (tensor->dataType == X_FLOAT)
KernelSetDataRandFloat <<<blocks, threads >>>((float*) tensor->data, tensor->unitNum, lower, variance);
else if (tensor->dataType == X_DOUBLE)
KernelSetDataRandDouble <<<blocks, threads >>>((double*)tensor->data, tensor->unitNum, lower, variance);
}
BacktoCudaDev(tensor->devID, devIDBackup);
}
/*
generate data items with a uniform distribution in [lower, upper] and set
the item to a pre-defined value if the item >= p, set the item to 0 otherwise
>> tensor - the tensor whose data array would be initialized
>> lower - lower value of the range
>> upper - upper value of the range
>> p - the threshold
>> value - the value we intend to assign to the item
*/
void _CudaSetDataRandP(const XTensor * tensor, DTYPE lower, DTYPE upper, DTYPE p, DTYPE value)
{
_CudaSetDataRand(tensor, lower, upper);
int gridSize[3];
int blockSize[3];
GDevs.GetCudaThread(tensor->devID, tensor->unitNum, gridSize, blockSize);
dim3 blocks(gridSize[0]);
dim3 threads(blockSize[0]);
int devIDBackup;
ProtectCudaDev(tensor->devID, devIDBackup);
KernelSetDataPCut << <blocks, threads >> >((float*)tensor->data, tensor->unitNum, p, value);
BacktoCudaDev(tensor->devID, devIDBackup);
}
...
...
source/tensor/core/getandset/SetData.cuh
查看文件 @
0b43acf6
...
...
@@ -47,7 +47,11 @@ void _CudaSetDataIndexed(XTensor * source, XTensor * modify, int dim, int index)
void _CudaSetDataLowTri(XTensor * tensor, DTYPE p, int shift);
/* generate data items with a uniform distribution in [lower, upper] */
void _CudaSetDataRand(XTensor * tensor, DTYPE lower, DTYPE upper);
void _CudaSetDataRand(const XTensor * tensor, DTYPE lower, DTYPE upper);
/* generate data items with a uniform distribution in [lower, upper] and set
the item to a pre-defined value if the item >= p, set the item to 0 otherwise */
void _CudaSetDataRandP(const XTensor * tensor, DTYPE lower, DTYPE upper, DTYPE p, DTYPE value);
/* set the data with an array of offsets */
void _CudaSetDataWithOffset(XTensor * tensor, MTYPE * offsets, DTYPE value, MTYPE num);
...
...
source/tensor/core/getandset/SetData.h
查看文件 @
0b43acf6
...
...
@@ -55,7 +55,11 @@ void _SetDataIndexed(XTensor * source, XTensor * modify, int dim, int index);
void
_SetDataLowTri
(
XTensor
*
tensor
,
DTYPE
p
,
int
shift
);
/* generate data items with a uniform distribution in [lower, upper] */
void
_SetDataRand
(
XTensor
*
tensor
,
DTYPE
lower
,
DTYPE
upper
);
void
_SetDataRand
(
const
XTensor
*
tensor
,
DTYPE
lower
,
DTYPE
upper
);
/* generate data items with a uniform distribution in [lower, upper] and set
the item to a pre-defined value if the item >= p, set the item to 0 otherwise */
void
_SetDataRandP
(
const
XTensor
*
tensor
,
DTYPE
lower
,
DTYPE
upper
,
DTYPE
p
,
DTYPE
value
);
/* generate data items with a normal distribution with specified mean and standard deviation */
void
_SetDataRandN
(
XTensor
*
tensor
,
DTYPE
mean
=
0
.
0
F
,
DTYPE
standardDeviation
=
1
.
0
F
);
...
...
source/tensor/function/Dropout.cpp
查看文件 @
0b43acf6
...
...
@@ -26,6 +26,7 @@
#include "../core/arithmetic/Multiply.h"
#include "../core/arithmetic/MultiplyDim.h"
#include "../core/math/ScaleAndShift.h"
#include "../core/getandset/SetData.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor
...
...
@@ -147,17 +148,21 @@ XTensor Dropout(const XTensor &x, DTYPE dropProb, int leadingDim, int leadingDim
XTensor
mask
;
DTYPE
*
maskArray
=
NULL
;
DTYPE
scaleFactor
=
(
DTYPE
)
1.0
/
((
DTYPE
)
1.0
-
dropProb
);
if
(
leadingDim
<
0
&&
leadingDim2
<
0
){
ShowNTErrors
(
"TODO"
);
XTensor
mask
;
InitTensor
(
&
mask
,
&
x
);
_SetDataRandP
(
&
mask
,
0
,
1.0
F
,
dropProb
,
scaleFactor
);
return
Multiply
(
x
,
mask
);
}
else
if
(
leadingDim2
<
0
){
int
n
=
leadingDim
;
CheckNTErrors
(
n
>=
0
&&
n
<
x
.
order
,
"Wrong leadingDim!"
);
DTYPE
scaleFactor
=
(
DTYPE
)
1.0
/
((
DTYPE
)
1.0
-
dropProb
);
/* generate a mask tensor with probability p */
int
unitNum
=
x
.
dimSize
[
n
];
maskArray
=
new
DTYPE
[
unitNum
];
...
...
@@ -180,8 +185,6 @@ XTensor Dropout(const XTensor &x, DTYPE dropProb, int leadingDim, int leadingDim
CheckNTErrors
(
n
>=
0
&&
n
<
x
.
order
,
"Wrong leadingDim!"
);
CheckNTErrors
(
m
>=
0
&&
m
<
x
.
order
,
"Wrong leadingDim!"
);
DTYPE
scaleFactor
=
(
DTYPE
)
1.0
/
((
DTYPE
)
1.0
-
dropProb
);
/* generate a mask tensor with probability p */
int
unitNum
=
x
.
dimSize
[
n
]
*
x
.
dimSize
[
m
];
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论