Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
T
Tensor.LowPrecision
概览
Overview
Details
Activity
Cycle Analytics
版本库
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
问题
0
Issues
0
列表
Board
标记
里程碑
合并请求
0
Merge Requests
0
CI / CD
CI / CD
流水线
作业
日程表
图表
维基
Wiki
代码片段
Snippets
成员
Collapse sidebar
Close sidebar
活动
图像
聊天
创建新问题
作业
提交
Issue Boards
Open sidebar
linye
Tensor.LowPrecision
Commits
2c4061e9
Commit
2c4061e9
authored
Jul 30, 2019
by
ltb
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fixed FNNLM of branch of xiao
parent
3800528b
显示空白字符变更
内嵌
并排
正在显示
4 个修改的文件
包含
570 行增加
和
564 行删除
+570
-564
source/network/Main.cpp
+0
-1
source/sample/fnnlm/FNNLM.cpp
+340
-337
source/tensor/Main.cpp
+219
-215
source/tensor/loss/CrossEntropy.cu
+11
-11
没有找到文件。
source/network/Main.cpp
查看文件 @
2c4061e9
...
...
@@ -24,7 +24,6 @@
#include "../tensor/XUtility.h"
#include "../tensor/function/FHeader.h"
#include "../tensor/core/CHeader.h"
#include "../tensor/test/Test.h"
#include "../sample/fnnlm/FNNLM.h"
#include "../sample/transformer/Transformer.h"
...
...
source/sample/fnnlm/FNNLM.cpp
查看文件 @
2c4061e9
...
...
@@ -15,7 +15,7 @@
* limitations under the License.
*/
/*
/*
*
* This is a simple impelementation of the feed-forward network-baesd language
* model (FNNLM). See more details about FNNLM in
...
...
@@ -32,6 +32,7 @@
#include "../../tensor/XDevice.h"
#include "../../tensor/function/FHeader.h"
#include "../../network/XNet.h"
#include "../../tensor/core/math/ScaleAndShift.h"
namespace
fnnlm
{
...
...
@@ -39,50 +40,50 @@ namespace fnnlm
#define MAX_NAME_LENGTH 1024
#define MAX_LINE_LENGTH_HERE 1024 * 32
char
trainFN
[
MAX_NAME_LENGTH
]
=
""
;
// file name of the training data
char
modelFN
[
MAX_NAME_LENGTH
]
=
""
;
// file name of the FNN model
char
testFN
[
MAX_NAME_LENGTH
]
=
""
;
// file name of the test data
char
outputFN
[
MAX_NAME_LENGTH
]
=
""
;
// file name of the result data
float
learningRate
=
0.01
F
;
// learning rate
int
nStep
=
10000000
;
// max learning steps (or model updates)
int
nEpoch
=
10
;
// max training epochs
float
minmax
=
0.08
F
;
// range [-p,p] for parameter initialization
int
sentBatch
=
0
;
// batch size at the sentence level
int
wordBatch
=
1
;
// batch size at the word level
bool
shuffled
=
false
;
// shuffled the training data file or not
bool
autoDiff
=
false
;
// indicator of automatic differentiation
void
LoadArgs
(
int
argc
,
const
char
**
argv
,
FNNModel
&
model
);
void
Init
(
FNNModel
&
model
);
void
Check
(
FNNModel
&
model
);
void
Copy
(
FNNModel
&
tgt
,
FNNModel
&
src
);
void
Clear
(
FNNModel
&
model
,
bool
isNodeGrad
);
void
InitModelTensor1D
(
XTensor
&
tensor
,
int
num
,
FNNModel
&
model
);
void
InitModelTensor2D
(
XTensor
&
tensor
,
int
rowNum
,
int
colNum
,
FNNModel
&
model
);
void
Train
(
const
char
*
train
,
bool
isShuffled
,
FNNModel
&
model
);
void
Update
(
FNNModel
&
model
,
FNNModel
&
grad
,
float
epsilon
,
bool
isNodeGrad
);
float
GetProb
(
XTensor
&
output
,
XTensor
&
gold
,
XTensor
*
wordProbs
=
NULL
);
void
Dump
(
const
char
*
fn
,
FNNModel
&
model
);
void
Read
(
const
char
*
fn
,
FNNModel
&
model
);
void
Test
(
const
char
*
test
,
const
char
*
result
,
FNNModel
&
model
);
int
LoadNGrams
(
FILE
*
file
,
int
n
,
NGram
*
ngrams
,
int
sentNum
,
int
wordNum
);
void
InitZeroOneTensor2D
(
XTensor
&
tensor
,
int
rowNum
,
int
colNum
,
int
*
rows
,
int
*
cols
,
char
trainFN
[
MAX_NAME_LENGTH
]
=
""
;
// file name of the training data
char
modelFN
[
MAX_NAME_LENGTH
]
=
""
;
// file name of the FNN model
char
testFN
[
MAX_NAME_LENGTH
]
=
""
;
// file name of the test data
char
outputFN
[
MAX_NAME_LENGTH
]
=
""
;
// file name of the result data
float
learningRate
=
0.01
F
;
// learning rate
int
nStep
=
10000000
;
// max learning steps (or model updates)
int
nEpoch
=
10
;
// max training epochs
float
minmax
=
0.08
F
;
// range [-p,p] for parameter initialization
int
sentBatch
=
0
;
// batch size at the sentence level
int
wordBatch
=
1
;
// batch size at the word level
bool
shuffled
=
false
;
// shuffled the training data file or not
bool
autoDiff
=
false
;
// indicator of automatic differentiation
void
LoadArgs
(
int
argc
,
const
char
**
argv
,
FNNModel
&
model
);
void
Init
(
FNNModel
&
model
);
void
Check
(
FNNModel
&
model
);
void
Copy
(
FNNModel
&
tgt
,
FNNModel
&
src
);
void
Clear
(
FNNModel
&
model
,
bool
isNodeGrad
);
void
InitModelTensor1D
(
XTensor
&
tensor
,
int
num
,
FNNModel
&
model
);
void
InitModelTensor2D
(
XTensor
&
tensor
,
int
rowNum
,
int
colNum
,
FNNModel
&
model
);
void
Train
(
const
char
*
train
,
bool
isShuffled
,
FNNModel
&
model
);
void
Update
(
FNNModel
&
model
,
FNNModel
&
grad
,
float
epsilon
,
bool
isNodeGrad
);
float
GetProb
(
XTensor
&
output
,
XTensor
&
gold
,
XTensor
*
wordProbs
=
NULL
);
void
Dump
(
const
char
*
fn
,
FNNModel
&
model
);
void
Read
(
const
char
*
fn
,
FNNModel
&
model
);
void
Test
(
const
char
*
test
,
const
char
*
result
,
FNNModel
&
model
);
int
LoadNGrams
(
FILE
*
file
,
int
n
,
NGram
*
ngrams
,
int
sentNum
,
int
wordNum
);
void
InitZeroOneTensor2D
(
XTensor
&
tensor
,
int
rowNum
,
int
colNum
,
int
*
rows
,
int
*
cols
,
int
itemNum
,
int
devID
,
XMem
*
mem
);
void
MakeWordBatch
(
XTensor
&
batch
,
NGram
*
ngrams
,
int
ngramNum
,
int
n
,
int
vSize
,
int
devID
,
XMem
*
mem
);
void
Forward
(
XTensor
inputs
[],
XTensor
&
output
,
FNNModel
&
model
,
FNNNet
&
net
);
void
Backward
(
XTensor
inputs
[],
XTensor
&
output
,
XTensor
&
gold
,
LOSS_FUNCTION_NAME
loss
,
void
MakeWordBatch
(
XTensor
&
batch
,
NGram
*
ngrams
,
int
ngramNum
,
int
n
,
int
vSize
,
int
devID
,
XMem
*
mem
);
void
Forward
(
XTensor
inputs
[],
XTensor
&
output
,
FNNModel
&
model
,
FNNNet
&
net
);
void
Backward
(
XTensor
inputs
[],
XTensor
&
output
,
XTensor
&
gold
,
LOSS_FUNCTION_NAME
loss
,
FNNModel
&
model
,
FNNModel
&
grad
,
FNNNet
&
net
);
void
ForwardAutoDiff
(
XTensor
inputs
[],
XTensor
&
output
,
FNNModel
&
model
);
void
ForwardAutoDiff
(
NGram
*
ngrams
,
int
batch
,
XTensor
&
output
,
FNNModel
&
model
);
void
ForwardAutoDiff
(
XTensor
inputs
[],
XTensor
&
output
,
FNNModel
&
model
);
void
ForwardAutoDiff
(
NGram
*
ngrams
,
int
batch
,
XTensor
&
output
,
FNNModel
&
model
);
/*
entry of the program
>> argc - number of the arguments
>> argv - pointers to the arguments
<< return - error code
/*
entry of the program
>> argc - number of the arguments
>> argv - pointers to the arguments
<< return - error code
arguments:
arguments:
-train S: specify training data file name
-model S: specify model file name
-test S: specify test data file name
...
...
@@ -110,10 +111,10 @@ arguments:
E.g.,
0 29 2 11 1
might be a line of the file.
*/
int
FNNLMMain
(
int
argc
,
const
char
**
argv
)
{
if
(
argc
==
0
)
*/
int
FNNLMMain
(
int
argc
,
const
char
**
argv
)
{
if
(
argc
==
0
)
return
1
;
FNNModel
model
;
...
...
@@ -128,127 +129,127 @@ int FNNLMMain(int argc, const char ** argv)
Init
(
model
);
/* learn model parameters */
if
(
strcmp
(
trainFN
,
""
))
if
(
strcmp
(
trainFN
,
""
))
Train
(
trainFN
,
shuffled
,
model
);
/* save the final model */
if
(
strcmp
(
modelFN
,
""
)
&&
strcmp
(
trainFN
,
""
))
if
(
strcmp
(
modelFN
,
""
)
&&
strcmp
(
trainFN
,
""
))
Dump
(
modelFN
,
model
);
/* load the model if neccessary */
if
(
strcmp
(
modelFN
,
""
))
if
(
strcmp
(
modelFN
,
""
))
Read
(
modelFN
,
model
);
/* test the model on the new data */
if
(
strcmp
(
testFN
,
""
)
&&
strcmp
(
outputFN
,
""
))
if
(
strcmp
(
testFN
,
""
)
&&
strcmp
(
outputFN
,
""
))
Test
(
testFN
,
outputFN
,
model
);
return
0
;
}
/*
load arguments
>> argc - number of the arguments
>> argv - pointers to the arguments
>> model - the fnn model
*/
void
LoadArgs
(
int
argc
,
const
char
**
argv
,
FNNModel
&
model
)
{
}
/*
load arguments
>> argc - number of the arguments
>> argv - pointers to the arguments
>> model - the fnn model
*/
void
LoadArgs
(
int
argc
,
const
char
**
argv
,
FNNModel
&
model
)
{
fprintf
(
stderr
,
"args:
\n
"
);
for
(
int
i
=
0
;
i
<
argc
;
i
++
)
{
if
(
!
strcmp
(
argv
[
i
],
"-train"
)
&&
i
+
1
<
argc
)
{
for
(
int
i
=
0
;
i
<
argc
;
i
++
)
{
if
(
!
strcmp
(
argv
[
i
],
"-train"
)
&&
i
+
1
<
argc
)
{
strcpy
(
trainFN
,
argv
[
i
+
1
]);
fprintf
(
stderr
,
" -train=%s
\n
"
,
argv
[
i
+
1
]);
}
if
(
!
strcmp
(
argv
[
i
],
"-model"
)
&&
i
+
1
<
argc
)
{
if
(
!
strcmp
(
argv
[
i
],
"-model"
)
&&
i
+
1
<
argc
)
{
strcpy
(
modelFN
,
argv
[
i
+
1
]);
fprintf
(
stderr
,
" -model=%s
\n
"
,
argv
[
i
+
1
]);
}
if
(
!
strcmp
(
argv
[
i
],
"-test"
)
&&
i
+
1
<
argc
)
{
if
(
!
strcmp
(
argv
[
i
],
"-test"
)
&&
i
+
1
<
argc
)
{
strcpy
(
testFN
,
argv
[
i
+
1
]);
fprintf
(
stderr
,
" -test=%s
\n
"
,
argv
[
i
+
1
]);
}
if
(
!
strcmp
(
argv
[
i
],
"-output"
)
&&
i
+
1
<
argc
)
{
if
(
!
strcmp
(
argv
[
i
],
"-output"
)
&&
i
+
1
<
argc
)
{
strcpy
(
outputFN
,
argv
[
i
+
1
]);
fprintf
(
stderr
,
" -output=%s
\n
"
,
argv
[
i
+
1
]);
}
if
(
!
strcmp
(
argv
[
i
],
"-n"
)
&&
i
+
1
<
argc
)
{
if
(
!
strcmp
(
argv
[
i
],
"-n"
)
&&
i
+
1
<
argc
)
{
model
.
n
=
atoi
(
argv
[
i
+
1
]);
fprintf
(
stderr
,
" -n=%d
\n
"
,
model
.
n
);
}
if
(
!
strcmp
(
argv
[
i
],
"-esize"
)
&&
i
+
1
<
argc
)
{
if
(
!
strcmp
(
argv
[
i
],
"-esize"
)
&&
i
+
1
<
argc
)
{
model
.
eSize
=
atoi
(
argv
[
i
+
1
]);
fprintf
(
stderr
,
" -esize=%d
\n
"
,
model
.
eSize
);
}
if
(
!
strcmp
(
argv
[
i
],
"-vsize"
)
&&
i
+
1
<
argc
)
{
if
(
!
strcmp
(
argv
[
i
],
"-vsize"
)
&&
i
+
1
<
argc
)
{
model
.
vSize
=
atoi
(
argv
[
i
+
1
]);
fprintf
(
stderr
,
" -vsize=%d
\n
"
,
model
.
vSize
);
}
if
(
!
strcmp
(
argv
[
i
],
"-hdepth"
)
&&
i
+
1
<
argc
)
{
if
(
!
strcmp
(
argv
[
i
],
"-hdepth"
)
&&
i
+
1
<
argc
)
{
model
.
hDepth
=
atoi
(
argv
[
i
+
1
]);
fprintf
(
stderr
,
" -hdepth=%d
\n
"
,
model
.
hDepth
);
}
if
(
!
strcmp
(
argv
[
i
],
"-hsize"
)
&&
i
+
1
<
argc
)
{
if
(
!
strcmp
(
argv
[
i
],
"-hsize"
)
&&
i
+
1
<
argc
)
{
model
.
hSize
=
atoi
(
argv
[
i
+
1
]);
fprintf
(
stderr
,
" -hsize=%d
\n
"
,
model
.
hSize
);
}
if
(
!
strcmp
(
argv
[
i
],
"-lrate"
)
&&
i
+
1
<
argc
)
{
if
(
!
strcmp
(
argv
[
i
],
"-lrate"
)
&&
i
+
1
<
argc
)
{
learningRate
=
(
float
)
atof
(
argv
[
i
+
1
]);
fprintf
(
stderr
,
" -lrate=%f
\n
"
,
learningRate
);
}
if
(
!
strcmp
(
argv
[
i
],
"-nstep"
)
&&
i
+
1
<
argc
)
{
if
(
!
strcmp
(
argv
[
i
],
"-nstep"
)
&&
i
+
1
<
argc
)
{
nStep
=
atoi
(
argv
[
i
+
1
]);
fprintf
(
stderr
,
" -nstep=%d
\n
"
,
nStep
);
}
if
(
!
strcmp
(
argv
[
i
],
"-nepoch"
)
&&
i
+
1
<
argc
)
{
if
(
!
strcmp
(
argv
[
i
],
"-nepoch"
)
&&
i
+
1
<
argc
)
{
nEpoch
=
atoi
(
argv
[
i
+
1
]);
fprintf
(
stderr
,
" -nepoch=%d
\n
"
,
nEpoch
);
}
if
(
!
strcmp
(
argv
[
i
],
"-minmax"
)
&&
i
+
1
<
argc
)
{
if
(
!
strcmp
(
argv
[
i
],
"-minmax"
)
&&
i
+
1
<
argc
)
{
minmax
=
(
float
)
fabs
(
atof
(
argv
[
i
+
1
]));
fprintf
(
stderr
,
" -minmax=%f
\n
"
,
minmax
);
}
if
(
!
strcmp
(
argv
[
i
],
"-batch"
)
&&
i
+
1
<
argc
)
{
if
(
!
strcmp
(
argv
[
i
],
"-batch"
)
&&
i
+
1
<
argc
)
{
sentBatch
=
atoi
(
argv
[
i
+
1
]);
fprintf
(
stderr
,
" -batch=%d
\n
"
,
sentBatch
);
}
if
(
!
strcmp
(
argv
[
i
],
"-wbatch"
)
&&
i
+
1
<
argc
)
{
if
(
!
strcmp
(
argv
[
i
],
"-wbatch"
)
&&
i
+
1
<
argc
)
{
wordBatch
=
atoi
(
argv
[
i
+
1
]);
fprintf
(
stderr
,
" -wbatch=%d
\n
"
,
wordBatch
);
}
if
(
!
strcmp
(
argv
[
i
],
"-shuffle"
))
{
if
(
!
strcmp
(
argv
[
i
],
"-shuffle"
))
{
shuffled
=
true
;
fprintf
(
stderr
,
" -shuffle=true
\n
"
);
}
if
(
!
strcmp
(
argv
[
i
],
"-autodiff"
))
{
if
(
!
strcmp
(
argv
[
i
],
"-autodiff"
))
{
autoDiff
=
true
;
fprintf
(
stderr
,
" -autodiff=true
\n
"
);
}
if
(
!
strcmp
(
argv
[
i
],
"-dev"
)
&&
i
+
1
<
argc
)
{
if
(
!
strcmp
(
argv
[
i
],
"-dev"
)
&&
i
+
1
<
argc
)
{
model
.
devID
=
atoi
(
argv
[
i
+
1
]);
fprintf
(
stderr
,
" -dev=%d
\n
"
,
model
.
devID
);
}
}
for
(
int
i
=
0
;
i
<
argc
;
i
++
)
{
for
(
int
i
=
0
;
i
<
argc
;
i
++
)
{
if
(
!
strcmp
(
argv
[
i
],
"-mempool"
))
model
.
mem
=
new
XMem
(
model
.
devID
);
}
}
}
/* check model settings */
void
Check
(
FNNModel
&
model
)
{
/* check model settings */
void
Check
(
FNNModel
&
model
)
{
CheckErrors
(
model
.
n
>
0
&&
model
.
n
<=
MAX_N_GRAM
,
"The LM order is out of range (use -n)!"
);
CheckErrors
(
model
.
vSize
>
0
,
"no vocabulary size found (use -vsize)!"
);
CheckErrors
(
model
.
eSize
>
0
,
"no embedding size found (use -esize)!"
);
}
}
/* make a hard copy of the fnn model */
void
Copy
(
FNNModel
&
tgt
,
FNNModel
&
src
)
{
/* make a hard copy of the fnn model */
void
Copy
(
FNNModel
&
tgt
,
FNNModel
&
src
)
{
InitTensorV2
(
&
tgt
.
embeddingW
,
&
src
.
embeddingW
);
for
(
int
i
=
0
;
i
<
MAX_HIDDEN_NUM
;
i
++
)
{
for
(
int
i
=
0
;
i
<
MAX_HIDDEN_NUM
;
i
++
)
{
InitTensorV2
(
&
tgt
.
hiddenW
[
i
],
&
src
.
hiddenW
[
i
]);
InitTensorV2
(
&
tgt
.
hiddenB
[
i
],
&
src
.
hiddenB
[
i
]);
}
...
...
@@ -262,33 +263,33 @@ void Copy(FNNModel &tgt, FNNModel &src)
tgt
.
vSize
=
src
.
vSize
;
tgt
.
devID
=
src
.
devID
;
tgt
.
useMemPool
=
src
.
useMemPool
;
if
(
src
.
mem
!=
NULL
)
{
if
(
src
.
mem
!=
NULL
)
{
tgt
.
mem
=
new
XMem
(
src
.
mem
->
devID
,
src
.
mem
->
mode
,
src
.
mem
->
maxBlockSize
,
src
.
mem
->
blockNum
,
src
.
mem
->
bufSize
);
}
}
}
/*
reset model parameters
>> model - the model whose parameter (gradient) is set to 0
>> isNodeGrad - indicates whether the tensor node keeps the
/*
reset model parameters
>> model - the model whose parameter (gradient) is set to 0
>> isNodeGrad - indicates whether the tensor node keeps the
gradient information
*/
void
Clear
(
FNNModel
&
model
,
bool
isNodeGrad
)
{
*/
void
Clear
(
FNNModel
&
model
,
bool
isNodeGrad
)
{
if
(
isNodeGrad
)
{
if
(
model
.
embeddingW
.
grad
!=
NULL
)
if
(
model
.
embeddingW
.
grad
!=
NULL
)
model
.
embeddingW
.
grad
->
SetZeroAll
();
for
(
int
i
=
0
;
i
<
MAX_HIDDEN_NUM
;
i
++
)
{
if
(
model
.
hiddenW
[
i
].
grad
!=
NULL
)
if
(
model
.
hiddenW
[
i
].
grad
!=
NULL
)
model
.
hiddenW
[
i
].
grad
->
SetZeroAll
();
if
(
model
.
hiddenB
[
i
].
grad
!=
NULL
)
if
(
model
.
hiddenB
[
i
].
grad
!=
NULL
)
model
.
hiddenB
[
i
].
grad
->
SetZeroAll
();
}
if
(
model
.
outputW
.
grad
!=
NULL
)
if
(
model
.
outputW
.
grad
!=
NULL
)
model
.
outputW
.
grad
->
SetZeroAll
();
if
(
model
.
outputB
.
grad
!=
NULL
)
if
(
model
.
outputB
.
grad
!=
NULL
)
model
.
outputB
.
grad
->
SetZeroAll
();
}
else
{
...
...
@@ -300,76 +301,78 @@ void Clear(FNNModel &model, bool isNodeGrad)
model
.
outputW
.
SetZeroAll
();
model
.
outputB
.
SetZeroAll
();
}
}
}
/*
initialize a 1d tensor using the fnn model setting
>> tensor - the tensor to initialize
>> num - number of items
>> model - the fnn model
*/
void
InitModelTensor1D
(
XTensor
&
tensor
,
int
num
,
FNNModel
&
model
)
{
/*
initialize a 1d tensor using the fnn model setting
>> tensor - the tensor to initialize
>> num - number of items
>> model - the fnn model
*/
void
InitModelTensor1D
(
XTensor
&
tensor
,
int
num
,
FNNModel
&
model
)
{
InitTensor1DV2
(
&
tensor
,
num
,
X_FLOAT
,
model
.
devID
);
}
/*
initialize a 2d tensor using the fnn model setting
>> tensor - the tensor to initialize
>> rowNum - number of rows
>> colNum - number of columns
>> model - the fnn model
*/
void
InitModelTensor2D
(
XTensor
&
tensor
,
int
rowNum
,
int
colNum
,
FNNModel
&
model
)
{
}
/*
initialize a 2d tensor using the fnn model setting
>> tensor - the tensor to initialize
>> rowNum - number of rows
>> colNum - number of columns
>> model - the fnn model
*/
void
InitModelTensor2D
(
XTensor
&
tensor
,
int
rowNum
,
int
colNum
,
FNNModel
&
model
)
{
InitTensor2DV2
(
&
tensor
,
rowNum
,
colNum
,
X_FLOAT
,
model
.
devID
);
}
}
/* initialize the model */
void
Init
(
FNNModel
&
model
)
{
/* initialize the model */
void
Init
(
FNNModel
&
model
)
{
/* create embedding parameter matrix: vSize * eSize */
InitModelTensor2D
(
model
.
embeddingW
,
model
.
vSize
,
model
.
eSize
,
model
);
model
.
embeddingW
.
SetVarFlag
();
/* create hidden layer parameter matrics */
for
(
int
i
=
0
;
i
<
model
.
hDepth
;
i
++
)
{
for
(
int
i
=
0
;
i
<
model
.
hDepth
;
i
++
)
{
/* hidden layer parameter matrix: (n-1)eSize * hsize if it is the first layer
hsize * hsize otherwise */
if
(
i
==
0
)
if
(
i
==
0
)
InitModelTensor2D
(
model
.
hiddenW
[
i
],
(
model
.
n
-
1
)
*
model
.
eSize
,
model
.
hSize
,
model
);
else
InitModelTensor2D
(
model
.
hiddenW
[
i
],
model
.
hSize
,
model
.
hSize
,
model
);
model
.
hiddenW
[
i
].
SetVarFlag
();
/* bias term: a row vector of hSize entries */
InitModelTensor1D
(
model
.
hiddenB
[
i
],
model
.
hSize
,
model
);
model
.
hiddenB
[
i
].
SetVarFlag
();
}
/* create the output layer parameter matrix and bias term */
int
iSize
=
model
.
hDepth
==
0
?
(
model
.
n
-
1
)
*
model
.
eSize
:
model
.
hSize
;
InitModelTensor2D
(
model
.
outputW
,
iSize
,
model
.
vSize
,
model
);
InitModelTensor1D
(
model
.
outputB
,
model
.
vSize
,
model
);
model
.
outputW
.
SetVarFlag
();
model
.
outputB
.
SetVarFlag
();
/* then, we initialize model parameters using a uniform distribution in range
of [-minmax, minmax] */
model
.
embeddingW
.
SetDataRand
(
-
minmax
,
minmax
);
model
.
outputW
.
SetDataRand
(
-
minmax
,
minmax
);
for
(
int
i
=
0
;
i
<
model
.
hDepth
;
i
++
)
for
(
int
i
=
0
;
i
<
model
.
hDepth
;
i
++
)
model
.
hiddenW
[
i
].
SetDataRand
(
-
minmax
,
minmax
);
/* all bias terms are set to zero */
model
.
outputB
.
SetZeroAll
();
for
(
int
i
=
0
;
i
<
model
.
hDepth
;
i
++
)
for
(
int
i
=
0
;
i
<
model
.
hDepth
;
i
++
)
model
.
hiddenB
[
i
].
SetZeroAll
();
}
}
/*
/*
shuffle lines of the file
>> srcFile - the source file to shuffle
>> tgtFile - the resulting file
*/
void
Shuffle
(
const
char
*
srcFile
,
const
char
*
tgtFile
)
{
void
Shuffle
(
const
char
*
srcFile
,
const
char
*
tgtFile
)
{
char
*
line
=
new
char
[
MAX_LINE_LENGTH_HERE
];
#ifndef WIN32
sprintf
(
line
,
"shuf %s > %s"
,
srcFile
,
tgtFile
);
...
...
@@ -379,23 +382,23 @@ void Shuffle(const char * srcFile, const char * tgtFile)
#endif
delete
[]
line
;
}
}
char
lineBuf
[
MAX_LINE_LENGTH_HERE
];
int
wordBuf
[
MAX_LINE_LENGTH_HERE
];
char
lineBuf
[
MAX_LINE_LENGTH_HERE
];
int
wordBuf
[
MAX_LINE_LENGTH_HERE
];
/*
train the model with the standard SGD method
>> train - training data file
>> isShuffled - shuffle the data file or not
>> model - the fnn model
*/
void
Train
(
const
char
*
train
,
bool
isShuffled
,
FNNModel
&
model
)
{
/*
train the model with the standard SGD method
>> train - training data file
>> isShuffled - shuffle the data file or not
>> model - the fnn model
*/
void
Train
(
const
char
*
train
,
bool
isShuffled
,
FNNModel
&
model
)
{
char
name
[
MAX_NAME_LENGTH
];
/* shuffle the data */
if
(
isShuffled
)
{
if
(
isShuffled
)
{
sprintf
(
name
,
"%s-tmp"
,
train
);
Shuffle
(
train
,
name
);
}
...
...
@@ -420,9 +423,8 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
XNet
autoDiffer
;
double
startT
=
GetClockSec
();
/* iterate for a number of epochs */
for
(
epoch
=
0
;
epoch
<
nEpoch
;
epoch
++
)
{
for
(
epoch
=
0
;
epoch
<
nEpoch
;
epoch
++
)
{
/* data file */
FILE
*
file
=
fopen
(
name
,
"rb"
);
...
...
@@ -432,7 +434,7 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
loss
=
0
;
ngramNum
=
1
;
while
(
ngramNum
>
0
)
{
while
(
ngramNum
>
0
)
{
/* load a minibatch of ngrams */
ngramNum
=
LoadNGrams
(
file
,
model
.
n
,
ngrams
,
sentBatch
,
wordBatch
);
...
...
@@ -453,13 +455,13 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
XTensor
lossTensor
;
/* make the input tensor for position i */
for
(
int
i
=
0
;
i
<
model
.
n
-
1
;
i
++
)
for
(
int
i
=
0
;
i
<
model
.
n
-
1
;
i
++
)
MakeWordBatch
(
inputs
[
i
],
ngrams
,
ngramNum
,
i
,
model
.
vSize
,
model
.
devID
,
model
.
mem
);
/* make the gold tensor */
MakeWordBatch
(
gold
,
ngrams
,
ngramNum
,
model
.
n
-
1
,
model
.
vSize
,
model
.
devID
,
model
.
mem
);
if
(
!
autoDiff
)
{
if
(
!
autoDiff
)
{
/* prepare an empty network for building the fnn */
FNNNet
net
;
...
...
@@ -469,15 +471,13 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
/* forward computation */
Forward
(
inputs
,
output
,
model
,
net
);
/* backward computation to obtain gradients */
Backward
(
inputs
,
output
,
gold
,
CROSSENTROPY
,
model
,
grad
,
net
);
/* update model parameters */
Update
(
model
,
grad
,
learningRate
,
false
);
}
else
{
else
{
/* gradient = 0 */
Clear
(
model
,
true
);
...
...
@@ -489,6 +489,9 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
/* this is implemented by multiply function */
//ForwardAutoDiff(inputs, output, model);
lossTensor
=
CrossEntropy
(
output
,
gold
);
output
.
Dump
(
stderr
,
"output:"
,
10
);
gold
.
Dump
(
stderr
,
"gold:"
,
10
);
lossTensor
.
Dump
(
stderr
,
"lossTensor:"
,
10
);
/* automatic differentiation */
autoDiffer
.
Backward
(
lossTensor
);
...
...
@@ -500,14 +503,15 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
/* get probabilities */
float
prob
=
GetProb
(
output
,
gold
);
prob
=
ReduceSumAll
(
lossTensor
);
loss
+=
prob
;
if
(
autoDiff
)
{
prob
=
-
ReduceSumAll
(
lossTensor
);
}
//printf("prob:%f", prob);
loss
+=
-
prob
;
wordCount
+=
ngramNum
;
wordCountTotal
+=
ngramNum
;
if
(
++
step
>=
nStep
)
{
if
(
++
step
>=
nStep
)
{
isEnd
=
true
;
break
;
}
...
...
@@ -521,7 +525,7 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
fclose
(
file
);
if
(
isEnd
)
if
(
isEnd
)
break
;
Test
(
testFN
,
outputFN
,
model
);
...
...
@@ -535,17 +539,17 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
elapsed
,
step
,
epoch
);
delete
[]
ngrams
;
}
/*
update the model parameters using the delta rule
>> model - the model to update
>> grad - gradients
>> epsilon - learning rate
>> isNodeGrad - indicates whether the gradient is associated with the node
*/
void
Update
(
FNNModel
&
model
,
FNNModel
&
grad
,
float
epsilon
,
bool
isNodeGrad
)
{
}
/*
update the model parameters using the delta rule
>> model - the model to update
>> grad - gradients
>> epsilon - learning rate
>> isNodeGrad - indicates whether the gradient is associated with the node
*/
void
Update
(
FNNModel
&
model
,
FNNModel
&
grad
,
float
epsilon
,
bool
isNodeGrad
)
{
TensorList
paraList
(
10
);
TensorList
gradList
(
10
);
...
...
@@ -559,7 +563,7 @@ void Update(FNNModel &model, FNNModel &grad, float epsilon, bool isNodeGrad)
paraList
.
Add
(
&
model
.
embeddingW
);
if
(
!
isNodeGrad
)
{
if
(
!
isNodeGrad
)
{
gradList
.
Add
(
&
grad
.
outputW
);
gradList
.
Add
(
&
grad
.
outputB
);
...
...
@@ -567,10 +571,10 @@ void Update(FNNModel &model, FNNModel &grad, float epsilon, bool isNodeGrad)
gradList
.
Add
(
&
grad
.
hiddenW
[
i
]);
gradList
.
Add
(
&
grad
.
hiddenB
[
i
]);
}
;
;
gradList
.
Add
(
&
grad
.
embeddingW
);
}
else
{
else
{
gradList
.
Add
(
model
.
outputW
.
grad
);
gradList
.
Add
(
model
.
outputB
.
grad
);
...
...
@@ -592,17 +596,17 @@ void Update(FNNModel &model, FNNModel &grad, float epsilon, bool isNodeGrad)
/* the delta rule */
_Sum
(
para
,
paraGrad
,
para
,
-
epsilon
);
}
}
}
/*
get prediction probabilites of the gold words
>> output - output probabilities
>> gold - gold standard
>> wordPobs - probability of each word
<< return - probability of the batch
*/
float
GetProb
(
XTensor
&
output
,
XTensor
&
gold
,
XTensor
*
wordProbs
)
{
/*
get prediction probabilites of the gold words
>> output - output probabilities
>> gold - gold standard
>> wordPobs - probability of each word
<< return - probability of the batch
*/
float
GetProb
(
XTensor
&
output
,
XTensor
&
gold
,
XTensor
*
wordProbs
)
{
XTensor
probs
;
InitTensorV2
(
&
probs
,
&
output
);
...
...
@@ -613,7 +617,7 @@ float GetProb(XTensor &output, XTensor &gold, XTensor * wordProbs)
XTensor
wprobs
;
InitTensor1DV2
(
&
wprobs
,
output
.
GetDim
(
0
),
output
.
dataType
,
output
.
devID
);
_ReduceSum
(
&
probs
,
&
wprobs
,
1
);
if
(
wordProbs
!=
NULL
)
if
(
wordProbs
!=
NULL
)
_CopyValues
(
&
wprobs
,
wordProbs
);
/* reshape the tensor to fit it into the reduce procedure
...
...
@@ -629,34 +633,34 @@ float GetProb(XTensor &output, XTensor &gold, XTensor * wordProbs)
_ReduceSum
(
&
probs
,
&
result
,
1
);
return
result
.
Get1D
(
0
);
}
int
pin
=
0
;
int
wordBufCount
=
0
;
/*
load a minibatch of ngrams
>> file - data file
>> n - order of the language model
>> ngrams - the loaded ngrams
>> sentNum - maximum sentences kept in the minibatch
>> wordNum - maximum words kept in the minibatch
*/
int
LoadNGrams
(
FILE
*
file
,
int
n
,
NGram
*
ngrams
,
int
sentNum
,
int
wordNum
)
{
}
int
pin
=
0
;
int
wordBufCount
=
0
;
/*
load a minibatch of ngrams
>> file - data file
>> n - order of the language model
>> ngrams - the loaded ngrams
>> sentNum - maximum sentences kept in the minibatch
>> wordNum - maximum words kept in the minibatch
*/
int
LoadNGrams
(
FILE
*
file
,
int
n
,
NGram
*
ngrams
,
int
sentNum
,
int
wordNum
)
{
int
num
=
0
;
int
lineNum
=
0
;
while
(
pin
>
0
||
fgets
(
lineBuf
,
MAX_LINE_LENGTH_HERE
-
1
,
file
))
{
if
(
pin
<=
0
)
{
while
(
pin
>
0
||
fgets
(
lineBuf
,
MAX_LINE_LENGTH_HERE
-
1
,
file
))
{
if
(
pin
<=
0
)
{
int
len
=
(
int
)
strlen
(
lineBuf
);
while
(
lineBuf
[
len
-
1
]
==
'\r'
||
lineBuf
[
len
-
1
]
==
'\n'
)
{
while
(
lineBuf
[
len
-
1
]
==
'\r'
||
lineBuf
[
len
-
1
]
==
'\n'
)
{
lineBuf
[
len
-
1
]
=
0
;
len
--
;
}
len
=
(
int
)
strlen
(
lineBuf
);
if
(
len
==
0
)
if
(
len
==
0
)
continue
;
/* how many characters are in a word */
...
...
@@ -666,9 +670,9 @@ int LoadNGrams(FILE * file, int n, NGram * ngrams, int sentNum, int wordNum)
int
wNum
=
0
;
int
i
=
0
;
for
(
i
=
pin
;
i
<
len
;
i
++
)
{
for
(
i
=
pin
;
i
<
len
;
i
++
)
{
/* load word (id) seperated by space or tab */
if
((
lineBuf
[
i
]
==
' '
||
lineBuf
[
i
]
==
'\t'
)
&&
wSize
>
0
)
{
if
((
lineBuf
[
i
]
==
' '
||
lineBuf
[
i
]
==
'\t'
)
&&
wSize
>
0
)
{
lineBuf
[
i
]
=
0
;
wordBuf
[
wNum
++
]
=
atoi
(
lineBuf
+
i
-
wSize
);
wSize
=
0
;
...
...
@@ -677,7 +681,7 @@ int LoadNGrams(FILE * file, int n, NGram * ngrams, int sentNum, int wordNum)
wSize
++
;
}
if
(
wSize
>
0
)
if
(
wSize
>
0
)
wordBuf
[
wNum
++
]
=
atoi
(
lineBuf
+
i
-
wSize
);
wordBufCount
=
wNum
;
...
...
@@ -689,69 +693,69 @@ int LoadNGrams(FILE * file, int n, NGram * ngrams, int sentNum, int wordNum)
int
i
=
-
MAX_INT
;
/* create ngrams */
for
(
i
=
MAX
(
pin
,
n
-
1
);
i
<
wordBufCount
-
1
;
i
++
)
{
for
(
i
=
MAX
(
pin
,
n
-
1
);
i
<
wordBufCount
-
1
;
i
++
)
{
memcpy
(
ngrams
[
num
++
].
words
,
wordBuf
+
i
-
n
+
1
,
sizeof
(
int
)
*
n
);
if
(
num
>=
wordNum
)
if
(
num
>=
wordNum
)
break
;
}
/* set a finished flag if we reach the end of the sentence*/
if
(
i
>=
wordBufCount
-
1
)
{
if
(
i
>=
wordBufCount
-
1
)
{
pin
=
0
;
wordBufCount
=
0
;
}
/* record where to start next time if we break in the middle */
else
{
else
{
pin
=
i
+
1
;
}
if
((
sentNum
>
0
&&
lineNum
>=
sentNum
)
||
num
>=
wordNum
)
if
((
sentNum
>
0
&&
lineNum
>=
sentNum
)
||
num
>=
wordNum
)
break
;
}
return
num
;
}
/*
make a 2d tensor in zero-one representation
The indexed cell is set to 1, and 0 otherwise.
>> tensor - the tensor to initialize
>> rowNum - number of rows
>> colNum - number of columns
>> rows - row index
>> cols - column index
>> itemNum - number of non-zero items
>> devID - device id
>> mem - memory pool
*/
void
InitZeroOneTensor2D
(
XTensor
&
tensor
,
int
rowNum
,
int
colNum
,
int
*
rows
,
int
*
cols
,
}
/*
make a 2d tensor in zero-one representation
The indexed cell is set to 1, and 0 otherwise.
>> tensor - the tensor to initialize
>> rowNum - number of rows
>> colNum - number of columns
>> rows - row index
>> cols - column index
>> itemNum - number of non-zero items
>> devID - device id
>> mem - memory pool
*/
void
InitZeroOneTensor2D
(
XTensor
&
tensor
,
int
rowNum
,
int
colNum
,
int
*
rows
,
int
*
cols
,
int
itemNum
,
int
devID
,
XMem
*
mem
)
{
{
InitTensor2DV2
(
&
tensor
,
rowNum
,
colNum
,
X_FLOAT
,
devID
);
tensor
.
SetZeroAll
();
/* set none-zero cells */
for
(
int
i
=
0
;
i
<
itemNum
;
i
++
)
for
(
int
i
=
0
;
i
<
itemNum
;
i
++
)
tensor
.
Set2D
(
1.0
F
,
rows
[
i
],
cols
[
i
]);
}
/*
make a tensor that encodes a batch of words
>> batch - the tensor encoding a batch of words
>> ngrams - the ngram batch
>> ngramNum - batch size
>> n - indicate which word is encode for each ngram
>> vSize - vocabulary size
>> devID - device id
>> mem - memory pool
*/
void
MakeWordBatch
(
XTensor
&
batch
,
NGram
*
ngrams
,
int
ngramNum
,
int
n
,
int
vSize
,
int
devID
,
XMem
*
mem
)
{
}
/*
make a tensor that encodes a batch of words
>> batch - the tensor encoding a batch of words
>> ngrams - the ngram batch
>> ngramNum - batch size
>> n - indicate which word is encode for each ngram
>> vSize - vocabulary size
>> devID - device id
>> mem - memory pool
*/
void
MakeWordBatch
(
XTensor
&
batch
,
NGram
*
ngrams
,
int
ngramNum
,
int
n
,
int
vSize
,
int
devID
,
XMem
*
mem
)
{
int
*
rows
=
new
int
[
ngramNum
];
int
*
cols
=
new
int
[
ngramNum
];
for
(
int
i
=
0
;
i
<
ngramNum
;
i
++
)
{
for
(
int
i
=
0
;
i
<
ngramNum
;
i
++
)
{
rows
[
i
]
=
i
;
cols
[
i
]
=
ngrams
[
i
].
words
[
n
];
}
...
...
@@ -760,31 +764,31 @@ void MakeWordBatch(XTensor &batch, NGram * ngrams, int ngramNum, int n, int vSiz
delete
[]
rows
;
delete
[]
cols
;
}
/*
forward procedure
>> inputs - input word representations
>> output - output probability
>> model - the fnn model
>> net - the network that keeps the internal tensors generated in the process
*/
void
Forward
(
XTensor
inputs
[],
XTensor
&
output
,
FNNModel
&
model
,
FNNNet
&
net
)
{
}
/*
forward procedure
>> inputs - input word representations
>> output - output probability
>> model - the fnn model
>> net - the network that keeps the internal tensors generated in the process
*/
void
Forward
(
XTensor
inputs
[],
XTensor
&
output
,
FNNModel
&
model
,
FNNNet
&
net
)
{
int
batchSize
=
-
1
;
int
n
=
model
.
n
;
int
depth
=
model
.
hDepth
;
TensorList
eList
(
n
-
1
);
/* previoius n - 1 words */
for
(
int
i
=
0
;
i
<
n
-
1
;
i
++
)
{
for
(
int
i
=
0
;
i
<
n
-
1
;
i
++
)
{
XTensor
&
input
=
inputs
[
i
];
XTensor
&
w
=
model
.
embeddingW
;
XTensor
&
embedding
=
net
.
embeddings
[
i
];
if
(
batchSize
==
-
1
)
if
(
batchSize
==
-
1
)
batchSize
=
input
.
dimSize
[
0
];
else
{
else
{
CheckErrors
(
batchSize
==
input
.
dimSize
[
0
],
"Wrong input word representations!"
);
}
...
...
@@ -804,7 +808,7 @@ void Forward(XTensor inputs[], XTensor &output, FNNModel &model, FNNNet &net)
_Concatenate
(
&
eList
,
&
net
.
embeddingCat
,
1
);
/* go over each hidden layer */
for
(
int
i
=
0
;
i
<
depth
;
i
++
)
{
for
(
int
i
=
0
;
i
<
depth
;
i
++
)
{
XTensor
&
h_pre
=
i
==
0
?
net
.
embeddingCat
:
net
.
hiddens
[
i
-
1
];
XTensor
&
w
=
model
.
hiddenW
[
i
];
XTensor
&
b
=
model
.
hiddenB
[
i
];
...
...
@@ -860,21 +864,21 @@ void Forward(XTensor inputs[], XTensor &output, FNNModel &model, FNNNet &net)
/* y = softmax(s) */
_LogSoftmax
(
&
s
,
&
y
,
1
);
}
}
/*
backward procedure
>> inputs - input word representations
>> output - output probability
>> gold - gold standard
>> loss - loss function name
>> model - the fnn model
>> grad - the model that keeps the gradient information
>> net - the network that keeps the internal tensors generated in the process
*/
void
Backward
(
XTensor
inputs
[],
XTensor
&
output
,
XTensor
&
gold
,
LOSS_FUNCTION_NAME
loss
,
}
/*
backward procedure
>> inputs - input word representations
>> output - output probability
>> gold - gold standard
>> loss - loss function name
>> model - the fnn model
>> grad - the model that keeps the gradient information
>> net - the network that keeps the internal tensors generated in the process
*/
void
Backward
(
XTensor
inputs
[],
XTensor
&
output
,
XTensor
&
gold
,
LOSS_FUNCTION_NAME
loss
,
FNNModel
&
model
,
FNNModel
&
grad
,
FNNNet
&
net
)
{
{
int
batchSize
=
output
.
GetDim
(
0
);
int
n
=
model
.
n
;
int
depth
=
model
.
hDepth
;
...
...
@@ -979,17 +983,17 @@ void Backward(XTensor inputs[], XTensor &output, XTensor &gold, LOSS_FUNCTION_NA
delete
dedy
;
}
}
}
/*
forward process (with tensor connections) (this is implemented by gather function)
>> ngrams - the loaded ngrams
>> batch - the tensor encoding a batch of words
>> output - output probability
>> model - the fnn model
*/
void
ForwardAutoDiff
(
NGram
*
ngrams
,
int
batch
,
XTensor
&
output
,
FNNModel
&
model
)
{
/*
forward process (with tensor connections) (this is implemented by gather function)
>> ngrams - the loaded ngrams
>> batch - the tensor encoding a batch of words
>> output - output probability
>> model - the fnn model
*/
void
ForwardAutoDiff
(
NGram
*
ngrams
,
int
batch
,
XTensor
&
output
,
FNNModel
&
model
)
{
int
n
=
model
.
n
;
int
depth
=
model
.
hDepth
;
...
...
@@ -998,11 +1002,11 @@ void ForwardAutoDiff(NGram * ngrams, int batch, XTensor &output, FNNModel &model
XTensor
hidden
;
XTensor
b
;
int
size
=
batch
*
(
n
-
1
);
int
size
=
batch
*
(
n
-
1
);
int
*
index
=
new
int
[
size
];
for
(
int
i
=
0
;
i
<
batch
;
i
++
)
{
for
(
int
j
=
0
;
j
<
n
-
1
;
j
++
)
{
for
(
int
i
=
0
;
i
<
batch
;
i
++
)
{
for
(
int
j
=
0
;
j
<
n
-
1
;
j
++
)
{
int
a
=
i
*
(
n
-
1
)
+
j
;
index
[
a
]
=
ngrams
[
i
].
words
[
j
];
}
...
...
@@ -1010,7 +1014,7 @@ void ForwardAutoDiff(NGram * ngrams, int batch, XTensor &output, FNNModel &model
InitTensor1DV2
(
&
words
,
size
,
X_INT
,
model
.
devID
);
words
.
SetData
(
index
,
size
);
words
.
Dump
(
stderr
,
"word:"
,
10
);
embeddingBig
=
Gather
(
model
.
embeddingW
,
words
);
delete
[]
index
;
...
...
@@ -1018,26 +1022,26 @@ void ForwardAutoDiff(NGram * ngrams, int batch, XTensor &output, FNNModel &model
int
dimSize
[
2
];
dimSize
[
0
]
=
embeddingBig
.
GetDim
(
0
)
/
(
n
-
1
);
dimSize
[
1
]
=
embeddingBig
.
GetDim
(
1
)
*
(
n
-
1
);
embeddingBig
.
Dump
(
stderr
,
"embeddingBig:"
,
10
);
hidden
=
Reshape
(
embeddingBig
,
embeddingBig
.
order
,
dimSize
);
hidden
.
Dump
(
stderr
,
"hidden-0:"
,
10
);
/* hidden layers */
for
(
int
i
=
0
;
i
<
depth
;
i
++
)
for
(
int
i
=
0
;
i
<
depth
;
i
++
)
hidden
=
HardTanH
(
MMul
(
hidden
,
model
.
hiddenW
[
i
])
+
model
.
hiddenB
[
i
]);
hidden
.
Dump
(
stderr
,
"hidden-1:"
,
10
);
/* output layer */
//output = LogSoftmax(MMul(hidden, model.outputW) + model.outputB, 1);
output
=
Softmax
(
MMul
(
hidden
,
model
.
outputW
)
+
model
.
outputB
,
1
);
}
/*
forward process (with tensor connections) (this is implemented by multiply function)
>> inputs - input word representations
>> output - output probability
>> model - the fnn model
*/
void
ForwardAutoDiff
(
XTensor
inputs
[],
XTensor
&
output
,
FNNModel
&
model
)
{
}
/*
forward process (with tensor connections) (this is implemented by multiply function)
>> inputs - input word representations
>> output - output probability
>> model - the fnn model
*/
void
ForwardAutoDiff
(
XTensor
inputs
[],
XTensor
&
output
,
FNNModel
&
model
)
{
int
n
=
model
.
n
;
int
depth
=
model
.
hDepth
;
...
...
@@ -1047,7 +1051,7 @@ void ForwardAutoDiff(XTensor inputs[], XTensor &output, FNNModel &model)
XTensor
b
;
TensorList
inputList
(
n
-
1
);
for
(
int
i
=
0
;
i
<
n
-
1
;
i
++
)
for
(
int
i
=
0
;
i
<
n
-
1
;
i
++
)
inputList
.
Add
(
inputs
+
i
);
/* represent n - 1 words in one tensor */
...
...
@@ -1061,21 +1065,21 @@ void ForwardAutoDiff(XTensor inputs[], XTensor &output, FNNModel &model)
hidden
=
Merge
(
hidden
,
2
,
0
);
/* hidden layers */
for
(
int
i
=
0
;
i
<
depth
;
i
++
)
for
(
int
i
=
0
;
i
<
depth
;
i
++
)
hidden
=
MMul
(
hidden
,
model
.
hiddenW
[
i
])
+
model
.
hiddenB
[
i
];
/* output layer */
output
=
LogSoftmax
(
MMul
(
hidden
,
model
.
outputW
)
+
model
.
outputB
,
1
);
}
}
/*
dump the model to the disk space
>> fn - where to keep the model
>> model - the fnn model
*/
void
Dump
(
const
char
*
fn
,
FNNModel
&
model
)
{
/*
dump the model to the disk space
>> fn - where to keep the model
>> model - the fnn model
*/
void
Dump
(
const
char
*
fn
,
FNNModel
&
model
)
{
FILE
*
file
=
fopen
(
fn
,
"wb"
);
CheckErrors
(
file
,
"Cannot open the model file"
);
...
...
@@ -1094,15 +1098,15 @@ void Dump(const char * fn, FNNModel &model)
fclose
(
file
);
XPRINT
(
0
,
stderr
,
"[INFO] model saved
\n
"
);
}
/*
read the model from the disk space
>> fn - where to keep the model
>> model - the fnn model
*/
void
Read
(
const
char
*
fn
,
FNNModel
&
model
)
{
}
/*
read the model from the disk space
>> fn - where to keep the model
>> model - the fnn model
*/
void
Read
(
const
char
*
fn
,
FNNModel
&
model
)
{
FILE
*
file
=
fopen
(
fn
,
"rb"
);
CheckErrors
(
file
,
"Cannot open the model file"
);
...
...
@@ -1121,16 +1125,16 @@ void Read(const char * fn, FNNModel &model)
fclose
(
file
);
XPRINT
(
0
,
stderr
,
"[INFO] model loaded
\n
"
);
}
/*
test the model
>> test - test data file
>> result - where to keep the result
>> model - the fnn model
*/
void
Test
(
const
char
*
test
,
const
char
*
result
,
FNNModel
&
model
)
{
}
/*
test the model
>> test - test data file
>> result - where to keep the result
>> model - the fnn model
*/
void
Test
(
const
char
*
test
,
const
char
*
result
,
FNNModel
&
model
)
{
int
wordCount
=
0
;
int
sentCount
=
0
;
float
loss
=
0
;
...
...
@@ -1173,14 +1177,13 @@ void Test(const char * test, const char * result, FNNModel &model)
if
(
!
autoDiff
)
{
/* prepare an empty network for building the fnn */
FNNNet
net
;
/* forward computation */
Forward
(
inputs
,
output
,
model
,
net
);
}
else
{
/* this is implemented by gather function */
ForwardAutoDiff
(
ngrams
,
ngramNum
,
output
,
model
);
output
=
Log
(
output
);
/* this is implemented by multiply function */
//ForwardAutoDiff(inputs, output, model);
}
...
...
@@ -1213,11 +1216,11 @@ void Test(const char * test, const char * result, FNNModel &model)
double
elapsed
=
GetClockSec
()
-
startT
;
XPRINT1
(
0
,
stderr
,
"[INFO] ppl=%.2f
\n
"
,
exp
(
loss
/
wordCount
));
XPRINT1
(
0
,
stderr
,
"[INFO] ppl=%.2f
\n
"
,
exp
(
loss
/
wordCount
));
XPRINT3
(
0
,
stderr
,
"[INFO] test finished (took %.1fs, sentence=%d and ngram=%d)
\n
"
,
elapsed
,
sentCount
,
wordCount
);
delete
[]
ngrams
;
}
}
};
source/tensor/Main.cpp
查看文件 @
2c4061e9
...
...
@@ -28,7 +28,7 @@
#include <time.h>
#include "XTensor.h"
#include "XDevice.h"
#include "./test/Test.h"
//
#include "./test/Test.h"
#include "./core/CHeader.h"
#include "./loss/CrossEntropy.h"
...
...
@@ -44,7 +44,7 @@ void LittleTest();
void
T2TTest
();
void
T2TTest2
();
void
PowerTest
();
void
Tests
();
int
main
(
int
argc
,
const
char
**
argv
)
{
//PowerTest();
...
...
@@ -63,7 +63,7 @@ int main( int argc, const char ** argv )
//return 0;
if
(
argc
>
1
&&
!
strcmp
(
argv
[
1
],
"-test"
))
Test
();
Test
s
();
else
{
fprintf
(
stderr
,
"Thanks for using NiuTrans.Tensor! This is a library that eases the
\n
"
);
fprintf
(
stderr
,
"use of tensors. All you need is to ...
\n\n
"
);
...
...
@@ -75,219 +75,223 @@ int main( int argc, const char ** argv )
return
0
;
}
/*
read a tensor from a binary file
>> tensor - the tensor to load into (via XTensor::Read)
>> filename - path of the binary file
>> label - label string forwarded to XTensor::Read
*/
void myRead(XTensor * tensor, const char * filename, const char * label)
{
    FILE * file = fopen(filename, "rb");

    /* the original only printed the name and then dereferenced the NULL
       handle; bail out instead */
    if (file == NULL) {
        printf("%s\n", filename);
        return;
    }

    tensor->Read(file, label);

    /* the handle was leaked before */
    fclose(file);
}
/*
dump a tensor to a binary file
>> tensor - the tensor to write out (via XTensor::Dump)
>> filename - path of the binary file to create
>> label - label string forwarded to XTensor::Dump
*/
void myDump(XTensor * tensor, const char * filename, const char * label)
{
    FILE * file = fopen(filename, "wb");

    /* the original only printed the name and then dereferenced the NULL
       handle; bail out instead */
    if (file == NULL) {
        printf("%s\n", filename);
        return;
    }

    tensor->Dump(file, label);

    /* the handle was leaked before */
    fclose(file);
}
void
PowerTest
()
{
XTensor
input
;
XTensor
output
;
InitTensor2D
(
&
input
,
256
,
10000
,
X_FLOAT
,
0
);
InitTensor2D
(
&
output
,
256
,
10000
,
X_FLOAT
,
0
);
myRead
(
&
input
,
"1.txt"
,
""
);
_Power
(
&
input
,
&
output
,
2
);
output
.
Dump
(
stderr
,
""
,
200
);
}
void
SmallTest
()
{
XTensor
a
;
XTensor
b
;
XTensor
c
;
XTensor
d
;
InitTensor2D
(
&
a
,
2
,
2
);
InitTensor2D
(
&
b
,
2
,
2
);
a
.
SetZeroAll
();
b
.
SetZeroAll
();
a
.
Set2D
(
1.0
F
,
0
,
0
);
a
.
Set2D
(
2.0
F
,
1
,
1
);
b
=
Sum
(
a
,
Multiply
(
a
,
a
));
/* this is prohibited !!!!!!!!!!!!! */
//XTensor c = a * b + a;
//XTensor d = a + b + c.Lin(0.5F);
c
=
a
*
b
+
a
;
d
=
a
+
b
+
c
.
Lin
(
0.5
F
);
XLink
::
CheckNetwork
(
&
d
);
//XLink::ShowNetwork(stderr, &d);
a
.
Dump
(
stderr
,
"a:"
);
b
.
Dump
(
stderr
,
"b:"
);
c
.
Dump
(
stderr
,
"c:"
);
d
.
Dump
(
stderr
,
"d:"
);
}
/*
check the transpose op: build a 2 x 3 x 4 x 5 tensor filled with
0..unitNum-1, swap dimensions I and J, and dump the result
*/
void TransposeTest()
{
    XTensor a;
    XTensor b;

    /* the two dimensions to exchange */
    int I = 2;
    int J = 3;

    InitTensor4D(&a, 2, 3, 4, 5);

    /* shape of the transposed tensor: a's shape with dims I and J swapped */
    int * dims = new int[a.order];
    memcpy(dims, a.dimSize, sizeof(int) * a.order);
    dims[I] = a.dimSize[J];
    dims[J] = a.dimSize[I];

    InitTensor(&b, 4, dims);

    a.SetZeroAll();
    b.SetZeroAll();

    /* fill a with a running index so transposed positions are recognizable */
    float * data = new float[a.unitNum];
    for (int i = 0; i < a.unitNum; i++)
        data[i] = (float)i;

    a.SetData(data, a.unitNum, 0);

    _Transpose(&a, &b, I, J);
    b.Dump(stderr, "b:");

    delete[] data;
    /* dims was leaked in the original version */
    delete[] dims;
}
/* tiny arithmetic smoke test: print 5000 * 100000
   NOTE(review): the diff view fused this function with Tests() below and
   dropped its closing brace; reconstructed as two separate definitions */
void LittleTest()
{
    int a = 5000;
    int b = 100000;
    int c = a * b;
    printf("%d\n", c);
}

/* placeholder test entry used by main's "-test" option; the real test
   suite is disabled in this build, so just abort */
void Tests()
{
    exit(1);
}
void
T2TTest
()
{
XTensor
*
input
;
XTensor
*
weight
;
XTensor
*
output
;
XTensor
*
gold
;
XTensor
*
dedy
;
XTensor
*
dedx
;
XTensor
*
dedxTmp
;
XTensor
*
dedw
;
XTensor
*
padding
;
DTYPE
loss
;
int
*
dimSize
=
new
int
[
2
];
dimSize
[
0
]
=
256
;
dimSize
[
1
]
=
10001
;
int
*
dimSize2
=
new
int
[
3
];
dimSize2
[
0
]
=
2
;
dimSize2
[
1
]
=
31
;
dimSize2
[
2
]
=
256
;
int
*
dimSize3
=
new
int
[
3
];
dimSize3
[
0
]
=
2
;
dimSize3
[
1
]
=
31
;
dimSize3
[
2
]
=
10001
;
int
*
dimSize4
=
new
int
[
2
];
dimSize4
[
0
]
=
2
;
dimSize4
[
1
]
=
31
;
input
=
NewTensor
(
3
,
dimSize2
,
X_FLOAT
,
1.0
F
,
0
);
weight
=
NewTensor
(
2
,
dimSize
,
X_FLOAT
,
1.0
F
,
0
);
dedw
=
NewTensor
(
2
,
dimSize
,
X_FLOAT
,
1.0
F
,
0
);
gold
=
NewTensor
(
3
,
dimSize3
,
X_FLOAT
,
1.0
F
,
0
);
output
=
NewTensor
(
3
,
dimSize3
,
X_FLOAT
,
1.0
F
,
0
);
dedy
=
NewTensor
(
3
,
dimSize3
,
X_FLOAT
,
1.0
F
,
0
);
dedx
=
NewTensor
(
3
,
dimSize3
,
X_FLOAT
,
1.0
F
,
0
);
dedxTmp
=
NewTensor
(
3
,
dimSize3
,
X_FLOAT
,
1.0
F
,
0
);
padding
=
NewTensor
(
2
,
dimSize4
,
X_FLOAT
,
1.0
F
,
0
);
//weight = NewTensor(2, dimSize);
//dedw = NewTensor(2, dimSize);
//input = NewTensor(3, dimSize2);
//gold = NewTensor(3, dimSize3);
//output = NewTensor(3, dimSize3);
//dedy = NewTensor(3, dimSize3);
//dedx = NewTensor(3, dimSize3);
//dedxTmp = NewTensor(3, dimSize3);
//padding = NewTensor(2, dimSize4);
myRead
(
input
,
"x.txt"
,
"x"
);
myRead
(
weight
,
"w.txt"
,
"w"
);
myRead
(
gold
,
"gold.txt"
,
"gold"
);
myRead
(
padding
,
"padding.txt"
,
"padding"
);
XTensor
inter
;
inter
=
MMul
(
*
input
,
*
weight
);
_Softmax
(
&
inter
,
output
,
2
);
//_LogMe(output);
loss
=
_CrossEntropyFast
(
output
,
gold
,
REDUCE_MEAN
,
NULL
,
padding
);
printf
(
"loss: %f
\n
"
,
loss
);
_CrossEntropyBackward
(
dedy
,
output
,
gold
,
NULL
);
//_CrossEntropyBackward(dedy, output, gold, NULL, padding);
myDump
(
dedy
,
"dedy.txt"
,
"dedy"
);
_SoftmaxBackward
(
NULL
,
output
,
input
,
dedy
,
dedx
,
NULL
,
-
1
,
NOLOSS
);
_Sub
(
output
,
gold
,
dedxTmp
);
myDump
(
dedx
,
"dedx.txt"
,
"dedx"
);
dedx
->
Dump
(
stderr
,
"dedx"
,
200
);
dedxTmp
->
Dump
(
stderr
,
"dedxTmp"
,
200
);
input
->
Reshape
(
input
->
unitNum
/
input
->
GetDim
(
-
1
),
input
->
GetDim
(
-
1
));
dedx
->
Reshape
(
dedx
->
unitNum
/
dedx
->
GetDim
(
-
1
),
dedx
->
GetDim
(
-
1
));
_MatrixMulBatched
(
input
,
X_TRANS
,
dedx
,
X_NOTRANS
,
dedw
);
myDump
(
dedw
,
"dedw.txt"
,
"dedw"
);
}
void
T2TTest2
()
{
int
dimSize
[
3
];
dimSize
[
0
]
=
161
;
dimSize
[
1
]
=
47
;
dimSize
[
2
]
=
10001
;
XTensor
*
probs
=
NewTensor
(
3
,
dimSize
,
X_FLOAT
,
1.0
F
,
0
);
//XTensor * probs = NewTensor(3, dimSize, X_FLOAT, 1.0F, -1);
//myRead(probs, "probs.txt", " ");
_SetDataFixedFloat
(
probs
,
1.0
F
);
probs
->
Reshape
(
1
,
probs
->
unitNum
);
DTYPE
sum
=
_ReduceSumAll
(
probs
);
printf
(
"%e
\n
"
,
sum
);
//XTensor tmp;
//tmp = IsNonZero(*probs);
//DTYPE nonZeroNum = ReduceSumAll(tmp);
//printf("%f\n", nonZeroNum);
//
//DTYPE gpu = ReduceSum(*probs, 1).Get2D(0, 0);
//printf("%e\n", gpu);
}
//void myRead(XTensor * tensor, const char * filename, const char * label)
//{
// FILE * file = fopen(filename, "rb");
// if(file == NULL)
// printf("%s\n", filename);
// tensor->Read(file, label);
//}
//
//void myDump(XTensor * tensor, const char * filename, const char * label)
//{
// FILE * file = fopen(filename, "wb");
// if(file == NULL)
// printf("%s\n", filename);
// tensor->Dump(file, label);
//}
//
//void PowerTest()
//{
// XTensor input;
// XTensor output;
// InitTensor2D(&input, 256, 10000, X_FLOAT, 0);
// InitTensor2D(&output, 256, 10000, X_FLOAT, 0);
// myRead(&input, "1.txt", "");
//
// _Power(&input, &output, 2);
// output.Dump(stderr, "", 200);
//}
//
//void SmallTest()
//{
// XTensor a;
// XTensor b;
// XTensor c;
// XTensor d;
//
// InitTensor2D(&a, 2, 2);
// InitTensor2D(&b, 2, 2);
// a.SetZeroAll();
// b.SetZeroAll();
// a.Set2D(1.0F, 0, 0);
// a.Set2D(2.0F, 1, 1);
//
// b = Sum(a, Multiply(a, a));
//
// /* this is prohibited !!!!!!!!!!!!! */
// //XTensor c = a * b + a;
// //XTensor d = a + b + c.Lin(0.5F);
//
// c = a * b + a;
// d = a + b + c.Lin(0.5F);
//
// XLink::CheckNetwork(&d);
// //XLink::ShowNetwork(stderr, &d);
//
// a.Dump(stderr, "a:");
// b.Dump(stderr, "b:");
// c.Dump(stderr, "c:");
// d.Dump(stderr, "d:");
//}
//
//void TransposeTest()
//{
// XTensor a;
// XTensor b;
//
// int I = 2;
// int J = 3;
//
// InitTensor4D(&a, 2, 3, 4, 5);
//
// int * dims = new int[a.order];
// memcpy(dims, a.dimSize, sizeof(int) * a.order);
// dims[I] = a.dimSize[J];
// dims[J] = a.dimSize[I];
//
// InitTensor(&b, 4, dims);
//
// a.SetZeroAll();
// b.SetZeroAll();
//
// float * data = new float[a.unitNum];
// for(int i = 0; i < a.unitNum; i++)
// data[i] = (float)i;
//
// a.SetData(data, a.unitNum, 0);
//
// _Transpose(&a, &b, I, J);
// b.Dump(stderr, "b:");
//
// delete[] data;
//}
//
//void LittleTest()
//{
// int a = 5000;
// int b = 100000;
// int c = a*b;
// printf("%d\n", c);
//
// exit(1);
//}
//
//void T2TTest()
//{
// XTensor * input;
// XTensor * weight;
// XTensor * output;
// XTensor * gold;
// XTensor * dedy;
// XTensor * dedx;
// XTensor * dedxTmp;
// XTensor * dedw;
// XTensor * padding;
//
// DTYPE loss;
//
// int * dimSize = new int[2];
// dimSize[0] = 256;
// dimSize[1] = 10001;
//
// int * dimSize2 = new int[3];
// dimSize2[0] = 2;
// dimSize2[1] = 31;
// dimSize2[2] = 256;
//
// int * dimSize3 = new int[3];
// dimSize3[0] = 2;
// dimSize3[1] = 31;
// dimSize3[2] = 10001;
//
// int * dimSize4 = new int[2];
// dimSize4[0] = 2;
// dimSize4[1] = 31;
//
// input = NewTensor(3, dimSize2, X_FLOAT, 1.0F, 0);
// weight = NewTensor(2, dimSize, X_FLOAT, 1.0F, 0);
// dedw = NewTensor(2, dimSize, X_FLOAT, 1.0F, 0);
// gold = NewTensor(3, dimSize3, X_FLOAT, 1.0F, 0);
// output = NewTensor(3, dimSize3, X_FLOAT, 1.0F, 0);
// dedy = NewTensor(3, dimSize3, X_FLOAT, 1.0F, 0);
// dedx = NewTensor(3, dimSize3, X_FLOAT, 1.0F, 0);
// dedxTmp = NewTensor(3, dimSize3, X_FLOAT, 1.0F, 0);
// padding = NewTensor(2, dimSize4, X_FLOAT, 1.0F, 0);
//
// //weight = NewTensor(2, dimSize);
// //dedw = NewTensor(2, dimSize);
// //input = NewTensor(3, dimSize2);
// //gold = NewTensor(3, dimSize3);
// //output = NewTensor(3, dimSize3);
// //dedy = NewTensor(3, dimSize3);
// //dedx = NewTensor(3, dimSize3);
// //dedxTmp = NewTensor(3, dimSize3);
// //padding = NewTensor(2, dimSize4);
//
// myRead(input, "x.txt", "x");
// myRead(weight, "w.txt", "w");
// myRead(gold, "gold.txt", "gold");
// myRead(padding, "padding.txt", "padding");
//
// XTensor inter;
// inter = MMul(*input, *weight);
//
// _Softmax(&inter, output, 2);
//
// //_LogMe(output);
// loss = _CrossEntropyFast(output, gold, REDUCE_MEAN, NULL, padding);
//
// printf("loss: %f\n", loss);
//
// _CrossEntropyBackward(dedy, output, gold, NULL);
// //_CrossEntropyBackward(dedy, output, gold, NULL, padding);
//
// myDump(dedy, "dedy.txt", "dedy");
//
// _SoftmaxBackward(NULL, output, input, dedy, dedx, NULL, -1, NOLOSS);
// _Sub(output, gold, dedxTmp);
//
// myDump(dedx, "dedx.txt", "dedx");
// dedx->Dump(stderr, "dedx", 200);
// dedxTmp->Dump(stderr, "dedxTmp", 200);
//
// input->Reshape(input->unitNum/input->GetDim(-1), input->GetDim(-1));
// dedx->Reshape(dedx->unitNum/dedx->GetDim(-1), dedx->GetDim(-1));
//
// _MatrixMulBatched(input, X_TRANS, dedx, X_NOTRANS, dedw);
//
// myDump(dedw, "dedw.txt", "dedw");
//}
//
//void T2TTest2()
//{
// int dimSize[3];
// dimSize[0] = 161;
// dimSize[1] = 47;
// dimSize[2] = 10001;
// XTensor * probs = NewTensor(3, dimSize, X_FLOAT, 1.0F, 0);
// //XTensor * probs = NewTensor(3, dimSize, X_FLOAT, 1.0F, -1);
//
// //myRead(probs, "probs.txt", " ");
// _SetDataFixedFloat(probs, 1.0F);
//
// probs->Reshape(1, probs->unitNum);
//
// DTYPE sum = _ReduceSumAll(probs);
// printf("%e\n", sum);
//
// //XTensor tmp;
// //tmp = IsNonZero(*probs);
// //DTYPE nonZeroNum = ReduceSumAll(tmp);
// //printf("%f\n", nonZeroNum);
// //
// //DTYPE gpu = ReduceSum(*probs, 1).Get2D(0, 0);
//
// //printf("%e\n", gpu);
//}
source/tensor/loss/CrossEntropy.cu
查看文件 @
2c4061e9
...
...
@@ -196,17 +196,17 @@ void _CudaCrossEntropyBackward(XTensor * dedy, const XTensor * output,
delete[] dims;
}
if(padding != NULL) {
XTensor * tmp = NewTensor(padding);
_IsNonZero(padding, tmp);
int nonZeroNum = (int)_ReduceSumAll(tmp);
_ScaleAndShiftMe(dedy, (DTYPE)1.0/(DTYPE)nonZeroNum);
delete tmp;
}
else {
int num = dedy->unitNum / dedy->GetDim(n);
_ScaleAndShiftMe(dedy, (DTYPE)1.0/(DTYPE)num);
}
//
if(padding != NULL) {
//
XTensor * tmp = NewTensor(padding);
//
_IsNonZero(padding, tmp);
//
int nonZeroNum = (int)_ReduceSumAll(tmp);
//
_ScaleAndShiftMe(dedy, (DTYPE)1.0/(DTYPE)nonZeroNum);
//
delete tmp;
//
}
//
else {
//
int num = dedy->unitNum / dedy->GetDim(n);
//
_ScaleAndShiftMe(dedy, (DTYPE)1.0/(DTYPE)num);
//
}
}
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论