Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
T
Tensor.LowPrecision
概览
Overview
Details
Activity
Cycle Analytics
版本库
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
问题
0
Issues
0
列表
Board
标记
里程碑
合并请求
0
Merge Requests
0
CI / CD
CI / CD
流水线
作业
日程表
图表
维基
Wiki
代码片段
Snippets
成员
Collapse sidebar
Close sidebar
活动
图像
聊天
创建新问题
作业
提交
Issue Boards
Open sidebar
linye
Tensor.LowPrecision
Commits
2c4061e9
Commit
2c4061e9
authored
Jul 30, 2019
by
ltb
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fixed FNNLM of branch of xiao
parent
3800528b
显示空白字符变更
内嵌
并排
正在显示
4 个修改的文件
包含
570 行增加
和
564 行删除
+570
-564
source/network/Main.cpp
+0
-1
source/sample/fnnlm/FNNLM.cpp
+340
-337
source/tensor/Main.cpp
+219
-215
source/tensor/loss/CrossEntropy.cu
+11
-11
没有找到文件。
source/network/Main.cpp
查看文件 @
2c4061e9
...
@@ -24,7 +24,6 @@
...
@@ -24,7 +24,6 @@
#include "../tensor/XUtility.h"
#include "../tensor/XUtility.h"
#include "../tensor/function/FHeader.h"
#include "../tensor/function/FHeader.h"
#include "../tensor/core/CHeader.h"
#include "../tensor/core/CHeader.h"
#include "../tensor/test/Test.h"
#include "../sample/fnnlm/FNNLM.h"
#include "../sample/fnnlm/FNNLM.h"
#include "../sample/transformer/Transformer.h"
#include "../sample/transformer/Transformer.h"
...
...
source/sample/fnnlm/FNNLM.cpp
查看文件 @
2c4061e9
...
@@ -15,7 +15,7 @@
...
@@ -15,7 +15,7 @@
* limitations under the License.
* limitations under the License.
*/
*/
/*
/*
*
*
* This is a simple impelementation of the feed-forward network-baesd language
* This is a simple impelementation of the feed-forward network-baesd language
* model (FNNLM). See more details about FNNLM in
* model (FNNLM). See more details about FNNLM in
...
@@ -32,6 +32,7 @@
...
@@ -32,6 +32,7 @@
#include "../../tensor/XDevice.h"
#include "../../tensor/XDevice.h"
#include "../../tensor/function/FHeader.h"
#include "../../tensor/function/FHeader.h"
#include "../../network/XNet.h"
#include "../../network/XNet.h"
#include "../../tensor/core/math/ScaleAndShift.h"
namespace
fnnlm
namespace
fnnlm
{
{
...
@@ -39,50 +40,50 @@ namespace fnnlm
...
@@ -39,50 +40,50 @@ namespace fnnlm
#define MAX_NAME_LENGTH 1024
#define MAX_NAME_LENGTH 1024
#define MAX_LINE_LENGTH_HERE 1024 * 32
#define MAX_LINE_LENGTH_HERE 1024 * 32
char
trainFN
[
MAX_NAME_LENGTH
]
=
""
;
// file name of the training data
char
trainFN
[
MAX_NAME_LENGTH
]
=
""
;
// file name of the training data
char
modelFN
[
MAX_NAME_LENGTH
]
=
""
;
// file name of the FNN model
char
modelFN
[
MAX_NAME_LENGTH
]
=
""
;
// file name of the FNN model
char
testFN
[
MAX_NAME_LENGTH
]
=
""
;
// file name of the test data
char
testFN
[
MAX_NAME_LENGTH
]
=
""
;
// file name of the test data
char
outputFN
[
MAX_NAME_LENGTH
]
=
""
;
// file name of the result data
char
outputFN
[
MAX_NAME_LENGTH
]
=
""
;
// file name of the result data
float
learningRate
=
0.01
F
;
// learning rate
float
learningRate
=
0.01
F
;
// learning rate
int
nStep
=
10000000
;
// max learning steps (or model updates)
int
nStep
=
10000000
;
// max learning steps (or model updates)
int
nEpoch
=
10
;
// max training epochs
int
nEpoch
=
10
;
// max training epochs
float
minmax
=
0.08
F
;
// range [-p,p] for parameter initialization
float
minmax
=
0.08
F
;
// range [-p,p] for parameter initialization
int
sentBatch
=
0
;
// batch size at the sentence level
int
sentBatch
=
0
;
// batch size at the sentence level
int
wordBatch
=
1
;
// batch size at the word level
int
wordBatch
=
1
;
// batch size at the word level
bool
shuffled
=
false
;
// shuffled the training data file or not
bool
shuffled
=
false
;
// shuffled the training data file or not
bool
autoDiff
=
false
;
// indicator of automatic differentiation
bool
autoDiff
=
false
;
// indicator of automatic differentiation
void
LoadArgs
(
int
argc
,
const
char
**
argv
,
FNNModel
&
model
);
void
LoadArgs
(
int
argc
,
const
char
**
argv
,
FNNModel
&
model
);
void
Init
(
FNNModel
&
model
);
void
Init
(
FNNModel
&
model
);
void
Check
(
FNNModel
&
model
);
void
Check
(
FNNModel
&
model
);
void
Copy
(
FNNModel
&
tgt
,
FNNModel
&
src
);
void
Copy
(
FNNModel
&
tgt
,
FNNModel
&
src
);
void
Clear
(
FNNModel
&
model
,
bool
isNodeGrad
);
void
Clear
(
FNNModel
&
model
,
bool
isNodeGrad
);
void
InitModelTensor1D
(
XTensor
&
tensor
,
int
num
,
FNNModel
&
model
);
void
InitModelTensor1D
(
XTensor
&
tensor
,
int
num
,
FNNModel
&
model
);
void
InitModelTensor2D
(
XTensor
&
tensor
,
int
rowNum
,
int
colNum
,
FNNModel
&
model
);
void
InitModelTensor2D
(
XTensor
&
tensor
,
int
rowNum
,
int
colNum
,
FNNModel
&
model
);
void
Train
(
const
char
*
train
,
bool
isShuffled
,
FNNModel
&
model
);
void
Train
(
const
char
*
train
,
bool
isShuffled
,
FNNModel
&
model
);
void
Update
(
FNNModel
&
model
,
FNNModel
&
grad
,
float
epsilon
,
bool
isNodeGrad
);
void
Update
(
FNNModel
&
model
,
FNNModel
&
grad
,
float
epsilon
,
bool
isNodeGrad
);
float
GetProb
(
XTensor
&
output
,
XTensor
&
gold
,
XTensor
*
wordProbs
=
NULL
);
float
GetProb
(
XTensor
&
output
,
XTensor
&
gold
,
XTensor
*
wordProbs
=
NULL
);
void
Dump
(
const
char
*
fn
,
FNNModel
&
model
);
void
Dump
(
const
char
*
fn
,
FNNModel
&
model
);
void
Read
(
const
char
*
fn
,
FNNModel
&
model
);
void
Read
(
const
char
*
fn
,
FNNModel
&
model
);
void
Test
(
const
char
*
test
,
const
char
*
result
,
FNNModel
&
model
);
void
Test
(
const
char
*
test
,
const
char
*
result
,
FNNModel
&
model
);
int
LoadNGrams
(
FILE
*
file
,
int
n
,
NGram
*
ngrams
,
int
sentNum
,
int
wordNum
);
int
LoadNGrams
(
FILE
*
file
,
int
n
,
NGram
*
ngrams
,
int
sentNum
,
int
wordNum
);
void
InitZeroOneTensor2D
(
XTensor
&
tensor
,
int
rowNum
,
int
colNum
,
int
*
rows
,
int
*
cols
,
void
InitZeroOneTensor2D
(
XTensor
&
tensor
,
int
rowNum
,
int
colNum
,
int
*
rows
,
int
*
cols
,
int
itemNum
,
int
devID
,
XMem
*
mem
);
int
itemNum
,
int
devID
,
XMem
*
mem
);
void
MakeWordBatch
(
XTensor
&
batch
,
NGram
*
ngrams
,
int
ngramNum
,
int
n
,
int
vSize
,
int
devID
,
XMem
*
mem
);
void
MakeWordBatch
(
XTensor
&
batch
,
NGram
*
ngrams
,
int
ngramNum
,
int
n
,
int
vSize
,
int
devID
,
XMem
*
mem
);
void
Forward
(
XTensor
inputs
[],
XTensor
&
output
,
FNNModel
&
model
,
FNNNet
&
net
);
void
Forward
(
XTensor
inputs
[],
XTensor
&
output
,
FNNModel
&
model
,
FNNNet
&
net
);
void
Backward
(
XTensor
inputs
[],
XTensor
&
output
,
XTensor
&
gold
,
LOSS_FUNCTION_NAME
loss
,
void
Backward
(
XTensor
inputs
[],
XTensor
&
output
,
XTensor
&
gold
,
LOSS_FUNCTION_NAME
loss
,
FNNModel
&
model
,
FNNModel
&
grad
,
FNNNet
&
net
);
FNNModel
&
model
,
FNNModel
&
grad
,
FNNNet
&
net
);
void
ForwardAutoDiff
(
XTensor
inputs
[],
XTensor
&
output
,
FNNModel
&
model
);
void
ForwardAutoDiff
(
XTensor
inputs
[],
XTensor
&
output
,
FNNModel
&
model
);
void
ForwardAutoDiff
(
NGram
*
ngrams
,
int
batch
,
XTensor
&
output
,
FNNModel
&
model
);
void
ForwardAutoDiff
(
NGram
*
ngrams
,
int
batch
,
XTensor
&
output
,
FNNModel
&
model
);
/*
/*
entry of the program
entry of the program
>> argc - number of the arguments
>> argc - number of the arguments
>> argv - pointers to the arguments
>> argv - pointers to the arguments
<< return - error code
<< return - error code
arguments:
arguments:
-train S: specify training data file name
-train S: specify training data file name
-model S: specify model file name
-model S: specify model file name
-test S: specify test data file name
-test S: specify test data file name
...
@@ -110,10 +111,10 @@ arguments:
...
@@ -110,10 +111,10 @@ arguments:
E.g.,
E.g.,
0 29 2 11 1
0 29 2 11 1
might be a line of the file.
might be a line of the file.
*/
*/
int
FNNLMMain
(
int
argc
,
const
char
**
argv
)
int
FNNLMMain
(
int
argc
,
const
char
**
argv
)
{
{
if
(
argc
==
0
)
if
(
argc
==
0
)
return
1
;
return
1
;
FNNModel
model
;
FNNModel
model
;
...
@@ -128,127 +129,127 @@ int FNNLMMain(int argc, const char ** argv)
...
@@ -128,127 +129,127 @@ int FNNLMMain(int argc, const char ** argv)
Init
(
model
);
Init
(
model
);
/* learn model parameters */
/* learn model parameters */
if
(
strcmp
(
trainFN
,
""
))
if
(
strcmp
(
trainFN
,
""
))
Train
(
trainFN
,
shuffled
,
model
);
Train
(
trainFN
,
shuffled
,
model
);
/* save the final model */
/* save the final model */
if
(
strcmp
(
modelFN
,
""
)
&&
strcmp
(
trainFN
,
""
))
if
(
strcmp
(
modelFN
,
""
)
&&
strcmp
(
trainFN
,
""
))
Dump
(
modelFN
,
model
);
Dump
(
modelFN
,
model
);
/* load the model if neccessary */
/* load the model if neccessary */
if
(
strcmp
(
modelFN
,
""
))
if
(
strcmp
(
modelFN
,
""
))
Read
(
modelFN
,
model
);
Read
(
modelFN
,
model
);
/* test the model on the new data */
/* test the model on the new data */
if
(
strcmp
(
testFN
,
""
)
&&
strcmp
(
outputFN
,
""
))
if
(
strcmp
(
testFN
,
""
)
&&
strcmp
(
outputFN
,
""
))
Test
(
testFN
,
outputFN
,
model
);
Test
(
testFN
,
outputFN
,
model
);
return
0
;
return
0
;
}
}
/*
/*
load arguments
load arguments
>> argc - number of the arguments
>> argc - number of the arguments
>> argv - pointers to the arguments
>> argv - pointers to the arguments
>> model - the fnn model
>> model - the fnn model
*/
*/
void
LoadArgs
(
int
argc
,
const
char
**
argv
,
FNNModel
&
model
)
void
LoadArgs
(
int
argc
,
const
char
**
argv
,
FNNModel
&
model
)
{
{
fprintf
(
stderr
,
"args:
\n
"
);
fprintf
(
stderr
,
"args:
\n
"
);
for
(
int
i
=
0
;
i
<
argc
;
i
++
)
{
for
(
int
i
=
0
;
i
<
argc
;
i
++
)
{
if
(
!
strcmp
(
argv
[
i
],
"-train"
)
&&
i
+
1
<
argc
)
{
if
(
!
strcmp
(
argv
[
i
],
"-train"
)
&&
i
+
1
<
argc
)
{
strcpy
(
trainFN
,
argv
[
i
+
1
]);
strcpy
(
trainFN
,
argv
[
i
+
1
]);
fprintf
(
stderr
,
" -train=%s
\n
"
,
argv
[
i
+
1
]);
fprintf
(
stderr
,
" -train=%s
\n
"
,
argv
[
i
+
1
]);
}
}
if
(
!
strcmp
(
argv
[
i
],
"-model"
)
&&
i
+
1
<
argc
)
{
if
(
!
strcmp
(
argv
[
i
],
"-model"
)
&&
i
+
1
<
argc
)
{
strcpy
(
modelFN
,
argv
[
i
+
1
]);
strcpy
(
modelFN
,
argv
[
i
+
1
]);
fprintf
(
stderr
,
" -model=%s
\n
"
,
argv
[
i
+
1
]);
fprintf
(
stderr
,
" -model=%s
\n
"
,
argv
[
i
+
1
]);
}
}
if
(
!
strcmp
(
argv
[
i
],
"-test"
)
&&
i
+
1
<
argc
)
{
if
(
!
strcmp
(
argv
[
i
],
"-test"
)
&&
i
+
1
<
argc
)
{
strcpy
(
testFN
,
argv
[
i
+
1
]);
strcpy
(
testFN
,
argv
[
i
+
1
]);
fprintf
(
stderr
,
" -test=%s
\n
"
,
argv
[
i
+
1
]);
fprintf
(
stderr
,
" -test=%s
\n
"
,
argv
[
i
+
1
]);
}
}
if
(
!
strcmp
(
argv
[
i
],
"-output"
)
&&
i
+
1
<
argc
)
{
if
(
!
strcmp
(
argv
[
i
],
"-output"
)
&&
i
+
1
<
argc
)
{
strcpy
(
outputFN
,
argv
[
i
+
1
]);
strcpy
(
outputFN
,
argv
[
i
+
1
]);
fprintf
(
stderr
,
" -output=%s
\n
"
,
argv
[
i
+
1
]);
fprintf
(
stderr
,
" -output=%s
\n
"
,
argv
[
i
+
1
]);
}
}
if
(
!
strcmp
(
argv
[
i
],
"-n"
)
&&
i
+
1
<
argc
)
{
if
(
!
strcmp
(
argv
[
i
],
"-n"
)
&&
i
+
1
<
argc
)
{
model
.
n
=
atoi
(
argv
[
i
+
1
]);
model
.
n
=
atoi
(
argv
[
i
+
1
]);
fprintf
(
stderr
,
" -n=%d
\n
"
,
model
.
n
);
fprintf
(
stderr
,
" -n=%d
\n
"
,
model
.
n
);
}
}
if
(
!
strcmp
(
argv
[
i
],
"-esize"
)
&&
i
+
1
<
argc
)
{
if
(
!
strcmp
(
argv
[
i
],
"-esize"
)
&&
i
+
1
<
argc
)
{
model
.
eSize
=
atoi
(
argv
[
i
+
1
]);
model
.
eSize
=
atoi
(
argv
[
i
+
1
]);
fprintf
(
stderr
,
" -esize=%d
\n
"
,
model
.
eSize
);
fprintf
(
stderr
,
" -esize=%d
\n
"
,
model
.
eSize
);
}
}
if
(
!
strcmp
(
argv
[
i
],
"-vsize"
)
&&
i
+
1
<
argc
)
{
if
(
!
strcmp
(
argv
[
i
],
"-vsize"
)
&&
i
+
1
<
argc
)
{
model
.
vSize
=
atoi
(
argv
[
i
+
1
]);
model
.
vSize
=
atoi
(
argv
[
i
+
1
]);
fprintf
(
stderr
,
" -vsize=%d
\n
"
,
model
.
vSize
);
fprintf
(
stderr
,
" -vsize=%d
\n
"
,
model
.
vSize
);
}
}
if
(
!
strcmp
(
argv
[
i
],
"-hdepth"
)
&&
i
+
1
<
argc
)
{
if
(
!
strcmp
(
argv
[
i
],
"-hdepth"
)
&&
i
+
1
<
argc
)
{
model
.
hDepth
=
atoi
(
argv
[
i
+
1
]);
model
.
hDepth
=
atoi
(
argv
[
i
+
1
]);
fprintf
(
stderr
,
" -hdepth=%d
\n
"
,
model
.
hDepth
);
fprintf
(
stderr
,
" -hdepth=%d
\n
"
,
model
.
hDepth
);
}
}
if
(
!
strcmp
(
argv
[
i
],
"-hsize"
)
&&
i
+
1
<
argc
)
{
if
(
!
strcmp
(
argv
[
i
],
"-hsize"
)
&&
i
+
1
<
argc
)
{
model
.
hSize
=
atoi
(
argv
[
i
+
1
]);
model
.
hSize
=
atoi
(
argv
[
i
+
1
]);
fprintf
(
stderr
,
" -hsize=%d
\n
"
,
model
.
hSize
);
fprintf
(
stderr
,
" -hsize=%d
\n
"
,
model
.
hSize
);
}
}
if
(
!
strcmp
(
argv
[
i
],
"-lrate"
)
&&
i
+
1
<
argc
)
{
if
(
!
strcmp
(
argv
[
i
],
"-lrate"
)
&&
i
+
1
<
argc
)
{
learningRate
=
(
float
)
atof
(
argv
[
i
+
1
]);
learningRate
=
(
float
)
atof
(
argv
[
i
+
1
]);
fprintf
(
stderr
,
" -lrate=%f
\n
"
,
learningRate
);
fprintf
(
stderr
,
" -lrate=%f
\n
"
,
learningRate
);
}
}
if
(
!
strcmp
(
argv
[
i
],
"-nstep"
)
&&
i
+
1
<
argc
)
{
if
(
!
strcmp
(
argv
[
i
],
"-nstep"
)
&&
i
+
1
<
argc
)
{
nStep
=
atoi
(
argv
[
i
+
1
]);
nStep
=
atoi
(
argv
[
i
+
1
]);
fprintf
(
stderr
,
" -nstep=%d
\n
"
,
nStep
);
fprintf
(
stderr
,
" -nstep=%d
\n
"
,
nStep
);
}
}
if
(
!
strcmp
(
argv
[
i
],
"-nepoch"
)
&&
i
+
1
<
argc
)
{
if
(
!
strcmp
(
argv
[
i
],
"-nepoch"
)
&&
i
+
1
<
argc
)
{
nEpoch
=
atoi
(
argv
[
i
+
1
]);
nEpoch
=
atoi
(
argv
[
i
+
1
]);
fprintf
(
stderr
,
" -nepoch=%d
\n
"
,
nEpoch
);
fprintf
(
stderr
,
" -nepoch=%d
\n
"
,
nEpoch
);
}
}
if
(
!
strcmp
(
argv
[
i
],
"-minmax"
)
&&
i
+
1
<
argc
)
{
if
(
!
strcmp
(
argv
[
i
],
"-minmax"
)
&&
i
+
1
<
argc
)
{
minmax
=
(
float
)
fabs
(
atof
(
argv
[
i
+
1
]));
minmax
=
(
float
)
fabs
(
atof
(
argv
[
i
+
1
]));
fprintf
(
stderr
,
" -minmax=%f
\n
"
,
minmax
);
fprintf
(
stderr
,
" -minmax=%f
\n
"
,
minmax
);
}
}
if
(
!
strcmp
(
argv
[
i
],
"-batch"
)
&&
i
+
1
<
argc
)
{
if
(
!
strcmp
(
argv
[
i
],
"-batch"
)
&&
i
+
1
<
argc
)
{
sentBatch
=
atoi
(
argv
[
i
+
1
]);
sentBatch
=
atoi
(
argv
[
i
+
1
]);
fprintf
(
stderr
,
" -batch=%d
\n
"
,
sentBatch
);
fprintf
(
stderr
,
" -batch=%d
\n
"
,
sentBatch
);
}
}
if
(
!
strcmp
(
argv
[
i
],
"-wbatch"
)
&&
i
+
1
<
argc
)
{
if
(
!
strcmp
(
argv
[
i
],
"-wbatch"
)
&&
i
+
1
<
argc
)
{
wordBatch
=
atoi
(
argv
[
i
+
1
]);
wordBatch
=
atoi
(
argv
[
i
+
1
]);
fprintf
(
stderr
,
" -wbatch=%d
\n
"
,
wordBatch
);
fprintf
(
stderr
,
" -wbatch=%d
\n
"
,
wordBatch
);
}
}
if
(
!
strcmp
(
argv
[
i
],
"-shuffle"
))
{
if
(
!
strcmp
(
argv
[
i
],
"-shuffle"
))
{
shuffled
=
true
;
shuffled
=
true
;
fprintf
(
stderr
,
" -shuffle=true
\n
"
);
fprintf
(
stderr
,
" -shuffle=true
\n
"
);
}
}
if
(
!
strcmp
(
argv
[
i
],
"-autodiff"
))
{
if
(
!
strcmp
(
argv
[
i
],
"-autodiff"
))
{
autoDiff
=
true
;
autoDiff
=
true
;
fprintf
(
stderr
,
" -autodiff=true
\n
"
);
fprintf
(
stderr
,
" -autodiff=true
\n
"
);
}
}
if
(
!
strcmp
(
argv
[
i
],
"-dev"
)
&&
i
+
1
<
argc
)
{
if
(
!
strcmp
(
argv
[
i
],
"-dev"
)
&&
i
+
1
<
argc
)
{
model
.
devID
=
atoi
(
argv
[
i
+
1
]);
model
.
devID
=
atoi
(
argv
[
i
+
1
]);
fprintf
(
stderr
,
" -dev=%d
\n
"
,
model
.
devID
);
fprintf
(
stderr
,
" -dev=%d
\n
"
,
model
.
devID
);
}
}
}
}
for
(
int
i
=
0
;
i
<
argc
;
i
++
)
{
for
(
int
i
=
0
;
i
<
argc
;
i
++
)
{
if
(
!
strcmp
(
argv
[
i
],
"-mempool"
))
if
(
!
strcmp
(
argv
[
i
],
"-mempool"
))
model
.
mem
=
new
XMem
(
model
.
devID
);
model
.
mem
=
new
XMem
(
model
.
devID
);
}
}
}
}
/* check model settings */
/* check model settings */
void
Check
(
FNNModel
&
model
)
void
Check
(
FNNModel
&
model
)
{
{
CheckErrors
(
model
.
n
>
0
&&
model
.
n
<=
MAX_N_GRAM
,
"The LM order is out of range (use -n)!"
);
CheckErrors
(
model
.
n
>
0
&&
model
.
n
<=
MAX_N_GRAM
,
"The LM order is out of range (use -n)!"
);
CheckErrors
(
model
.
vSize
>
0
,
"no vocabulary size found (use -vsize)!"
);
CheckErrors
(
model
.
vSize
>
0
,
"no vocabulary size found (use -vsize)!"
);
CheckErrors
(
model
.
eSize
>
0
,
"no embedding size found (use -esize)!"
);
CheckErrors
(
model
.
eSize
>
0
,
"no embedding size found (use -esize)!"
);
}
}
/* make a hard copy of the fnn model */
/* make a hard copy of the fnn model */
void
Copy
(
FNNModel
&
tgt
,
FNNModel
&
src
)
void
Copy
(
FNNModel
&
tgt
,
FNNModel
&
src
)
{
{
InitTensorV2
(
&
tgt
.
embeddingW
,
&
src
.
embeddingW
);
InitTensorV2
(
&
tgt
.
embeddingW
,
&
src
.
embeddingW
);
for
(
int
i
=
0
;
i
<
MAX_HIDDEN_NUM
;
i
++
)
{
for
(
int
i
=
0
;
i
<
MAX_HIDDEN_NUM
;
i
++
)
{
InitTensorV2
(
&
tgt
.
hiddenW
[
i
],
&
src
.
hiddenW
[
i
]);
InitTensorV2
(
&
tgt
.
hiddenW
[
i
],
&
src
.
hiddenW
[
i
]);
InitTensorV2
(
&
tgt
.
hiddenB
[
i
],
&
src
.
hiddenB
[
i
]);
InitTensorV2
(
&
tgt
.
hiddenB
[
i
],
&
src
.
hiddenB
[
i
]);
}
}
...
@@ -262,33 +263,33 @@ void Copy(FNNModel &tgt, FNNModel &src)
...
@@ -262,33 +263,33 @@ void Copy(FNNModel &tgt, FNNModel &src)
tgt
.
vSize
=
src
.
vSize
;
tgt
.
vSize
=
src
.
vSize
;
tgt
.
devID
=
src
.
devID
;
tgt
.
devID
=
src
.
devID
;
tgt
.
useMemPool
=
src
.
useMemPool
;
tgt
.
useMemPool
=
src
.
useMemPool
;
if
(
src
.
mem
!=
NULL
)
{
if
(
src
.
mem
!=
NULL
)
{
tgt
.
mem
=
new
XMem
(
src
.
mem
->
devID
,
src
.
mem
->
mode
,
tgt
.
mem
=
new
XMem
(
src
.
mem
->
devID
,
src
.
mem
->
mode
,
src
.
mem
->
maxBlockSize
,
src
.
mem
->
blockNum
,
src
.
mem
->
maxBlockSize
,
src
.
mem
->
blockNum
,
src
.
mem
->
bufSize
);
src
.
mem
->
bufSize
);
}
}
}
}
/*
/*
reset model parameters
reset model parameters
>> model - the model whose parameter (gradient) is set to 0
>> model - the model whose parameter (gradient) is set to 0
>> isNodeGrad - indicates whether the tensor node keeps the
>> isNodeGrad - indicates whether the tensor node keeps the
gradient information
gradient information
*/
*/
void
Clear
(
FNNModel
&
model
,
bool
isNodeGrad
)
void
Clear
(
FNNModel
&
model
,
bool
isNodeGrad
)
{
{
if
(
isNodeGrad
)
{
if
(
isNodeGrad
)
{
if
(
model
.
embeddingW
.
grad
!=
NULL
)
if
(
model
.
embeddingW
.
grad
!=
NULL
)
model
.
embeddingW
.
grad
->
SetZeroAll
();
model
.
embeddingW
.
grad
->
SetZeroAll
();
for
(
int
i
=
0
;
i
<
MAX_HIDDEN_NUM
;
i
++
)
{
for
(
int
i
=
0
;
i
<
MAX_HIDDEN_NUM
;
i
++
)
{
if
(
model
.
hiddenW
[
i
].
grad
!=
NULL
)
if
(
model
.
hiddenW
[
i
].
grad
!=
NULL
)
model
.
hiddenW
[
i
].
grad
->
SetZeroAll
();
model
.
hiddenW
[
i
].
grad
->
SetZeroAll
();
if
(
model
.
hiddenB
[
i
].
grad
!=
NULL
)
if
(
model
.
hiddenB
[
i
].
grad
!=
NULL
)
model
.
hiddenB
[
i
].
grad
->
SetZeroAll
();
model
.
hiddenB
[
i
].
grad
->
SetZeroAll
();
}
}
if
(
model
.
outputW
.
grad
!=
NULL
)
if
(
model
.
outputW
.
grad
!=
NULL
)
model
.
outputW
.
grad
->
SetZeroAll
();
model
.
outputW
.
grad
->
SetZeroAll
();
if
(
model
.
outputB
.
grad
!=
NULL
)
if
(
model
.
outputB
.
grad
!=
NULL
)
model
.
outputB
.
grad
->
SetZeroAll
();
model
.
outputB
.
grad
->
SetZeroAll
();
}
}
else
{
else
{
...
@@ -300,76 +301,78 @@ void Clear(FNNModel &model, bool isNodeGrad)
...
@@ -300,76 +301,78 @@ void Clear(FNNModel &model, bool isNodeGrad)
model
.
outputW
.
SetZeroAll
();
model
.
outputW
.
SetZeroAll
();
model
.
outputB
.
SetZeroAll
();
model
.
outputB
.
SetZeroAll
();
}
}
}
}
/*
/*
initialize a 1d tensor using the fnn model setting
initialize a 1d tensor using the fnn model setting
>> tensor - the tensor to initialize
>> tensor - the tensor to initialize
>> num - number of items
>> num - number of items
>> model - the fnn model
>> model - the fnn model
*/
*/
void
InitModelTensor1D
(
XTensor
&
tensor
,
int
num
,
FNNModel
&
model
)
void
InitModelTensor1D
(
XTensor
&
tensor
,
int
num
,
FNNModel
&
model
)
{
{
InitTensor1DV2
(
&
tensor
,
num
,
X_FLOAT
,
model
.
devID
);
InitTensor1DV2
(
&
tensor
,
num
,
X_FLOAT
,
model
.
devID
);
}
}
/*
/*
initialize a 2d tensor using the fnn model setting
initialize a 2d tensor using the fnn model setting
>> tensor - the tensor to initialize
>> tensor - the tensor to initialize
>> rowNum - number of rows
>> rowNum - number of rows
>> colNum - number of columns
>> colNum - number of columns
>> model - the fnn model
>> model - the fnn model
*/
*/
void
InitModelTensor2D
(
XTensor
&
tensor
,
int
rowNum
,
int
colNum
,
FNNModel
&
model
)
void
InitModelTensor2D
(
XTensor
&
tensor
,
int
rowNum
,
int
colNum
,
FNNModel
&
model
)
{
{
InitTensor2DV2
(
&
tensor
,
rowNum
,
colNum
,
X_FLOAT
,
model
.
devID
);
InitTensor2DV2
(
&
tensor
,
rowNum
,
colNum
,
X_FLOAT
,
model
.
devID
);
}
}
/* initialize the model */
/* initialize the model */
void
Init
(
FNNModel
&
model
)
void
Init
(
FNNModel
&
model
)
{
{
/* create embedding parameter matrix: vSize * eSize */
/* create embedding parameter matrix: vSize * eSize */
InitModelTensor2D
(
model
.
embeddingW
,
model
.
vSize
,
model
.
eSize
,
model
);
InitModelTensor2D
(
model
.
embeddingW
,
model
.
vSize
,
model
.
eSize
,
model
);
model
.
embeddingW
.
SetVarFlag
();
/* create hidden layer parameter matrics */
/* create hidden layer parameter matrics */
for
(
int
i
=
0
;
i
<
model
.
hDepth
;
i
++
)
{
for
(
int
i
=
0
;
i
<
model
.
hDepth
;
i
++
)
{
/* hidden layer parameter matrix: (n-1)eSize * hsize if it is the first layer
/* hidden layer parameter matrix: (n-1)eSize * hsize if it is the first layer
hsize * hsize otherwise */
hsize * hsize otherwise */
if
(
i
==
0
)
if
(
i
==
0
)
InitModelTensor2D
(
model
.
hiddenW
[
i
],
(
model
.
n
-
1
)
*
model
.
eSize
,
model
.
hSize
,
model
);
InitModelTensor2D
(
model
.
hiddenW
[
i
],
(
model
.
n
-
1
)
*
model
.
eSize
,
model
.
hSize
,
model
);
else
else
InitModelTensor2D
(
model
.
hiddenW
[
i
],
model
.
hSize
,
model
.
hSize
,
model
);
InitModelTensor2D
(
model
.
hiddenW
[
i
],
model
.
hSize
,
model
.
hSize
,
model
);
model
.
hiddenW
[
i
].
SetVarFlag
();
/* bias term: a row vector of hSize entries */
/* bias term: a row vector of hSize entries */
InitModelTensor1D
(
model
.
hiddenB
[
i
],
model
.
hSize
,
model
);
InitModelTensor1D
(
model
.
hiddenB
[
i
],
model
.
hSize
,
model
);
model
.
hiddenB
[
i
].
SetVarFlag
();
}
}
/* create the output layer parameter matrix and bias term */
/* create the output layer parameter matrix and bias term */
int
iSize
=
model
.
hDepth
==
0
?
(
model
.
n
-
1
)
*
model
.
eSize
:
model
.
hSize
;
int
iSize
=
model
.
hDepth
==
0
?
(
model
.
n
-
1
)
*
model
.
eSize
:
model
.
hSize
;
InitModelTensor2D
(
model
.
outputW
,
iSize
,
model
.
vSize
,
model
);
InitModelTensor2D
(
model
.
outputW
,
iSize
,
model
.
vSize
,
model
);
InitModelTensor1D
(
model
.
outputB
,
model
.
vSize
,
model
);
InitModelTensor1D
(
model
.
outputB
,
model
.
vSize
,
model
);
model
.
outputW
.
SetVarFlag
();
model
.
outputB
.
SetVarFlag
();
/* then, we initialize model parameters using a uniform distribution in range
/* then, we initialize model parameters using a uniform distribution in range
of [-minmax, minmax] */
of [-minmax, minmax] */
model
.
embeddingW
.
SetDataRand
(
-
minmax
,
minmax
);
model
.
embeddingW
.
SetDataRand
(
-
minmax
,
minmax
);
model
.
outputW
.
SetDataRand
(
-
minmax
,
minmax
);
model
.
outputW
.
SetDataRand
(
-
minmax
,
minmax
);
for
(
int
i
=
0
;
i
<
model
.
hDepth
;
i
++
)
for
(
int
i
=
0
;
i
<
model
.
hDepth
;
i
++
)
model
.
hiddenW
[
i
].
SetDataRand
(
-
minmax
,
minmax
);
model
.
hiddenW
[
i
].
SetDataRand
(
-
minmax
,
minmax
);
/* all bias terms are set to zero */
/* all bias terms are set to zero */
model
.
outputB
.
SetZeroAll
();
model
.
outputB
.
SetZeroAll
();
for
(
int
i
=
0
;
i
<
model
.
hDepth
;
i
++
)
for
(
int
i
=
0
;
i
<
model
.
hDepth
;
i
++
)
model
.
hiddenB
[
i
].
SetZeroAll
();
model
.
hiddenB
[
i
].
SetZeroAll
();
}
}
/*
/*
shuffle lines of the file
shuffle lines of the file
>> srcFile - the source file to shuffle
>> srcFile - the source file to shuffle
>> tgtFile - the resulting file
>> tgtFile - the resulting file
*/
*/
void
Shuffle
(
const
char
*
srcFile
,
const
char
*
tgtFile
)
void
Shuffle
(
const
char
*
srcFile
,
const
char
*
tgtFile
)
{
{
char
*
line
=
new
char
[
MAX_LINE_LENGTH_HERE
];
char
*
line
=
new
char
[
MAX_LINE_LENGTH_HERE
];
#ifndef WIN32
#ifndef WIN32
sprintf
(
line
,
"shuf %s > %s"
,
srcFile
,
tgtFile
);
sprintf
(
line
,
"shuf %s > %s"
,
srcFile
,
tgtFile
);
...
@@ -379,23 +382,23 @@ void Shuffle(const char * srcFile, const char * tgtFile)
...
@@ -379,23 +382,23 @@ void Shuffle(const char * srcFile, const char * tgtFile)
#endif
#endif
delete
[]
line
;
delete
[]
line
;
}
}
char
lineBuf
[
MAX_LINE_LENGTH_HERE
];
char
lineBuf
[
MAX_LINE_LENGTH_HERE
];
int
wordBuf
[
MAX_LINE_LENGTH_HERE
];
int
wordBuf
[
MAX_LINE_LENGTH_HERE
];
/*
/*
train the model with the standard SGD method
train the model with the standard SGD method
>> train - training data file
>> train - training data file
>> isShuffled - shuffle the data file or not
>> isShuffled - shuffle the data file or not
>> model - the fnn model
>> model - the fnn model
*/
*/
void
Train
(
const
char
*
train
,
bool
isShuffled
,
FNNModel
&
model
)
void
Train
(
const
char
*
train
,
bool
isShuffled
,
FNNModel
&
model
)
{
{
char
name
[
MAX_NAME_LENGTH
];
char
name
[
MAX_NAME_LENGTH
];
/* shuffle the data */
/* shuffle the data */
if
(
isShuffled
)
{
if
(
isShuffled
)
{
sprintf
(
name
,
"%s-tmp"
,
train
);
sprintf
(
name
,
"%s-tmp"
,
train
);
Shuffle
(
train
,
name
);
Shuffle
(
train
,
name
);
}
}
...
@@ -420,9 +423,8 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
...
@@ -420,9 +423,8 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
XNet
autoDiffer
;
XNet
autoDiffer
;
double
startT
=
GetClockSec
();
double
startT
=
GetClockSec
();
/* iterate for a number of epochs */
/* iterate for a number of epochs */
for
(
epoch
=
0
;
epoch
<
nEpoch
;
epoch
++
)
{
for
(
epoch
=
0
;
epoch
<
nEpoch
;
epoch
++
)
{
/* data file */
/* data file */
FILE
*
file
=
fopen
(
name
,
"rb"
);
FILE
*
file
=
fopen
(
name
,
"rb"
);
...
@@ -432,7 +434,7 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
...
@@ -432,7 +434,7 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
loss
=
0
;
loss
=
0
;
ngramNum
=
1
;
ngramNum
=
1
;
while
(
ngramNum
>
0
)
{
while
(
ngramNum
>
0
)
{
/* load a minibatch of ngrams */
/* load a minibatch of ngrams */
ngramNum
=
LoadNGrams
(
file
,
model
.
n
,
ngrams
,
sentBatch
,
wordBatch
);
ngramNum
=
LoadNGrams
(
file
,
model
.
n
,
ngrams
,
sentBatch
,
wordBatch
);
...
@@ -453,13 +455,13 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
...
@@ -453,13 +455,13 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
XTensor
lossTensor
;
XTensor
lossTensor
;
/* make the input tensor for position i */
/* make the input tensor for position i */
for
(
int
i
=
0
;
i
<
model
.
n
-
1
;
i
++
)
for
(
int
i
=
0
;
i
<
model
.
n
-
1
;
i
++
)
MakeWordBatch
(
inputs
[
i
],
ngrams
,
ngramNum
,
i
,
model
.
vSize
,
model
.
devID
,
model
.
mem
);
MakeWordBatch
(
inputs
[
i
],
ngrams
,
ngramNum
,
i
,
model
.
vSize
,
model
.
devID
,
model
.
mem
);
/* make the gold tensor */
/* make the gold tensor */
MakeWordBatch
(
gold
,
ngrams
,
ngramNum
,
model
.
n
-
1
,
model
.
vSize
,
model
.
devID
,
model
.
mem
);
MakeWordBatch
(
gold
,
ngrams
,
ngramNum
,
model
.
n
-
1
,
model
.
vSize
,
model
.
devID
,
model
.
mem
);
if
(
!
autoDiff
)
{
if
(
!
autoDiff
)
{
/* prepare an empty network for building the fnn */
/* prepare an empty network for building the fnn */
FNNNet
net
;
FNNNet
net
;
...
@@ -469,15 +471,13 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
...
@@ -469,15 +471,13 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
/* forward computation */
/* forward computation */
Forward
(
inputs
,
output
,
model
,
net
);
Forward
(
inputs
,
output
,
model
,
net
);
/* backward computation to obtain gradients */
/* backward computation to obtain gradients */
Backward
(
inputs
,
output
,
gold
,
CROSSENTROPY
,
model
,
grad
,
net
);
Backward
(
inputs
,
output
,
gold
,
CROSSENTROPY
,
model
,
grad
,
net
);
/* update model parameters */
/* update model parameters */
Update
(
model
,
grad
,
learningRate
,
false
);
Update
(
model
,
grad
,
learningRate
,
false
);
}
}
else
{
else
{
/* gradient = 0 */
/* gradient = 0 */
Clear
(
model
,
true
);
Clear
(
model
,
true
);
...
@@ -489,6 +489,9 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
...
@@ -489,6 +489,9 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
/* this is implemented by multiply function */
/* this is implemented by multiply function */
//ForwardAutoDiff(inputs, output, model);
//ForwardAutoDiff(inputs, output, model);
lossTensor
=
CrossEntropy
(
output
,
gold
);
lossTensor
=
CrossEntropy
(
output
,
gold
);
output
.
Dump
(
stderr
,
"output:"
,
10
);
gold
.
Dump
(
stderr
,
"gold:"
,
10
);
lossTensor
.
Dump
(
stderr
,
"lossTensor:"
,
10
);
/* automatic differentiation */
/* automatic differentiation */
autoDiffer
.
Backward
(
lossTensor
);
autoDiffer
.
Backward
(
lossTensor
);
...
@@ -500,14 +503,15 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
...
@@ -500,14 +503,15 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
/* get probabilities */
/* get probabilities */
float
prob
=
GetProb
(
output
,
gold
);
float
prob
=
GetProb
(
output
,
gold
);
if
(
autoDiff
)
{
prob
=
ReduceSumAll
(
lossTensor
);
prob
=
-
ReduceSumAll
(
lossTensor
);
}
loss
+=
prob
;
//printf("prob:%f", prob);
loss
+=
-
prob
;
wordCount
+=
ngramNum
;
wordCount
+=
ngramNum
;
wordCountTotal
+=
ngramNum
;
wordCountTotal
+=
ngramNum
;
if
(
++
step
>=
nStep
)
{
if
(
++
step
>=
nStep
)
{
isEnd
=
true
;
isEnd
=
true
;
break
;
break
;
}
}
...
@@ -521,7 +525,7 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
...
@@ -521,7 +525,7 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
fclose
(
file
);
fclose
(
file
);
if
(
isEnd
)
if
(
isEnd
)
break
;
break
;
Test
(
testFN
,
outputFN
,
model
);
Test
(
testFN
,
outputFN
,
model
);
...
@@ -535,17 +539,17 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
...
@@ -535,17 +539,17 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
elapsed
,
step
,
epoch
);
elapsed
,
step
,
epoch
);
delete
[]
ngrams
;
delete
[]
ngrams
;
}
}
/*
/*
update the model parameters using the delta rule
update the model parameters using the delta rule
>> model - the model to update
>> model - the model to update
>> grad - gradients
>> grad - gradients
>> epsilon - learning rate
>> epsilon - learning rate
>> isNodeGrad - indicates whether the gradient is associated with the node
>> isNodeGrad - indicates whether the gradient is associated with the node
*/
*/
void
Update
(
FNNModel
&
model
,
FNNModel
&
grad
,
float
epsilon
,
bool
isNodeGrad
)
void
Update
(
FNNModel
&
model
,
FNNModel
&
grad
,
float
epsilon
,
bool
isNodeGrad
)
{
{
TensorList
paraList
(
10
);
TensorList
paraList
(
10
);
TensorList
gradList
(
10
);
TensorList
gradList
(
10
);
...
@@ -559,7 +563,7 @@ void Update(FNNModel &model, FNNModel &grad, float epsilon, bool isNodeGrad)
...
@@ -559,7 +563,7 @@ void Update(FNNModel &model, FNNModel &grad, float epsilon, bool isNodeGrad)
paraList
.
Add
(
&
model
.
embeddingW
);
paraList
.
Add
(
&
model
.
embeddingW
);
if
(
!
isNodeGrad
)
{
if
(
!
isNodeGrad
)
{
gradList
.
Add
(
&
grad
.
outputW
);
gradList
.
Add
(
&
grad
.
outputW
);
gradList
.
Add
(
&
grad
.
outputB
);
gradList
.
Add
(
&
grad
.
outputB
);
...
@@ -567,10 +571,10 @@ void Update(FNNModel &model, FNNModel &grad, float epsilon, bool isNodeGrad)
...
@@ -567,10 +571,10 @@ void Update(FNNModel &model, FNNModel &grad, float epsilon, bool isNodeGrad)
gradList
.
Add
(
&
grad
.
hiddenW
[
i
]);
gradList
.
Add
(
&
grad
.
hiddenW
[
i
]);
gradList
.
Add
(
&
grad
.
hiddenB
[
i
]);
gradList
.
Add
(
&
grad
.
hiddenB
[
i
]);
}
}
;
;
gradList
.
Add
(
&
grad
.
embeddingW
);
gradList
.
Add
(
&
grad
.
embeddingW
);
}
}
else
{
else
{
gradList
.
Add
(
model
.
outputW
.
grad
);
gradList
.
Add
(
model
.
outputW
.
grad
);
gradList
.
Add
(
model
.
outputB
.
grad
);
gradList
.
Add
(
model
.
outputB
.
grad
);
...
@@ -592,17 +596,17 @@ void Update(FNNModel &model, FNNModel &grad, float epsilon, bool isNodeGrad)
...
@@ -592,17 +596,17 @@ void Update(FNNModel &model, FNNModel &grad, float epsilon, bool isNodeGrad)
/* the delta rule */
/* the delta rule */
_Sum
(
para
,
paraGrad
,
para
,
-
epsilon
);
_Sum
(
para
,
paraGrad
,
para
,
-
epsilon
);
}
}
}
}
/*
/*
get prediction probabilites of the gold words
get prediction probabilites of the gold words
>> output - output probabilities
>> output - output probabilities
>> gold - gold standard
>> gold - gold standard
>> wordPobs - probability of each word
>> wordPobs - probability of each word
<< return - probability of the batch
<< return - probability of the batch
*/
*/
float
GetProb
(
XTensor
&
output
,
XTensor
&
gold
,
XTensor
*
wordProbs
)
float
GetProb
(
XTensor
&
output
,
XTensor
&
gold
,
XTensor
*
wordProbs
)
{
{
XTensor
probs
;
XTensor
probs
;
InitTensorV2
(
&
probs
,
&
output
);
InitTensorV2
(
&
probs
,
&
output
);
...
@@ -613,7 +617,7 @@ float GetProb(XTensor &output, XTensor &gold, XTensor * wordProbs)
...
@@ -613,7 +617,7 @@ float GetProb(XTensor &output, XTensor &gold, XTensor * wordProbs)
XTensor
wprobs
;
XTensor
wprobs
;
InitTensor1DV2
(
&
wprobs
,
output
.
GetDim
(
0
),
output
.
dataType
,
output
.
devID
);
InitTensor1DV2
(
&
wprobs
,
output
.
GetDim
(
0
),
output
.
dataType
,
output
.
devID
);
_ReduceSum
(
&
probs
,
&
wprobs
,
1
);
_ReduceSum
(
&
probs
,
&
wprobs
,
1
);
if
(
wordProbs
!=
NULL
)
if
(
wordProbs
!=
NULL
)
_CopyValues
(
&
wprobs
,
wordProbs
);
_CopyValues
(
&
wprobs
,
wordProbs
);
/* reshape the tensor to fit it into the reduce procedure
/* reshape the tensor to fit it into the reduce procedure
...
@@ -629,34 +633,34 @@ float GetProb(XTensor &output, XTensor &gold, XTensor * wordProbs)
...
@@ -629,34 +633,34 @@ float GetProb(XTensor &output, XTensor &gold, XTensor * wordProbs)
_ReduceSum
(
&
probs
,
&
result
,
1
);
_ReduceSum
(
&
probs
,
&
result
,
1
);
return
result
.
Get1D
(
0
);
return
result
.
Get1D
(
0
);
}
}
int
pin
=
0
;
int
pin
=
0
;
int
wordBufCount
=
0
;
int
wordBufCount
=
0
;
/*
/*
load a minibatch of ngrams
load a minibatch of ngrams
>> file - data file
>> file - data file
>> n - order of the language model
>> n - order of the language model
>> ngrams - the loaded ngrams
>> ngrams - the loaded ngrams
>> sentNum - maximum sentences kept in the minibatch
>> sentNum - maximum sentences kept in the minibatch
>> wordNum - maximum words kept in the minibatch
>> wordNum - maximum words kept in the minibatch
*/
*/
int
LoadNGrams
(
FILE
*
file
,
int
n
,
NGram
*
ngrams
,
int
sentNum
,
int
wordNum
)
int
LoadNGrams
(
FILE
*
file
,
int
n
,
NGram
*
ngrams
,
int
sentNum
,
int
wordNum
)
{
{
int
num
=
0
;
int
num
=
0
;
int
lineNum
=
0
;
int
lineNum
=
0
;
while
(
pin
>
0
||
fgets
(
lineBuf
,
MAX_LINE_LENGTH_HERE
-
1
,
file
))
{
while
(
pin
>
0
||
fgets
(
lineBuf
,
MAX_LINE_LENGTH_HERE
-
1
,
file
))
{
if
(
pin
<=
0
)
{
if
(
pin
<=
0
)
{
int
len
=
(
int
)
strlen
(
lineBuf
);
int
len
=
(
int
)
strlen
(
lineBuf
);
while
(
lineBuf
[
len
-
1
]
==
'\r'
||
lineBuf
[
len
-
1
]
==
'\n'
)
{
while
(
lineBuf
[
len
-
1
]
==
'\r'
||
lineBuf
[
len
-
1
]
==
'\n'
)
{
lineBuf
[
len
-
1
]
=
0
;
lineBuf
[
len
-
1
]
=
0
;
len
--
;
len
--
;
}
}
len
=
(
int
)
strlen
(
lineBuf
);
len
=
(
int
)
strlen
(
lineBuf
);
if
(
len
==
0
)
if
(
len
==
0
)
continue
;
continue
;
/* how many characters are in a word */
/* how many characters are in a word */
...
@@ -666,9 +670,9 @@ int LoadNGrams(FILE * file, int n, NGram * ngrams, int sentNum, int wordNum)
...
@@ -666,9 +670,9 @@ int LoadNGrams(FILE * file, int n, NGram * ngrams, int sentNum, int wordNum)
int
wNum
=
0
;
int
wNum
=
0
;
int
i
=
0
;
int
i
=
0
;
for
(
i
=
pin
;
i
<
len
;
i
++
)
{
for
(
i
=
pin
;
i
<
len
;
i
++
)
{
/* load word (id) seperated by space or tab */
/* load word (id) seperated by space or tab */
if
((
lineBuf
[
i
]
==
' '
||
lineBuf
[
i
]
==
'\t'
)
&&
wSize
>
0
)
{
if
((
lineBuf
[
i
]
==
' '
||
lineBuf
[
i
]
==
'\t'
)
&&
wSize
>
0
)
{
lineBuf
[
i
]
=
0
;
lineBuf
[
i
]
=
0
;
wordBuf
[
wNum
++
]
=
atoi
(
lineBuf
+
i
-
wSize
);
wordBuf
[
wNum
++
]
=
atoi
(
lineBuf
+
i
-
wSize
);
wSize
=
0
;
wSize
=
0
;
...
@@ -677,7 +681,7 @@ int LoadNGrams(FILE * file, int n, NGram * ngrams, int sentNum, int wordNum)
...
@@ -677,7 +681,7 @@ int LoadNGrams(FILE * file, int n, NGram * ngrams, int sentNum, int wordNum)
wSize
++
;
wSize
++
;
}
}
if
(
wSize
>
0
)
if
(
wSize
>
0
)
wordBuf
[
wNum
++
]
=
atoi
(
lineBuf
+
i
-
wSize
);
wordBuf
[
wNum
++
]
=
atoi
(
lineBuf
+
i
-
wSize
);
wordBufCount
=
wNum
;
wordBufCount
=
wNum
;
...
@@ -689,69 +693,69 @@ int LoadNGrams(FILE * file, int n, NGram * ngrams, int sentNum, int wordNum)
...
@@ -689,69 +693,69 @@ int LoadNGrams(FILE * file, int n, NGram * ngrams, int sentNum, int wordNum)
int
i
=
-
MAX_INT
;
int
i
=
-
MAX_INT
;
/* create ngrams */
/* create ngrams */
for
(
i
=
MAX
(
pin
,
n
-
1
);
i
<
wordBufCount
-
1
;
i
++
)
{
for
(
i
=
MAX
(
pin
,
n
-
1
);
i
<
wordBufCount
-
1
;
i
++
)
{
memcpy
(
ngrams
[
num
++
].
words
,
wordBuf
+
i
-
n
+
1
,
sizeof
(
int
)
*
n
);
memcpy
(
ngrams
[
num
++
].
words
,
wordBuf
+
i
-
n
+
1
,
sizeof
(
int
)
*
n
);
if
(
num
>=
wordNum
)
if
(
num
>=
wordNum
)
break
;
break
;
}
}
/* set a finished flag if we reach the end of the sentence*/
/* set a finished flag if we reach the end of the sentence*/
if
(
i
>=
wordBufCount
-
1
)
{
if
(
i
>=
wordBufCount
-
1
)
{
pin
=
0
;
pin
=
0
;
wordBufCount
=
0
;
wordBufCount
=
0
;
}
}
/* record where to start next time if we break in the middle */
/* record where to start next time if we break in the middle */
else
{
else
{
pin
=
i
+
1
;
pin
=
i
+
1
;
}
}
if
((
sentNum
>
0
&&
lineNum
>=
sentNum
)
||
num
>=
wordNum
)
if
((
sentNum
>
0
&&
lineNum
>=
sentNum
)
||
num
>=
wordNum
)
break
;
break
;
}
}
return
num
;
return
num
;
}
}
/*
/*
make a 2d tensor in zero-one representation
make a 2d tensor in zero-one representation
The indexed cell is set to 1, and 0 otherwise.
The indexed cell is set to 1, and 0 otherwise.
>> tensor - the tensor to initialize
>> tensor - the tensor to initialize
>> rowNum - number of rows
>> rowNum - number of rows
>> colNum - number of columns
>> colNum - number of columns
>> rows - row index
>> rows - row index
>> cols - column index
>> cols - column index
>> itemNum - number of non-zero items
>> itemNum - number of non-zero items
>> devID - device id
>> devID - device id
>> mem - memory pool
>> mem - memory pool
*/
*/
void
InitZeroOneTensor2D
(
XTensor
&
tensor
,
int
rowNum
,
int
colNum
,
int
*
rows
,
int
*
cols
,
void
InitZeroOneTensor2D
(
XTensor
&
tensor
,
int
rowNum
,
int
colNum
,
int
*
rows
,
int
*
cols
,
int
itemNum
,
int
devID
,
XMem
*
mem
)
int
itemNum
,
int
devID
,
XMem
*
mem
)
{
{
InitTensor2DV2
(
&
tensor
,
rowNum
,
colNum
,
X_FLOAT
,
devID
);
InitTensor2DV2
(
&
tensor
,
rowNum
,
colNum
,
X_FLOAT
,
devID
);
tensor
.
SetZeroAll
();
tensor
.
SetZeroAll
();
/* set none-zero cells */
/* set none-zero cells */
for
(
int
i
=
0
;
i
<
itemNum
;
i
++
)
for
(
int
i
=
0
;
i
<
itemNum
;
i
++
)
tensor
.
Set2D
(
1.0
F
,
rows
[
i
],
cols
[
i
]);
tensor
.
Set2D
(
1.0
F
,
rows
[
i
],
cols
[
i
]);
}
}
/*
/*
make a tensor that encodes a batch of words
make a tensor that encodes a batch of words
>> batch - the tensor encoding a batch of words
>> batch - the tensor encoding a batch of words
>> ngrams - the ngram batch
>> ngrams - the ngram batch
>> ngramNum - batch size
>> ngramNum - batch size
>> n - indicate which word is encode for each ngram
>> n - indicate which word is encode for each ngram
>> vSize - vocabulary size
>> vSize - vocabulary size
>> devID - device id
>> devID - device id
>> mem - memory pool
>> mem - memory pool
*/
*/
void
MakeWordBatch
(
XTensor
&
batch
,
NGram
*
ngrams
,
int
ngramNum
,
int
n
,
int
vSize
,
int
devID
,
XMem
*
mem
)
void
MakeWordBatch
(
XTensor
&
batch
,
NGram
*
ngrams
,
int
ngramNum
,
int
n
,
int
vSize
,
int
devID
,
XMem
*
mem
)
{
{
int
*
rows
=
new
int
[
ngramNum
];
int
*
rows
=
new
int
[
ngramNum
];
int
*
cols
=
new
int
[
ngramNum
];
int
*
cols
=
new
int
[
ngramNum
];
for
(
int
i
=
0
;
i
<
ngramNum
;
i
++
)
{
for
(
int
i
=
0
;
i
<
ngramNum
;
i
++
)
{
rows
[
i
]
=
i
;
rows
[
i
]
=
i
;
cols
[
i
]
=
ngrams
[
i
].
words
[
n
];
cols
[
i
]
=
ngrams
[
i
].
words
[
n
];
}
}
...
@@ -760,31 +764,31 @@ void MakeWordBatch(XTensor &batch, NGram * ngrams, int ngramNum, int n, int vSiz
...
@@ -760,31 +764,31 @@ void MakeWordBatch(XTensor &batch, NGram * ngrams, int ngramNum, int n, int vSiz
delete
[]
rows
;
delete
[]
rows
;
delete
[]
cols
;
delete
[]
cols
;
}
}
/*
/*
forward procedure
forward procedure
>> inputs - input word representations
>> inputs - input word representations
>> output - output probability
>> output - output probability
>> model - the fnn model
>> model - the fnn model
>> net - the network that keeps the internal tensors generated in the process
>> net - the network that keeps the internal tensors generated in the process
*/
*/
void
Forward
(
XTensor
inputs
[],
XTensor
&
output
,
FNNModel
&
model
,
FNNNet
&
net
)
void
Forward
(
XTensor
inputs
[],
XTensor
&
output
,
FNNModel
&
model
,
FNNNet
&
net
)
{
{
int
batchSize
=
-
1
;
int
batchSize
=
-
1
;
int
n
=
model
.
n
;
int
n
=
model
.
n
;
int
depth
=
model
.
hDepth
;
int
depth
=
model
.
hDepth
;
TensorList
eList
(
n
-
1
);
TensorList
eList
(
n
-
1
);
/* previoius n - 1 words */
/* previoius n - 1 words */
for
(
int
i
=
0
;
i
<
n
-
1
;
i
++
)
{
for
(
int
i
=
0
;
i
<
n
-
1
;
i
++
)
{
XTensor
&
input
=
inputs
[
i
];
XTensor
&
input
=
inputs
[
i
];
XTensor
&
w
=
model
.
embeddingW
;
XTensor
&
w
=
model
.
embeddingW
;
XTensor
&
embedding
=
net
.
embeddings
[
i
];
XTensor
&
embedding
=
net
.
embeddings
[
i
];
if
(
batchSize
==
-
1
)
if
(
batchSize
==
-
1
)
batchSize
=
input
.
dimSize
[
0
];
batchSize
=
input
.
dimSize
[
0
];
else
{
else
{
CheckErrors
(
batchSize
==
input
.
dimSize
[
0
],
"Wrong input word representations!"
);
CheckErrors
(
batchSize
==
input
.
dimSize
[
0
],
"Wrong input word representations!"
);
}
}
...
@@ -804,7 +808,7 @@ void Forward(XTensor inputs[], XTensor &output, FNNModel &model, FNNNet &net)
...
@@ -804,7 +808,7 @@ void Forward(XTensor inputs[], XTensor &output, FNNModel &model, FNNNet &net)
_Concatenate
(
&
eList
,
&
net
.
embeddingCat
,
1
);
_Concatenate
(
&
eList
,
&
net
.
embeddingCat
,
1
);
/* go over each hidden layer */
/* go over each hidden layer */
for
(
int
i
=
0
;
i
<
depth
;
i
++
)
{
for
(
int
i
=
0
;
i
<
depth
;
i
++
)
{
XTensor
&
h_pre
=
i
==
0
?
net
.
embeddingCat
:
net
.
hiddens
[
i
-
1
];
XTensor
&
h_pre
=
i
==
0
?
net
.
embeddingCat
:
net
.
hiddens
[
i
-
1
];
XTensor
&
w
=
model
.
hiddenW
[
i
];
XTensor
&
w
=
model
.
hiddenW
[
i
];
XTensor
&
b
=
model
.
hiddenB
[
i
];
XTensor
&
b
=
model
.
hiddenB
[
i
];
...
@@ -860,21 +864,21 @@ void Forward(XTensor inputs[], XTensor &output, FNNModel &model, FNNNet &net)
...
@@ -860,21 +864,21 @@ void Forward(XTensor inputs[], XTensor &output, FNNModel &model, FNNNet &net)
/* y = softmax(s) */
/* y = softmax(s) */
_LogSoftmax
(
&
s
,
&
y
,
1
);
_LogSoftmax
(
&
s
,
&
y
,
1
);
}
}
}
}
/*
/*
backward procedure
backward procedure
>> inputs - input word representations
>> inputs - input word representations
>> output - output probability
>> output - output probability
>> gold - gold standard
>> gold - gold standard
>> loss - loss function name
>> loss - loss function name
>> model - the fnn model
>> model - the fnn model
>> grad - the model that keeps the gradient information
>> grad - the model that keeps the gradient information
>> net - the network that keeps the internal tensors generated in the process
>> net - the network that keeps the internal tensors generated in the process
*/
*/
void
Backward
(
XTensor
inputs
[],
XTensor
&
output
,
XTensor
&
gold
,
LOSS_FUNCTION_NAME
loss
,
void
Backward
(
XTensor
inputs
[],
XTensor
&
output
,
XTensor
&
gold
,
LOSS_FUNCTION_NAME
loss
,
FNNModel
&
model
,
FNNModel
&
grad
,
FNNNet
&
net
)
FNNModel
&
model
,
FNNModel
&
grad
,
FNNNet
&
net
)
{
{
int
batchSize
=
output
.
GetDim
(
0
);
int
batchSize
=
output
.
GetDim
(
0
);
int
n
=
model
.
n
;
int
n
=
model
.
n
;
int
depth
=
model
.
hDepth
;
int
depth
=
model
.
hDepth
;
...
@@ -979,17 +983,17 @@ void Backward(XTensor inputs[], XTensor &output, XTensor &gold, LOSS_FUNCTION_NA
...
@@ -979,17 +983,17 @@ void Backward(XTensor inputs[], XTensor &output, XTensor &gold, LOSS_FUNCTION_NA
delete
dedy
;
delete
dedy
;
}
}
}
}
/*
/*
forward process (with tensor connections) (this is implemented by gather function)
forward process (with tensor connections) (this is implemented by gather function)
>> ngrams - the loaded ngrams
>> ngrams - the loaded ngrams
>> batch - the tensor encoding a batch of words
>> batch - the tensor encoding a batch of words
>> output - output probability
>> output - output probability
>> model - the fnn model
>> model - the fnn model
*/
*/
void
ForwardAutoDiff
(
NGram
*
ngrams
,
int
batch
,
XTensor
&
output
,
FNNModel
&
model
)
void
ForwardAutoDiff
(
NGram
*
ngrams
,
int
batch
,
XTensor
&
output
,
FNNModel
&
model
)
{
{
int
n
=
model
.
n
;
int
n
=
model
.
n
;
int
depth
=
model
.
hDepth
;
int
depth
=
model
.
hDepth
;
...
@@ -998,11 +1002,11 @@ void ForwardAutoDiff(NGram * ngrams, int batch, XTensor &output, FNNModel &model
...
@@ -998,11 +1002,11 @@ void ForwardAutoDiff(NGram * ngrams, int batch, XTensor &output, FNNModel &model
XTensor
hidden
;
XTensor
hidden
;
XTensor
b
;
XTensor
b
;
int
size
=
batch
*
(
n
-
1
);
int
size
=
batch
*
(
n
-
1
);
int
*
index
=
new
int
[
size
];
int
*
index
=
new
int
[
size
];
for
(
int
i
=
0
;
i
<
batch
;
i
++
)
{
for
(
int
i
=
0
;
i
<
batch
;
i
++
)
{
for
(
int
j
=
0
;
j
<
n
-
1
;
j
++
)
{
for
(
int
j
=
0
;
j
<
n
-
1
;
j
++
)
{
int
a
=
i
*
(
n
-
1
)
+
j
;
int
a
=
i
*
(
n
-
1
)
+
j
;
index
[
a
]
=
ngrams
[
i
].
words
[
j
];
index
[
a
]
=
ngrams
[
i
].
words
[
j
];
}
}
...
@@ -1010,7 +1014,7 @@ void ForwardAutoDiff(NGram * ngrams, int batch, XTensor &output, FNNModel &model
...
@@ -1010,7 +1014,7 @@ void ForwardAutoDiff(NGram * ngrams, int batch, XTensor &output, FNNModel &model
InitTensor1DV2
(
&
words
,
size
,
X_INT
,
model
.
devID
);
InitTensor1DV2
(
&
words
,
size
,
X_INT
,
model
.
devID
);
words
.
SetData
(
index
,
size
);
words
.
SetData
(
index
,
size
);
words
.
Dump
(
stderr
,
"word:"
,
10
);
embeddingBig
=
Gather
(
model
.
embeddingW
,
words
);
embeddingBig
=
Gather
(
model
.
embeddingW
,
words
);
delete
[]
index
;
delete
[]
index
;
...
@@ -1018,26 +1022,26 @@ void ForwardAutoDiff(NGram * ngrams, int batch, XTensor &output, FNNModel &model
...
@@ -1018,26 +1022,26 @@ void ForwardAutoDiff(NGram * ngrams, int batch, XTensor &output, FNNModel &model
int
dimSize
[
2
];
int
dimSize
[
2
];
dimSize
[
0
]
=
embeddingBig
.
GetDim
(
0
)
/
(
n
-
1
);
dimSize
[
0
]
=
embeddingBig
.
GetDim
(
0
)
/
(
n
-
1
);
dimSize
[
1
]
=
embeddingBig
.
GetDim
(
1
)
*
(
n
-
1
);
dimSize
[
1
]
=
embeddingBig
.
GetDim
(
1
)
*
(
n
-
1
);
embeddingBig
.
Dump
(
stderr
,
"embeddingBig:"
,
10
);
hidden
=
Reshape
(
embeddingBig
,
embeddingBig
.
order
,
dimSize
);
hidden
=
Reshape
(
embeddingBig
,
embeddingBig
.
order
,
dimSize
);
hidden
.
Dump
(
stderr
,
"hidden-0:"
,
10
);
/* hidden layers */
/* hidden layers */
for
(
int
i
=
0
;
i
<
depth
;
i
++
)
for
(
int
i
=
0
;
i
<
depth
;
i
++
)
hidden
=
HardTanH
(
MMul
(
hidden
,
model
.
hiddenW
[
i
])
+
model
.
hiddenB
[
i
]);
hidden
=
HardTanH
(
MMul
(
hidden
,
model
.
hiddenW
[
i
])
+
model
.
hiddenB
[
i
]);
hidden
.
Dump
(
stderr
,
"hidden-1:"
,
10
);
/* output layer */
/* output layer */
//output = LogSoftmax(MMul(hidden, model.outputW) + model.outputB, 1);
//output = LogSoftmax(MMul(hidden, model.outputW) + model.outputB, 1);
output
=
Softmax
(
MMul
(
hidden
,
model
.
outputW
)
+
model
.
outputB
,
1
);
output
=
Softmax
(
MMul
(
hidden
,
model
.
outputW
)
+
model
.
outputB
,
1
);
}
}
/*
/*
forward process (with tensor connections) (this is implemented by multiply function)
forward process (with tensor connections) (this is implemented by multiply function)
>> inputs - input word representations
>> inputs - input word representations
>> output - output probability
>> output - output probability
>> model - the fnn model
>> model - the fnn model
*/
*/
void
ForwardAutoDiff
(
XTensor
inputs
[],
XTensor
&
output
,
FNNModel
&
model
)
void
ForwardAutoDiff
(
XTensor
inputs
[],
XTensor
&
output
,
FNNModel
&
model
)
{
{
int
n
=
model
.
n
;
int
n
=
model
.
n
;
int
depth
=
model
.
hDepth
;
int
depth
=
model
.
hDepth
;
...
@@ -1047,7 +1051,7 @@ void ForwardAutoDiff(XTensor inputs[], XTensor &output, FNNModel &model)
...
@@ -1047,7 +1051,7 @@ void ForwardAutoDiff(XTensor inputs[], XTensor &output, FNNModel &model)
XTensor
b
;
XTensor
b
;
TensorList
inputList
(
n
-
1
);
TensorList
inputList
(
n
-
1
);
for
(
int
i
=
0
;
i
<
n
-
1
;
i
++
)
for
(
int
i
=
0
;
i
<
n
-
1
;
i
++
)
inputList
.
Add
(
inputs
+
i
);
inputList
.
Add
(
inputs
+
i
);
/* represent n - 1 words in one tensor */
/* represent n - 1 words in one tensor */
...
@@ -1061,21 +1065,21 @@ void ForwardAutoDiff(XTensor inputs[], XTensor &output, FNNModel &model)
...
@@ -1061,21 +1065,21 @@ void ForwardAutoDiff(XTensor inputs[], XTensor &output, FNNModel &model)
hidden
=
Merge
(
hidden
,
2
,
0
);
hidden
=
Merge
(
hidden
,
2
,
0
);
/* hidden layers */
/* hidden layers */
for
(
int
i
=
0
;
i
<
depth
;
i
++
)
for
(
int
i
=
0
;
i
<
depth
;
i
++
)
hidden
=
MMul
(
hidden
,
model
.
hiddenW
[
i
])
+
model
.
hiddenB
[
i
];
hidden
=
MMul
(
hidden
,
model
.
hiddenW
[
i
])
+
model
.
hiddenB
[
i
];
/* output layer */
/* output layer */
output
=
LogSoftmax
(
MMul
(
hidden
,
model
.
outputW
)
+
model
.
outputB
,
1
);
output
=
LogSoftmax
(
MMul
(
hidden
,
model
.
outputW
)
+
model
.
outputB
,
1
);
}
}
/*
/*
dump the model to the disk space
dump the model to the disk space
>> fn - where to keep the model
>> fn - where to keep the model
>> model - the fnn model
>> model - the fnn model
*/
*/
void
Dump
(
const
char
*
fn
,
FNNModel
&
model
)
void
Dump
(
const
char
*
fn
,
FNNModel
&
model
)
{
{
FILE
*
file
=
fopen
(
fn
,
"wb"
);
FILE
*
file
=
fopen
(
fn
,
"wb"
);
CheckErrors
(
file
,
"Cannot open the model file"
);
CheckErrors
(
file
,
"Cannot open the model file"
);
...
@@ -1094,15 +1098,15 @@ void Dump(const char * fn, FNNModel &model)
...
@@ -1094,15 +1098,15 @@ void Dump(const char * fn, FNNModel &model)
fclose
(
file
);
fclose
(
file
);
XPRINT
(
0
,
stderr
,
"[INFO] model saved
\n
"
);
XPRINT
(
0
,
stderr
,
"[INFO] model saved
\n
"
);
}
}
/*
/*
read the model from the disk space
read the model from the disk space
>> fn - where to keep the model
>> fn - where to keep the model
>> model - the fnn model
>> model - the fnn model
*/
*/
void
Read
(
const
char
*
fn
,
FNNModel
&
model
)
void
Read
(
const
char
*
fn
,
FNNModel
&
model
)
{
{
FILE
*
file
=
fopen
(
fn
,
"rb"
);
FILE
*
file
=
fopen
(
fn
,
"rb"
);
CheckErrors
(
file
,
"Cannot open the model file"
);
CheckErrors
(
file
,
"Cannot open the model file"
);
...
@@ -1121,16 +1125,16 @@ void Read(const char * fn, FNNModel &model)
...
@@ -1121,16 +1125,16 @@ void Read(const char * fn, FNNModel &model)
fclose
(
file
);
fclose
(
file
);
XPRINT
(
0
,
stderr
,
"[INFO] model loaded
\n
"
);
XPRINT
(
0
,
stderr
,
"[INFO] model loaded
\n
"
);
}
}
/*
/*
test the model
test the model
>> test - test data file
>> test - test data file
>> result - where to keep the result
>> result - where to keep the result
>> model - the fnn model
>> model - the fnn model
*/
*/
void
Test
(
const
char
*
test
,
const
char
*
result
,
FNNModel
&
model
)
void
Test
(
const
char
*
test
,
const
char
*
result
,
FNNModel
&
model
)
{
{
int
wordCount
=
0
;
int
wordCount
=
0
;
int
sentCount
=
0
;
int
sentCount
=
0
;
float
loss
=
0
;
float
loss
=
0
;
...
@@ -1173,14 +1177,13 @@ void Test(const char * test, const char * result, FNNModel &model)
...
@@ -1173,14 +1177,13 @@ void Test(const char * test, const char * result, FNNModel &model)
if
(
!
autoDiff
)
{
if
(
!
autoDiff
)
{
/* prepare an empty network for building the fnn */
/* prepare an empty network for building the fnn */
FNNNet
net
;
FNNNet
net
;
/* forward computation */
/* forward computation */
Forward
(
inputs
,
output
,
model
,
net
);
Forward
(
inputs
,
output
,
model
,
net
);
}
}
else
{
else
{
/* this is implemented by gather function */
/* this is implemented by gather function */
ForwardAutoDiff
(
ngrams
,
ngramNum
,
output
,
model
);
ForwardAutoDiff
(
ngrams
,
ngramNum
,
output
,
model
);
output
=
Log
(
output
);
/* this is implemented by multiply function */
/* this is implemented by multiply function */
//ForwardAutoDiff(inputs, output, model);
//ForwardAutoDiff(inputs, output, model);
}
}
...
@@ -1213,11 +1216,11 @@ void Test(const char * test, const char * result, FNNModel &model)
...
@@ -1213,11 +1216,11 @@ void Test(const char * test, const char * result, FNNModel &model)
double
elapsed
=
GetClockSec
()
-
startT
;
double
elapsed
=
GetClockSec
()
-
startT
;
XPRINT1
(
0
,
stderr
,
"[INFO] ppl=%.2f
\n
"
,
exp
(
loss
/
wordCount
));
XPRINT1
(
0
,
stderr
,
"[INFO] ppl=%.2f
\n
"
,
exp
(
loss
/
wordCount
));
XPRINT3
(
0
,
stderr
,
"[INFO] test finished (took %.1fs, sentence=%d and ngram=%d)
\n
"
,
XPRINT3
(
0
,
stderr
,
"[INFO] test finished (took %.1fs, sentence=%d and ngram=%d)
\n
"
,
elapsed
,
sentCount
,
wordCount
);
elapsed
,
sentCount
,
wordCount
);
delete
[]
ngrams
;
delete
[]
ngrams
;
}
}
};
};
source/tensor/Main.cpp
查看文件 @
2c4061e9
...
@@ -28,7 +28,7 @@
...
@@ -28,7 +28,7 @@
#include <time.h>
#include <time.h>
#include "XTensor.h"
#include "XTensor.h"
#include "XDevice.h"
#include "XDevice.h"
#include "./test/Test.h"
//
#include "./test/Test.h"
#include "./core/CHeader.h"
#include "./core/CHeader.h"
#include "./loss/CrossEntropy.h"
#include "./loss/CrossEntropy.h"
...
@@ -44,7 +44,7 @@ void LittleTest();
...
@@ -44,7 +44,7 @@ void LittleTest();
void
T2TTest
();
void
T2TTest
();
void
T2TTest2
();
void
T2TTest2
();
void
PowerTest
();
void
PowerTest
();
void
Tests
();
int
main
(
int
argc
,
const
char
**
argv
)
int
main
(
int
argc
,
const
char
**
argv
)
{
{
//PowerTest();
//PowerTest();
...
@@ -63,7 +63,7 @@ int main( int argc, const char ** argv )
...
@@ -63,7 +63,7 @@ int main( int argc, const char ** argv )
//return 0;
//return 0;
if
(
argc
>
1
&&
!
strcmp
(
argv
[
1
],
"-test"
))
if
(
argc
>
1
&&
!
strcmp
(
argv
[
1
],
"-test"
))
Test
();
Test
s
();
else
{
else
{
fprintf
(
stderr
,
"Thanks for using NiuTrans.Tensor! This is a library that eases the
\n
"
);
fprintf
(
stderr
,
"Thanks for using NiuTrans.Tensor! This is a library that eases the
\n
"
);
fprintf
(
stderr
,
"use of tensors. All you need is to ...
\n\n
"
);
fprintf
(
stderr
,
"use of tensors. All you need is to ...
\n\n
"
);
...
@@ -75,219 +75,223 @@ int main( int argc, const char ** argv )
...
@@ -75,219 +75,223 @@ int main( int argc, const char ** argv )
return
0
;
return
0
;
}
}
void
myRead
(
XTensor
*
tensor
,
const
char
*
filename
,
const
char
*
label
)
void
Tests
()
{
{
FILE
*
file
=
fopen
(
filename
,
"rb"
);
if
(
file
==
NULL
)
printf
(
"%s
\n
"
,
filename
);
tensor
->
Read
(
file
,
label
);
}
void
myDump
(
XTensor
*
tensor
,
const
char
*
filename
,
const
char
*
label
)
{
FILE
*
file
=
fopen
(
filename
,
"wb"
);
if
(
file
==
NULL
)
printf
(
"%s
\n
"
,
filename
);
tensor
->
Dump
(
file
,
label
);
}
void
PowerTest
()
{
XTensor
input
;
XTensor
output
;
InitTensor2D
(
&
input
,
256
,
10000
,
X_FLOAT
,
0
);
InitTensor2D
(
&
output
,
256
,
10000
,
X_FLOAT
,
0
);
myRead
(
&
input
,
"1.txt"
,
""
);
_Power
(
&
input
,
&
output
,
2
);
output
.
Dump
(
stderr
,
""
,
200
);
}
void
SmallTest
()
{
XTensor
a
;
XTensor
b
;
XTensor
c
;
XTensor
d
;
InitTensor2D
(
&
a
,
2
,
2
);
InitTensor2D
(
&
b
,
2
,
2
);
a
.
SetZeroAll
();
b
.
SetZeroAll
();
a
.
Set2D
(
1.0
F
,
0
,
0
);
a
.
Set2D
(
2.0
F
,
1
,
1
);
b
=
Sum
(
a
,
Multiply
(
a
,
a
));
/* this is prohibited !!!!!!!!!!!!! */
//XTensor c = a * b + a;
//XTensor d = a + b + c.Lin(0.5F);
c
=
a
*
b
+
a
;
d
=
a
+
b
+
c
.
Lin
(
0.5
F
);
XLink
::
CheckNetwork
(
&
d
);
//XLink::ShowNetwork(stderr, &d);
a
.
Dump
(
stderr
,
"a:"
);
b
.
Dump
(
stderr
,
"b:"
);
c
.
Dump
(
stderr
,
"c:"
);
d
.
Dump
(
stderr
,
"d:"
);
}
void
TransposeTest
()
{
XTensor
a
;
XTensor
b
;
int
I
=
2
;
int
J
=
3
;
InitTensor4D
(
&
a
,
2
,
3
,
4
,
5
);
int
*
dims
=
new
int
[
a
.
order
];
memcpy
(
dims
,
a
.
dimSize
,
sizeof
(
int
)
*
a
.
order
);
dims
[
I
]
=
a
.
dimSize
[
J
];
dims
[
J
]
=
a
.
dimSize
[
I
];
InitTensor
(
&
b
,
4
,
dims
);
a
.
SetZeroAll
();
b
.
SetZeroAll
();
float
*
data
=
new
float
[
a
.
unitNum
];
for
(
int
i
=
0
;
i
<
a
.
unitNum
;
i
++
)
data
[
i
]
=
(
float
)
i
;
a
.
SetData
(
data
,
a
.
unitNum
,
0
);
_Transpose
(
&
a
,
&
b
,
I
,
J
);
b
.
Dump
(
stderr
,
"b:"
);
delete
[]
data
;
}
void
LittleTest
()
{
int
a
=
5000
;
int
b
=
100000
;
int
c
=
a
*
b
;
printf
(
"%d
\n
"
,
c
);
exit
(
1
);
}
}
void
T2TTest
()
//void myRead(XTensor * tensor, const char * filename, const char * label)
{
//{
XTensor
*
input
;
// FILE * file = fopen(filename, "rb");
XTensor
*
weight
;
// if(file == NULL)
XTensor
*
output
;
// printf("%s\n", filename);
XTensor
*
gold
;
// tensor->Read(file, label);
XTensor
*
dedy
;
//}
XTensor
*
dedx
;
//
XTensor
*
dedxTmp
;
//void myDump(XTensor * tensor, const char * filename, const char * label)
XTensor
*
dedw
;
//{
XTensor
*
padding
;
// FILE * file = fopen(filename, "wb");
// if(file == NULL)
DTYPE
loss
;
// printf("%s\n", filename);
// tensor->Dump(file, label);
int
*
dimSize
=
new
int
[
2
];
//}
dimSize
[
0
]
=
256
;
//
dimSize
[
1
]
=
10001
;
//void PowerTest()
//{
int
*
dimSize2
=
new
int
[
3
];
// XTensor input;
dimSize2
[
0
]
=
2
;
// XTensor output;
dimSize2
[
1
]
=
31
;
// InitTensor2D(&input, 256, 10000, X_FLOAT, 0);
dimSize2
[
2
]
=
256
;
// InitTensor2D(&output, 256, 10000, X_FLOAT, 0);
// myRead(&input, "1.txt", "");
int
*
dimSize3
=
new
int
[
3
];
//
dimSize3
[
0
]
=
2
;
// _Power(&input, &output, 2);
dimSize3
[
1
]
=
31
;
// output.Dump(stderr, "", 200);
dimSize3
[
2
]
=
10001
;
//}
//
int
*
dimSize4
=
new
int
[
2
];
//void SmallTest()
dimSize4
[
0
]
=
2
;
//{
dimSize4
[
1
]
=
31
;
// XTensor a;
// XTensor b;
input
=
NewTensor
(
3
,
dimSize2
,
X_FLOAT
,
1.0
F
,
0
);
// XTensor c;
weight
=
NewTensor
(
2
,
dimSize
,
X_FLOAT
,
1.0
F
,
0
);
// XTensor d;
dedw
=
NewTensor
(
2
,
dimSize
,
X_FLOAT
,
1.0
F
,
0
);
//
gold
=
NewTensor
(
3
,
dimSize3
,
X_FLOAT
,
1.0
F
,
0
);
// InitTensor2D(&a, 2, 2);
output
=
NewTensor
(
3
,
dimSize3
,
X_FLOAT
,
1.0
F
,
0
);
// InitTensor2D(&b, 2, 2);
dedy
=
NewTensor
(
3
,
dimSize3
,
X_FLOAT
,
1.0
F
,
0
);
// a.SetZeroAll();
dedx
=
NewTensor
(
3
,
dimSize3
,
X_FLOAT
,
1.0
F
,
0
);
// b.SetZeroAll();
dedxTmp
=
NewTensor
(
3
,
dimSize3
,
X_FLOAT
,
1.0
F
,
0
);
// a.Set2D(1.0F, 0, 0);
padding
=
NewTensor
(
2
,
dimSize4
,
X_FLOAT
,
1.0
F
,
0
);
// a.Set2D(2.0F, 1, 1);
//
//weight = NewTensor(2, dimSize);
// b = Sum(a, Multiply(a, a));
//dedw = NewTensor(2, dimSize);
//
//input = NewTensor(3, dimSize2);
// /* this is prohibited !!!!!!!!!!!!! */
//gold = NewTensor(3, dimSize3);
// //XTensor c = a * b + a;
//output = NewTensor(3, dimSize3);
// //XTensor d = a + b + c.Lin(0.5F);
//dedy = NewTensor(3, dimSize3);
//
//dedx = NewTensor(3, dimSize3);
// c = a * b + a;
//dedxTmp = NewTensor(3, dimSize3);
// d = a + b + c.Lin(0.5F);
//padding = NewTensor(2, dimSize4);
//
// XLink::CheckNetwork(&d);
myRead
(
input
,
"x.txt"
,
"x"
);
// //XLink::ShowNetwork(stderr, &d);
myRead
(
weight
,
"w.txt"
,
"w"
);
//
myRead
(
gold
,
"gold.txt"
,
"gold"
);
// a.Dump(stderr, "a:");
myRead
(
padding
,
"padding.txt"
,
"padding"
);
// b.Dump(stderr, "b:");
// c.Dump(stderr, "c:");
XTensor
inter
;
// d.Dump(stderr, "d:");
inter
=
MMul
(
*
input
,
*
weight
);
//}
//
_Softmax
(
&
inter
,
output
,
2
);
//void TransposeTest()
//{
//_LogMe(output);
// XTensor a;
loss
=
_CrossEntropyFast
(
output
,
gold
,
REDUCE_MEAN
,
NULL
,
padding
);
// XTensor b;
//
printf
(
"loss: %f
\n
"
,
loss
);
// int I = 2;
// int J = 3;
_CrossEntropyBackward
(
dedy
,
output
,
gold
,
NULL
);
//
//_CrossEntropyBackward(dedy, output, gold, NULL, padding);
// InitTensor4D(&a, 2, 3, 4, 5);
//
myDump
(
dedy
,
"dedy.txt"
,
"dedy"
);
// int * dims = new int[a.order];
// memcpy(dims, a.dimSize, sizeof(int) * a.order);
_SoftmaxBackward
(
NULL
,
output
,
input
,
dedy
,
dedx
,
NULL
,
-
1
,
NOLOSS
);
// dims[I] = a.dimSize[J];
_Sub
(
output
,
gold
,
dedxTmp
);
// dims[J] = a.dimSize[I];
//
myDump
(
dedx
,
"dedx.txt"
,
"dedx"
);
// InitTensor(&b, 4, dims);
dedx
->
Dump
(
stderr
,
"dedx"
,
200
);
//
dedxTmp
->
Dump
(
stderr
,
"dedxTmp"
,
200
);
// a.SetZeroAll();
// b.SetZeroAll();
input
->
Reshape
(
input
->
unitNum
/
input
->
GetDim
(
-
1
),
input
->
GetDim
(
-
1
));
//
dedx
->
Reshape
(
dedx
->
unitNum
/
dedx
->
GetDim
(
-
1
),
dedx
->
GetDim
(
-
1
));
// float * data = new float[a.unitNum];
// for(int i = 0; i < a.unitNum; i++)
_MatrixMulBatched
(
input
,
X_TRANS
,
dedx
,
X_NOTRANS
,
dedw
);
// data[i] = (float)i;
//
myDump
(
dedw
,
"dedw.txt"
,
"dedw"
);
// a.SetData(data, a.unitNum, 0);
}
//
// _Transpose(&a, &b, I, J);
void
T2TTest2
()
// b.Dump(stderr, "b:");
{
//
int
dimSize
[
3
];
// delete[] data;
dimSize
[
0
]
=
161
;
//}
dimSize
[
1
]
=
47
;
//
dimSize
[
2
]
=
10001
;
//void LittleTest()
XTensor
*
probs
=
NewTensor
(
3
,
dimSize
,
X_FLOAT
,
1.0
F
,
0
);
//{
//XTensor * probs = NewTensor(3, dimSize, X_FLOAT, 1.0F, -1);
// int a = 5000;
// int b = 100000;
//myRead(probs, "probs.txt", " ");
// int c = a*b;
_SetDataFixedFloat
(
probs
,
1.0
F
);
// printf("%d\n", c);
//
probs
->
Reshape
(
1
,
probs
->
unitNum
);
// exit(1);
//}
DTYPE
sum
=
_ReduceSumAll
(
probs
);
//
printf
(
"%e
\n
"
,
sum
);
//void T2TTest()
//{
//XTensor tmp;
// XTensor * input;
//tmp = IsNonZero(*probs);
// XTensor * weight;
//DTYPE nonZeroNum = ReduceSumAll(tmp);
// XTensor * output;
//printf("%f\n", nonZeroNum);
// XTensor * gold;
//
// XTensor * dedy;
//DTYPE gpu = ReduceSum(*probs, 1).Get2D(0, 0);
// XTensor * dedx;
// XTensor * dedxTmp;
//printf("%e\n", gpu);
// XTensor * dedw;
}
// XTensor * padding;
//
// DTYPE loss;
//
// int * dimSize = new int[2];
// dimSize[0] = 256;
// dimSize[1] = 10001;
//
// int * dimSize2 = new int[3];
// dimSize2[0] = 2;
// dimSize2[1] = 31;
// dimSize2[2] = 256;
//
// int * dimSize3 = new int[3];
// dimSize3[0] = 2;
// dimSize3[1] = 31;
// dimSize3[2] = 10001;
//
// int * dimSize4 = new int[2];
// dimSize4[0] = 2;
// dimSize4[1] = 31;
//
// input = NewTensor(3, dimSize2, X_FLOAT, 1.0F, 0);
// weight = NewTensor(2, dimSize, X_FLOAT, 1.0F, 0);
// dedw = NewTensor(2, dimSize, X_FLOAT, 1.0F, 0);
// gold = NewTensor(3, dimSize3, X_FLOAT, 1.0F, 0);
// output = NewTensor(3, dimSize3, X_FLOAT, 1.0F, 0);
// dedy = NewTensor(3, dimSize3, X_FLOAT, 1.0F, 0);
// dedx = NewTensor(3, dimSize3, X_FLOAT, 1.0F, 0);
// dedxTmp = NewTensor(3, dimSize3, X_FLOAT, 1.0F, 0);
// padding = NewTensor(2, dimSize4, X_FLOAT, 1.0F, 0);
//
// //weight = NewTensor(2, dimSize);
// //dedw = NewTensor(2, dimSize);
// //input = NewTensor(3, dimSize2);
// //gold = NewTensor(3, dimSize3);
// //output = NewTensor(3, dimSize3);
// //dedy = NewTensor(3, dimSize3);
// //dedx = NewTensor(3, dimSize3);
// //dedxTmp = NewTensor(3, dimSize3);
// //padding = NewTensor(2, dimSize4);
//
// myRead(input, "x.txt", "x");
// myRead(weight, "w.txt", "w");
// myRead(gold, "gold.txt", "gold");
// myRead(padding, "padding.txt", "padding");
//
// XTensor inter;
// inter = MMul(*input, *weight);
//
// _Softmax(&inter, output, 2);
//
// //_LogMe(output);
// loss = _CrossEntropyFast(output, gold, REDUCE_MEAN, NULL, padding);
//
// printf("loss: %f\n", loss);
//
// _CrossEntropyBackward(dedy, output, gold, NULL);
// //_CrossEntropyBackward(dedy, output, gold, NULL, padding);
//
// myDump(dedy, "dedy.txt", "dedy");
//
// _SoftmaxBackward(NULL, output, input, dedy, dedx, NULL, -1, NOLOSS);
// _Sub(output, gold, dedxTmp);
//
// myDump(dedx, "dedx.txt", "dedx");
// dedx->Dump(stderr, "dedx", 200);
// dedxTmp->Dump(stderr, "dedxTmp", 200);
//
// input->Reshape(input->unitNum/input->GetDim(-1), input->GetDim(-1));
// dedx->Reshape(dedx->unitNum/dedx->GetDim(-1), dedx->GetDim(-1));
//
// _MatrixMulBatched(input, X_TRANS, dedx, X_NOTRANS, dedw);
//
// myDump(dedw, "dedw.txt", "dedw");
//}
//
//void T2TTest2()
//{
// int dimSize[3];
// dimSize[0] = 161;
// dimSize[1] = 47;
// dimSize[2] = 10001;
// XTensor * probs = NewTensor(3, dimSize, X_FLOAT, 1.0F, 0);
// //XTensor * probs = NewTensor(3, dimSize, X_FLOAT, 1.0F, -1);
//
// //myRead(probs, "probs.txt", " ");
// _SetDataFixedFloat(probs, 1.0F);
//
// probs->Reshape(1, probs->unitNum);
//
// DTYPE sum = _ReduceSumAll(probs);
// printf("%e\n", sum);
//
// //XTensor tmp;
// //tmp = IsNonZero(*probs);
// //DTYPE nonZeroNum = ReduceSumAll(tmp);
// //printf("%f\n", nonZeroNum);
// //
// //DTYPE gpu = ReduceSum(*probs, 1).Get2D(0, 0);
//
// //printf("%e\n", gpu);
//}
source/tensor/loss/CrossEntropy.cu
查看文件 @
2c4061e9
...
@@ -196,17 +196,17 @@ void _CudaCrossEntropyBackward(XTensor * dedy, const XTensor * output,
...
@@ -196,17 +196,17 @@ void _CudaCrossEntropyBackward(XTensor * dedy, const XTensor * output,
delete[] dims;
delete[] dims;
}
}
if(padding != NULL) {
//
if(padding != NULL) {
XTensor * tmp = NewTensor(padding);
//
XTensor * tmp = NewTensor(padding);
_IsNonZero(padding, tmp);
//
_IsNonZero(padding, tmp);
int nonZeroNum = (int)_ReduceSumAll(tmp);
//
int nonZeroNum = (int)_ReduceSumAll(tmp);
_ScaleAndShiftMe(dedy, (DTYPE)1.0/(DTYPE)nonZeroNum);
//
_ScaleAndShiftMe(dedy, (DTYPE)1.0/(DTYPE)nonZeroNum);
delete tmp;
//
delete tmp;
}
//
}
else {
//
else {
int num = dedy->unitNum / dedy->GetDim(n);
//
int num = dedy->unitNum / dedy->GetDim(n);
_ScaleAndShiftMe(dedy, (DTYPE)1.0/(DTYPE)num);
//
_ScaleAndShiftMe(dedy, (DTYPE)1.0/(DTYPE)num);
}
//
}
}
}
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论