NiuTrans.Tensor

Commit 66855df5, authored Jul 16, 2018 by xiaotong
refine some bad code
parent 2046cd23
Showing 15 changed files with 48 additions and 40 deletions.
source/tensor/Main.cpp                              +3  -3
source/tensor/core/arithmetic/MatrixMul.cpp         +2  -2
source/tensor/core/arithmetic/MatrixMulBatched.cpp  +3  -3
source/tensor/core/getandset/Select.cpp             +1  -1
source/tensor/core/movement/CopyIndexed.cpp         +1  -1
source/tensor/core/reduce/ReduceMax.cpp             +1  -1
source/tensor/core/reduce/ReduceMean.cpp            +1  -1
source/tensor/core/reduce/ReduceSum.cpp             +1  -1
source/tensor/core/reduce/ReduceSumSquared.cpp      +1  -1
source/tensor/core/reduce/ReduceVariance.cpp        +1  -1
source/tensor/core/shape/Merge.cpp                  +2  -2
source/tensor/core/shape/Split.cpp                  +2  -2
source/tensor/core/shape/Unsqueeze.cpp              +1  -1
source/tensor/function/Loss.cpp                     +8  -0
source/tensor/sample/fnnlm/FNNLM.cpp                +20 -20
source/tensor/Main.cpp

@@ -45,7 +45,7 @@ int main( int argc, const char ** argv )
     //_CrtSetBreakAlloc(123);

     /* a tiny test */
-    if(true)
+    if(false)
         SmallTest();

     //_CrtDumpMemoryLeaks();
@@ -53,8 +53,8 @@ int main( int argc, const char ** argv )
     if(argc > 1 && !strcmp(argv[1], "-test"))
         Test();
-    //else if(argc > 1 && !strcmp(argv[1], "-fnnlm"))
-    //    FNNLMMain(argc - 1, argv + 1);
+    else if(argc > 1 && !strcmp(argv[1], "-fnnlm"))
+        FNNLMMain(argc - 1, argv + 1);
     else{
         fprintf(stderr, "Thanks for using NiuTrans.Tensor! This is a library that eases the\n");
         fprintf(stderr, "use of tensors. All you need is to ...\n\n");
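With this change the tiny SmallTest() run is switched off and the previously commented-out -fnnlm branch is enabled, so the feed-forward language-model sample becomes reachable from the command line. Assuming the built binary is named NiuTrans.Tensor (the options accepted after -fnnlm are defined in FNNLM.cpp and are not part of this commit), the dispatch now behaves roughly as:

    ./NiuTrans.Tensor -test             # run the test suite via Test()
    ./NiuTrans.Tensor -fnnlm <args>     # forwarded to FNNLMMain(argc - 1, argv + 1)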
source/tensor/core/arithmetic/MatrixMul.cpp

@@ -210,7 +210,7 @@ Obviously C = A * B performs normal matrix multiplication if A = y * z and B = x
 XTensor MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA, const XTensor &b, MATRIX_TRANS_TYPE transposedB,
                   DTYPE alpha, DTYPE beta, XPRunner * parallelRunner)
 {
-    CheckNTErrors((&a && &b), "Empty input tensors!");
+    CheckNTErrors((&a != &NULLTensor && &b != &NULLTensor), "Empty input tensors!");
     CheckNTErrors((a.dataType == b.dataType), "Input tensors should have the same data type!");
     CheckNTErrors((a.order >= 2 && b.order >= 2), "Input tensors must have a order >= 2!");
@@ -246,7 +246,7 @@ XTensor MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA, const XTensor
     XLink::AddParamToHead(&c, beta);

     /* destroy variables */
-    delete dimSize;
+    delete[] dimSize;

     return c;
 }
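Most of the one-line edits in this commit replace delete dimSize with delete[] dimSize. A minimal sketch of why that matters, assuming dimSize is allocated with new[] (which is what the delete[] form implies); this is an illustration of the pattern, not the actual MatrixMul code:

    // one entry per dimension of the result tensor
    int * dimSize = new int[order];    // allocated with array new
    /* ... fill dimSize and build the result tensor c ... */
    delete[] dimSize;                  // new[] must be paired with delete[];
                                       // plain `delete dimSize` is undefined behavior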
source/tensor/core/arithmetic/MatrixMulBatched.cpp

@@ -175,9 +175,9 @@ where trans() returns the transposed matrix if the flag is fired.
 XTensor MatrixMulBatched(const XTensor &a, MATRIX_TRANS_TYPE transposedA, const XTensor &b, MATRIX_TRANS_TYPE transposedB,
                          DTYPE alpha, DTYPE beta, XPRunner * parallelRunner)
 {
-    CheckNTErrors((&a && &b), "Empty input tensors!");
+    CheckNTErrors(&a != &NULLTensor && &b != &NULLTensor, "Empty input tensors!");
     CheckNTErrors(a.dataType == b.dataType, "Input tensors should have the same data type!");
-    CheckNTErrors((a.order >= 2 && b.order >= 2), "Input tensors must have a order >= 2!");
+    CheckNTErrors(a.order >= 2 && b.order >= 2, "Input tensors must have a order >= 2!");
     CheckNTErrors(a.order == b.order, "Input tensor and output tensor must have same order!");

     int an = transposedA == X_TRANS ? a.dimSizeRDI[0] : a.dimSizeRDI[1];
@@ -210,7 +210,7 @@ XTensor MatrixMulBatched(const XTensor &a, MATRIX_TRANS_TYPE transposedA, const
     XLink::AddParamToHead(&c, beta);

     /* destroy variables */
-    delete dimSize;
+    delete[] dimSize;

     return c;
 }
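The other recurring fix replaces CheckNTErrors((&a && &b), ...) with a comparison against NULLTensor. The address of a bound C++ reference is never null in well-defined code, so the old test could not fail; comparing against the address of a shared sentinel object does catch a deliberately empty argument. A sketch of the idea, assuming NULLTensor is a global XTensor that the library uses as its "no tensor" marker (its definition lives elsewhere in the tensor sources), with a hypothetical helper for illustration:

    extern XTensor NULLTensor;    // shared "no tensor" sentinel

    void CheckInputs(const XTensor &a, const XTensor &b)
    {
        // &a equals &NULLTensor only when the caller passed the sentinel object
        CheckNTErrors(&a != &NULLTensor && &b != &NULLTensor, "Empty input tensors!");
    }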
source/tensor/core/getandset/Select.cpp

@@ -123,7 +123,7 @@ XTensor SelectRange(const XTensor &a, int dim, int low, int high)
     XLink::AddParamToHead(&c, high);

     /* destroy variables */
-    delete dimSize;
+    delete[] dimSize;

     return c;
 }
source/tensor/core/movement/CopyIndexed.cpp

@@ -131,7 +131,7 @@ XTensor CopyIndexed(const XTensor &s, int dim, int * srcIndex, int indexSize, in
     _CopyIndexed(&s, &t, dim, srcIndex, indexSize, tgtIndex, copyNum);

     /* destroy variables */
-    delete dimSize;
+    delete[] dimSize;

     /* tensor connection */
     XLink::MakeLink(&s, NULL, &t, MOVEMENT_COPYINDEXED);
source/tensor/core/reduce/ReduceMax.cpp

@@ -121,7 +121,7 @@ XTensor ReduceMax(const XTensor &input, int dim)
     _ReduceMax(&input, &output, dim);

     /* destroy variables */
-    delete dimSize;
+    delete[] dimSize;

     /* tensor connection */
     XLink::MakeLink(&input, NULL, &output, REDUCE_REDUCEMAX);
source/tensor/core/reduce/ReduceMean.cpp

@@ -82,7 +82,7 @@ XTensor ReduceMean(const XTensor &input, int dim)
     XLink::AddParamToHead(&output, dim);

     /* destroy variables */
-    delete dimSize;
+    delete[] dimSize;

     return output;
 }
source/tensor/core/reduce/ReduceSum.cpp

@@ -239,7 +239,7 @@ XTensor ReduceSum(const XTensor &input, int dim, const XTensor &shift, DTYPE pow
     XLink::AddParamToHead(&output, power);

     /* destroy variables */
-    delete dimSize;
+    delete[] dimSize;

     return output;
 }
source/tensor/core/reduce/ReduceSumSquared.cpp

@@ -78,7 +78,7 @@ XTensor ReduceSumSquared(const XTensor &input, int dim, const XTensor &shift)
     XLink::AddParamToHead(&output, dim);

     /* destroy variables */
-    delete dimSize;
+    delete[] dimSize;

     return output;
 }
source/tensor/core/reduce/ReduceVariance.cpp

@@ -76,7 +76,7 @@ XTensor ReduceVariance(const XTensor &input, int dim, const XTensor &mean)
     _ReduceVariance(&input, &output, dim, &mean);

     /* destroy variables */
-    delete dimSize;
+    delete[] dimSize;

     return output;
 }
source/tensor/core/shape/Merge.cpp

@@ -188,7 +188,7 @@ XTensor Merge(const XTensor &s, int whereToMerge, int leadingDim)
     _Merge(&s, &t, whereToMerge, leadingDim);

     /* destroy variables */
-    delete dimSize;
+    delete[] dimSize;

     return t;
 }
@@ -335,7 +335,7 @@ XTensor Merge(const XList &smalls, int whereToMerge)
     _Merge(&smalls, &big, whereToMerge);

     /* destroy variables */
-    delete dimSize;
+    delete[] dimSize;

     return big;
 }
source/tensor/core/shape/Split.cpp

@@ -162,7 +162,7 @@ XTensor Split(const XTensor &s, int whereToSplit, int splitNum)
     _Split(&s, &t, whereToSplit, splitNum);

     /* destroy variables */
-    delete dimSize;
+    delete[] dimSize;

     return t;
 }
@@ -308,7 +308,7 @@ XList SplitList(const XTensor &big, int whereToSplit, int splitNum)
     _Split(&big, &smalls, whereToSplit, splitNum);

     /* destroy variables */
-    delete dimSize;
+    delete[] dimSize;

     return smalls;
 }
source/tensor/core/shape/Unsqueeze.cpp

@@ -130,7 +130,7 @@ XTensor Unsqueeze(const XTensor &a, int dim, int dSize)
     _Unsqueeze(&a, &b, dim, dSize);

     /* destroy variables */
-    delete dimSize;
+    delete[] dimSize;

     return b;
 }
source/tensor/function/Loss.cpp

@@ -178,7 +178,11 @@ DTYPE LossCompute(XTensor * gold, XTensor * output, LOSS_FUNCTION_NAME LFName,
         }
     }
     else{
+#ifdef USE_CUDA
         error = CudaLossCompute(gold, output, LFName, isLogOutput, leadDim, gBeg, gLen, oBeg);
+#else
+        ShowNTErrors("Please specify USE_CUDA and recompile the code!");
+#endif
     }

     return error;
@@ -476,7 +480,11 @@ void LossBackward(XTensor * dedy, XTensor * t, XTensor * y,
         }
     }
     else{
+#ifdef USE_CUDA
         CudaLossBackward(dedy, t, y, LFName, leadDim, tBeg, tLen, yBeg);
+#else
+        ShowNTErrors("Please specify USE_CUDA and recompile the code!");
+#endif
     }
 }
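The Loss.cpp hunks are the only pure additions in the commit: they fence the CUDA-only calls behind the USE_CUDA flag, so a build without CUDA no longer references CudaLossCompute / CudaLossBackward and instead reports a clear error at run time. The guard pattern in reduced form (USE_CUDA is assumed to be defined by the build system when CUDA support is compiled in):

    #ifdef USE_CUDA
        error = CudaLossCompute(gold, output, LFName, isLogOutput, leadDim, gBeg, gLen, oBeg);
    #else
        ShowNTErrors("Please specify USE_CUDA and recompile the code!");
    #endif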
source/tensor/sample/fnnlm/FNNLM.cpp

@@ -490,9 +490,9 @@ float GetProb(XTensor &output, XTensor &gold, XTensor * wordProbs)
     /* probability of each word */
     XTensor wprobs;
     InitTensor1D(&wprobs, output.GetDim(0), output.dataType, output.devID, output.mem);
-    ReduceSum(&probs, &wprobs, 1);
+    _ReduceSum(&probs, &wprobs, 1);

     if(wordProbs != NULL)
-        CopyValues(&wprobs, wordProbs);
+        _CopyValues(&wprobs, wordProbs);

     /* reshape the tensor to fit it into the reduce procedure
        TODO: XTensor supports scalars */
@@ -504,7 +504,7 @@ float GetProb(XTensor &output, XTensor &gold, XTensor * wordProbs)
     /* probability for the batch */
     XTensor result;
     InitTensor1D(&result, 1, X_FLOAT, output.devID, output.mem);
-    ReduceSum(&probs, &result, 1);
+    _ReduceSum(&probs, &result, 1);

     return result.Get1D(0);
 }
@@ -673,7 +673,7 @@ void Forward(XTensor inputs[], XTensor &output, FNNModel &model, FNNNet &net)
         /* generate word embedding of position i:
            embedding = input * w */
-        MatrixMul(&input, X_NOTRANS, &w, X_NOTRANS, &embedding);
+        _MatrixMul(&input, X_NOTRANS, &w, X_NOTRANS, &embedding);

         eList.Add(&net.embeddings[i]);
     }
@@ -681,7 +681,7 @@ void Forward(XTensor inputs[], XTensor &output, FNNModel &model, FNNNet &net)
     /* concatenate word embeddings
        embeddingcat = cat(embedding_0...embedding_{n-1}) */
     InitModelTensor2D(net.embeddingCat, batchSize, (n - 1) * model.eSize, model);
-    Concatenate(&eList, &net.embeddingCat, 1);
+    _Concatenate(&eList, &net.embeddingCat, 1);

     /* go over each hidden layer */
     for(int i = 0; i < depth; i++){
@@ -696,12 +696,12 @@ void Forward(XTensor inputs[], XTensor &output, FNNModel &model, FNNNet &net)
         /* generate hidden states of layer i:
            s = h_pre * w */
-        MatrixMul(&h_pre, X_NOTRANS, &w, X_NOTRANS, &s);
+        _MatrixMul(&h_pre, X_NOTRANS, &w, X_NOTRANS, &s);

         /* make a 2d tensor for the bias term */
         XTensor b2D;
         InitTensor(&b2D, &s);
-        Unsqueeze(&b, &b2D, 0, batchSize);
+        _Unsqueeze(&b, &b2D, 0, batchSize);

         /* introduce bias term:
            s = s + b
@@ -711,7 +711,7 @@ void Forward(XTensor inputs[], XTensor &output, FNNModel &model, FNNNet &net)
         /* pass the state through the hard tanh function:
            h = tanh(s) */
-        HardTanH(&s, &h);
+        _HardTanH(&s, &h);
     }

     /* generate the output Pr(w_{n-1}|w_0...w_{n-2}):
@@ -729,16 +729,16 @@ void Forward(XTensor inputs[], XTensor &output, FNNModel &model, FNNNet &net)
     InitModelTensor2D(y, batchSize, model.vSize, model);

     /* s = h_last * w */
-    MatrixMul(&h_last, X_NOTRANS, &w, X_NOTRANS, &s);
+    _MatrixMul(&h_last, X_NOTRANS, &w, X_NOTRANS, &s);

     XTensor b2D;
     InitTensor(&b2D, &s);
-    Unsqueeze(&b, &b2D, 0, batchSize);
+    _Unsqueeze(&b, &b2D, 0, batchSize);

     _Sum(&s, &b2D, &s);

     /* y = softmax(s) */
-    LogSoftmax(&s, &y, 1);
+    _LogSoftmax(&s, &y, 1);
 }
@@ -782,18 +782,18 @@ void Backward(XTensor inputs[], XTensor &output, XTensor &gold, LOSS_FUNCTION_NA
        x is the top most hidden layer)
        so we know
        dE/dw = x^T * dE/ds */
-    MatrixMul(&x, X_TRANS, &deds, X_NOTRANS, &dedw);
+    _MatrixMul(&x, X_TRANS, &deds, X_NOTRANS, &dedw);

     /* gradient of the bias: dE/db = dE/ds * 1 = dE/ds
        specifically dE/db_{j} = \sum_{i} dE/ds_{i,j} */
-    ReduceSum(&deds, &dedb, 0);
+    _ReduceSum(&deds, &dedb, 0);

     /* then, we compute
        dE/dx_{j} = \sum_j' (dE/ds_{j'} * ds_{j'}/dx_j)
                  = \sum_j' (dE/ds_{j'} * w_{j, j'})
        i.e.,
        dE/dx = dE/ds * w^T */
-    MatrixMul(&deds, X_NOTRANS, &w, X_TRANS, &dedx);
+    _MatrixMul(&deds, X_NOTRANS, &w, X_TRANS, &dedx);

     XTensor &gradPassed = dedx;
     XTensor dedsHidden;
@@ -821,17 +821,17 @@ void Backward(XTensor inputs[], XTensor &output, XTensor &gold, LOSS_FUNCTION_NA
         HardTanHBackward(NULL, &h, &s, &dedh, &deds, NOLOSS);

         /* gradient of the weight: dE/dw = x^T * dE/ds */
-        MatrixMul(&x, X_TRANS, &deds, X_NOTRANS, &dedw);
+        _MatrixMul(&x, X_TRANS, &deds, X_NOTRANS, &dedw);

         /* gradient of the bias: dE/db = dE/ds * 1 = dE/ds
            specifically dE/db_{j} = \sum_{i} dE/ds_{i,j} */
-        ReduceSum(&deds, &dedb, 0);
+        _ReduceSum(&deds, &dedb, 0);

         /* gradient of the input: dE/dx = dE/ds * w^T */
-        MatrixMul(&deds, X_NOTRANS, &w, X_TRANS, &dedx);
+        _MatrixMul(&deds, X_NOTRANS, &w, X_TRANS, &dedx);

         if(i > 0)
-            CopyValues(&dedx, &gradPassed);
+            _CopyValues(&dedx, &gradPassed);
     }

     XList eList(n - 1);
@@ -846,7 +846,7 @@ void Backward(XTensor inputs[], XTensor &output, XTensor &gold, LOSS_FUNCTION_NA
     XTensor &dedyCat = depth > 0 ? dedxBottom : dedx;

     /* split the concatenation of gradients of the embeddings */
-    Split(&dedyCat, &eList, 1, n - 1);
+    _Split(&dedyCat, &eList, 1, n - 1);

     /* go over for each word */
     for(int i = 0; i < n - 1; i++){
@@ -857,7 +857,7 @@ void Backward(XTensor inputs[], XTensor &output, XTensor &gold, LOSS_FUNCTION_NA
         /* gradient of the embedding weight: dE/dw += x^T * dE/dy
            NOTE that we accumulate dE/dw here because the matrix w
            is shared by several layers (or words) */
-        MatrixMul(&x, X_TRANS, dedy, X_NOTRANS, &dedw, 1.0F, 1.0F);
+        _MatrixMul(&x, X_TRANS, dedy, X_NOTRANS, &dedw, 1.0F, 1.0F);

         delete dedy;
     }
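Every change in FNNLM.cpp swaps a call such as MatrixMul, ReduceSum, Unsqueeze, or HardTanH for its underscore-prefixed counterpart. Judging from the signatures visible in this commit, the underscore routines appear to be the pointer-based versions that write into a caller-allocated output tensor, while the plain names are wrappers that return a new XTensor and register XLink connections for the computation graph; since the sample pre-allocates its outputs with the InitTensor* helpers, the in-place forms are the matching API. A hedged sketch of the two calling styles (shapes and the explicit alpha/beta/parallelRunner arguments are illustrative only):

    XTensor input, w, embedding;
    /* ... tensors initialized elsewhere, e.g. with the InitTensor* helpers used above ... */

    /* pointer style: the product is written into the pre-initialized 'embedding' */
    _MatrixMul(&input, X_NOTRANS, &w, X_NOTRANS, &embedding);

    /* reference style: a new XTensor is returned and linked into the graph */
    XTensor embedding2 = MatrixMul(input, X_NOTRANS, w, X_NOTRANS, 1.0F, 0.0F, NULL);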