NiuTrans.Tensor — Commit baad6629
authored Sep 18, 2018 by xiaotong

improve the space management

parent 6ea64b51

Showing 8 changed files with 340 additions and 147 deletions
source/network/XBackwardFunc.cpp    +1    -1
source/network/XBackwardFunc.h      +1    -1
source/network/XBackwardMath.cpp    +166  -78
source/network/XBackwardMath.h      +29   -29
source/network/XBackwardShape.cpp   +30   -16
source/network/XBackwardShape.h     +9    -9
source/network/XNet.cpp             +90   -11
source/network/XNet.h               +14   -2
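The thread running through all eight files is a new gradient-efficient mode: an isEfficient / isGradEfficient flag is passed down from XNet into every backward routine so that gradient tensors are only allocated and updated for nodes that actually need them, and intermediate gradients are released as soon as they have been consumed. A minimal usage sketch under that reading — the wrapper function and its name are ours, but every XNet member it calls appears in this commit:

    #include "XNet.h"
    using namespace nts;

    /* hypothetical training-step helper: roots/golds are the output and
       gold-standard tensor lists the caller already maintains */
    void BackwardEfficient(XList &roots, XList &golds, LOSS_FUNCTION_NAME loss)
    {
        XNet net;

        /* keep gradients only for tensors that need them (new in this commit) */
        net.SetGradEfficientFlag(true);

        /* Backward() calls MakeEfficientNet() internally when the flag is set,
           and clears intermediate gradients once they are no longer used */
        net.Backward(roots, golds, loss);
    }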
source/network/XBackwardFunc.cpp

@@ -29,7 +29,7 @@
 namespace nts{

 /* compute dE/dx of a node */
-void XFuncGrad::MakeGrad(XTensor * node)
+void XFuncGrad::MakeGrad(XTensor * node, bool isEfficient)
 {
source/network/XBackwardFunc.h

@@ -35,7 +35,7 @@ class XFuncGrad
 public:
     /* compute dE/dx of a node */
     static
-    void MakeGrad(XTensor * node);
+    void MakeGrad(XTensor * node, bool isEfficient);

     /* indicates whether the node is for an activation function */
     static
source/network/XBackwardMath.cpp

@@ -28,69 +28,73 @@
 namespace nts{

 /* compute dE/dx of a node */
-void XMathGrad::MakeGrad(XTensor * node)
+void XMathGrad::MakeGrad(XTensor * node, bool isEfficient)
 {
-    CheckNTErrors(node->grad != NULL, "No gradient found!");
+    if(!isEfficient){
+        CheckNTErrors(node->grad != NULL, "No gradient found!");
+    }
+    else{
+        CheckNTErrors(!node->isGrad || node->grad != NULL, "No gradient found!");
+    }

     XLink &income = node->income;
     int operID = income.typeID;

     if(operID == MATH_ABSOLUTE)
-        GradAbsolute(node);
+        GradAbsolute(node, isEfficient);
     else if(operID == MATH_COS)
-        GradCos(node);
+        GradCos(node, isEfficient);
     else if(operID == MATH_EXP)
-        GradExp(node);
+        GradExp(node, isEfficient);
     else if(operID == MATH_LOG)
-        GradLog(node);
+        GradLog(node, isEfficient);
     else if(operID == MATH_ROUND)
-        GradRound(node);
+        GradRound(node, isEfficient);
     else if(operID == MATH_SIGN)
-        GradSign(node);
+        GradSign(node, isEfficient);
     else if(operID == MATH_SIN)
-        GradSin(node);
+        GradSin(node, isEfficient);
     else if(operID == MATH_TAN)
-        GradTan(node);
+        GradTan(node, isEfficient);
     else if(operID == MATH_CLIP)
-        GradClip(node);
+        GradClip(node, isEfficient);
     else if(operID == MATH_DIV)
-        GradDiv(node);
+        GradDiv(node, isEfficient);
     else if(operID == MATH_DIVDIM)
-        GradDivDim(node);
+        GradDivDim(node, isEfficient);
     else if(operID == MATH_MATRIXMUL)
-        GradMatrixMul(node);
+        GradMatrixMul(node, isEfficient);
     else if(operID == MATH_MATRIXMULBATCHED)
-        GradMatrixMulBatched(node);
+        GradMatrixMulBatched(node, isEfficient);
     else if(operID == MATH_MULTIPLY)
-        GradMultiply(node);
+        GradMultiply(node, isEfficient);
     else if(operID == MATH_MULTIPLYDIM)
-        GradMultiplyDim(node);
+        GradMultiplyDim(node, isEfficient);
     else if(operID == MATH_NEGATE)
-        GradNegate(node);
+        GradNegate(node, isEfficient);
     else if(operID == MATH_NORMALIZE)
-        GradNormalize(node);
+        GradNormalize(node, isEfficient);
     else if(operID == MATH_POWER)
-        GradPower(node);
+        GradPower(node, isEfficient);
     else if(operID == MATH_SCALEANDSHIFT)
-        GradScaleAndShift(node);
+        GradScaleAndShift(node, isEfficient);
     else if(operID == MATH_SUB)
-        GradSub(node);
+        GradSub(node, isEfficient);
     else if(operID == MATH_SUBDIM)
-        GradSubDim(node);
+        GradSubDim(node, isEfficient);
     else if(operID == MATH_SUM)
-        GradSum(node);
+        GradSum(node, isEfficient);
     else if(operID == MATH_SUMDIM)
-        GradSumDim(node);
+        GradSumDim(node, isEfficient);
     else if(operID == REDUCE_REDUCEMEAN)
-        GradReduceMean(node);
+        GradReduceMean(node, isEfficient);
     else if(operID == REDUCE_REDUCESUM)
-        GradReduceSum(node);
+        GradReduceSum(node, isEfficient);
     else if(operID == REDUCE_REDUCESUMSQUARED)
-        GradReduceSumSquared(node);
+        GradReduceSumSquared(node, isEfficient);
     else if(operID == REDUCE_REDUCEVARIANCE)
-        GradReduceVariance(node);
+        GradReduceVariance(node, isEfficient);
     else{
         ShowNTErrors("TODO!");
     }
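Every per-operation routine dispatched above applies the same guard before allocating or updating a gradient. A minimal sketch of that idiom, meant to be read in the context of XBackwardMath.cpp where XTensor is already visible (the helper and its name are ours; the real code inlines the condition at each call site, as the hunks below show):

    /* sketch of the guard repeated throughout this file: in the default mode
       every node carries a gradient; in gradient-efficient mode only tensors
       flagged isGrad (parameters and nodes leading to them) do */
    static bool NeedGrad(XTensor * t, bool isEfficient)
    {
        return !isEfficient || t->isGrad;
    }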
@@ -111,8 +115,10 @@ we have
 dE/da =  dE/dc    a >= 0
         -dE/dc    a < 0
 >> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in
+                 an efficient manner
 */
-void XMathGrad::GradAbsolute(XTensor * node)
+void XMathGrad::GradAbsolute(XTensor * node, bool isEfficient)
 {
     XLink &income = node->income;
     CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for ABSOLUTE!");

@@ -137,8 +143,10 @@ c = cos(a)
 we have
 dE/da = dE/dc * -sin(a)
 >> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in
+                 an efficient manner
 */
-void XMathGrad::GradCos(XTensor * node)
+void XMathGrad::GradCos(XTensor * node, bool isEfficient)
 {
     XLink &income = node->income;
     CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for COS!");

@@ -164,8 +172,10 @@ c = exp(a)
 we have
 dE/da = dE/dc * exp(a)
 >> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in
+                 an efficient manner
 */
-void XMathGrad::GradExp(XTensor * node)
+void XMathGrad::GradExp(XTensor * node, bool isEfficient)
 {
     XLink &income = node->income;
     CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for EXP!");

@@ -190,8 +200,10 @@ c = log(a)
 we have
 dE/da = dE/dc * 1/a
 >> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in
+                 an efficient manner
 */
-void XMathGrad::GradLog(XTensor * node)
+void XMathGrad::GradLog(XTensor * node, bool isEfficient)
 {
     XLink &income = node->income;
     CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for LOG!");

@@ -212,8 +224,10 @@ c = round(a)
 we have
 dE/da = 0
 >> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in
+                 an efficient manner
 */
-void XMathGrad::GradRound(XTensor * node)
+void XMathGrad::GradRound(XTensor * node, bool isEfficient)
 {
     XLink &income = node->income;
     CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for ROUND!");

@@ -231,8 +245,10 @@ c = sign(a)
 we have
 dE/da = 0
 >> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in
+                 an efficient manner
 */
-void XMathGrad::GradSign(XTensor * node)
+void XMathGrad::GradSign(XTensor * node, bool isEfficient)
 {
     XLink &income = node->income;
     CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for SIGN!");

@@ -250,8 +266,10 @@ c = sin(a)
 we have
 dE/da = dE/dc * cos(a)
 >> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in
+                 an efficient manner
 */
-void XMathGrad::GradSin(XTensor * node)
+void XMathGrad::GradSin(XTensor * node, bool isEfficient)
 {
     XLink &income = node->income;
     CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for SIN!");

@@ -276,8 +294,10 @@ c = tan(a)
 we have
 dE/da = dE/dc * 1/(cos(a))^2
 >> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in
+                 an efficient manner
 */
-void XMathGrad::GradTan(XTensor * node)
+void XMathGrad::GradTan(XTensor * node, bool isEfficient)
 {
     XLink &income = node->income;
     CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for TAN!");

@@ -302,8 +322,10 @@ we have
 dE/da = 1     lower < a < upper
 dE/da = 0     otherwise
 >> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in
+                 an efficient manner
 */
-void XMathGrad::GradClip(XTensor * node)
+void XMathGrad::GradClip(XTensor * node, bool isEfficient)
 {
     XLink &income = node->income;
     CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for CLIP!");

@@ -332,8 +354,10 @@ we have
 dE/da = dE/dc / b
 dE/db = dE/dc * a / -b^2
 >> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in
+                 an efficient manner
 */
-void XMathGrad::GradDiv(XTensor * node)
+void XMathGrad::GradDiv(XTensor * node, bool isEfficient)
 {
     XLink &income = node->income;
     CheckNTErrors(income.tailNum == 2, "Wrong input tensor number for DIVIDE!");
@@ -365,8 +389,12 @@ c = a / b
 where the size of b is equal to dimension n of a, i.e., |b| = a.dimSize[n]
 dE/da = dE/dc * (1/b)
 dE/db = (dE/dc * (-a/b^2)).reduce(0,...,n-1,n+1,...)
 >> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in
+                 an efficient manner
 */
-void XMathGrad::GradDivDim(XTensor * node)
+void XMathGrad::GradDivDim(XTensor * node, bool isEfficient)
 {
     XLink &income = node->income;
     CheckNTErrors(income.tailNum == 2, "Wrong input tensor number for DIVDIM!");
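Spelling out the DIVDIM case above in index form (our notation, same content as the comment; i ranges over the elements of a and i_n is its index along the broadcast dimension n):

    \frac{\partial E}{\partial a_i} = \frac{1}{b_{i_n}}\,\frac{\partial E}{\partial c_i},
    \qquad
    \frac{\partial E}{\partial b_k} = -\sum_{i \,:\, i_n = k} \frac{a_i}{b_k^{2}}\,\frac{\partial E}{\partial c_i}

The sum over all positions sharing the same index along dimension n is exactly the reduce(0,...,n-1,n+1,...) written in the comment.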
@@ -466,8 +494,10 @@ we have
 dE/da = dE/dc * b^T * \alpha
 dE/db = a^T * dE/dc * \alpha
 >> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in
+                 an efficient manner
 */
-void XMathGrad::GradMatrixMul(XTensor * node)
+void XMathGrad::GradMatrixMul(XTensor * node, bool isEfficient)
 {
     XLink &income = node->income;
     CheckNTErrors(income.tailNum == 2, "Wrong input tensor number for MULTIPLY!");

@@ -479,17 +509,19 @@ void XMathGrad::GradMatrixMul(XTensor * node)
     MATRIX_TRANS_TYPE transB = income.GetParamTrans(1);
     DTYPE alpha = income.GetParam(2);

-    XNoder::MakeGrad(a);
-    XNoder::MakeGrad(b);
+    if(!isEfficient || a->isGrad)
+        XNoder::MakeGrad(a);
+    if(!isEfficient || b->isGrad)
+        XNoder::MakeGrad(b);

     XTensor * c = node;
     XTensor * dedc = node->grad;
     XTensor * deda = a->grad;
     XTensor * dedb = b->grad;

-    if(deda->order == 2 && dedb->order == 2)
-        GradMatrixMul(a, deda, transA, b, dedb, transB, dedc, alpha);
-    else if(transA == X_NOTRANS && deda->order > 2 && dedb->order == 2){
+    if(a->order == 2 && b->order == 2)
+        GradMatrixMul(a, deda, transA, b, dedb, transB, dedc, alpha, isEfficient);
+    else if(transA == X_NOTRANS && a->order > 2 && b->order == 2){
         int orderBackupA = a->order;
         int orderBackupC = c->order;
         int dimsBackupA[MAX_TENSOR_DIM_NUM];

@@ -499,14 +531,16 @@ void XMathGrad::GradMatrixMul(XTensor * node)
         a->Reshape(a->unitNum/a->GetDim(-1), a->GetDim(-1));
         c->Reshape(c->unitNum/c->GetDim(-1), c->GetDim(-1));

-        deda->Reshape(deda->unitNum/deda->GetDim(-1), deda->GetDim(-1));
+        if(!isEfficient || a->isGrad)
+            deda->Reshape(deda->unitNum/deda->GetDim(-1), deda->GetDim(-1));
         dedc->Reshape(dedc->unitNum/dedc->GetDim(-1), dedc->GetDim(-1));

-        GradMatrixMul(a, deda, transA, b, dedb, transB, dedc, alpha);
+        GradMatrixMul(a, deda, transA, b, dedb, transB, dedc, alpha, isEfficient);

         a->Reshape(orderBackupA, dimsBackupA);
         c->Reshape(orderBackupC, dimsBackupC);
-        deda->Reshape(orderBackupA, dimsBackupA);
+        if(!isEfficient || a->isGrad)
+            deda->Reshape(orderBackupA, dimsBackupA);
         dedc->Reshape(orderBackupC, dimsBackupC);
     }
     else{

@@ -524,19 +558,23 @@ gradient for matrix multiply: c = matmul(a, b) * \alpha
 >> dedb - dE/db
 >> dedc - dE/dc
 >> alpha - the scalar
+>> isEfficient - indicates whether the computation is in
+                 an efficient manner
 */
 void XMathGrad::GradMatrixMul(XTensor * a, XTensor * deda, MATRIX_TRANS_TYPE transA,
                               XTensor * b, XTensor * dedb, MATRIX_TRANS_TYPE transB,
-                              XTensor * dedc, DTYPE alpha)
+                              XTensor * dedc, DTYPE alpha, bool isEfficient)
 {
     /* c = a * b * \alpha */
     if(transA == X_NOTRANS && transB == X_NOTRANS){

         /* dE/da = dE/dc * b^T * \alpha */
-        _MatrixMul(dedc, X_NOTRANS, b, X_TRANS, deda, alpha, 1.0F);
+        if(!isEfficient || a->isGrad)
+            _MatrixMul(dedc, X_NOTRANS, b, X_TRANS, deda, alpha, 1.0F);

         /* dE/db = a^T * dE/dc * \alpha */
-        _MatrixMul(a, X_TRANS, dedc, X_NOTRANS, dedb, alpha, 1.0F);
+        if(!isEfficient || b->isGrad)
+            _MatrixMul(a, X_TRANS, dedc, X_NOTRANS, dedb, alpha, 1.0F);
     }

     /* c = a^T * b * \alpha */

@@ -544,21 +582,25 @@ void XMathGrad::GradMatrixMul(XTensor * a, XTensor * deda, MATRIX_TRANS_TYPE tra
         /* dE/da = (dE/dc * b^T)^T * \alpha
                  = b * dE/dc^T * \alpha */
-        _MatrixMul(b, X_NOTRANS, dedc, X_TRANS, deda, alpha, 1.0F);
+        if(!isEfficient || a->isGrad)
+            _MatrixMul(b, X_NOTRANS, dedc, X_TRANS, deda, alpha, 1.0F);

         /* dE/db = a * dE/dc * \alpha */
-        _MatrixMul(a, X_NOTRANS, dedc, X_NOTRANS, dedb, alpha, 1.0F);
+        if(!isEfficient || b->isGrad)
+            _MatrixMul(a, X_NOTRANS, dedc, X_NOTRANS, dedb, alpha, 1.0F);
     }

     /* c = a * b^T * \alpha */
     else if(transA == X_NOTRANS && transB == X_TRANS){

         /* dE/da = dE/dc * b * \alpha */
-        _MatrixMul(dedc, X_NOTRANS, b, X_NOTRANS, deda, alpha, 1.0F);
+        if(!isEfficient || a->isGrad)
+            _MatrixMul(dedc, X_NOTRANS, b, X_NOTRANS, deda, alpha, 1.0F);

         /* dE/db = (a^T * dE/dc)^T * \alpha
                  = dE/dc^T * a * \alpha */
-        _MatrixMul(dedc, X_TRANS, a, X_NOTRANS, dedb, alpha, 1.0F);
+        if(!isEfficient || b->isGrad)
+            _MatrixMul(dedc, X_TRANS, a, X_NOTRANS, dedb, alpha, 1.0F);
     }

     /* c = a^T * b^T * \alpha */

@@ -566,11 +608,13 @@ void XMathGrad::GradMatrixMul(XTensor * a, XTensor * deda, MATRIX_TRANS_TYPE tra
         /* dE/da = (dE/dc * b)^T * \alpha
                  = b^T * dE/dc^T * \alpha */
-        _MatrixMul(b, X_TRANS, dedc, X_TRANS, deda, alpha, 1.0F);
+        if(!isEfficient || a->isGrad)
+            _MatrixMul(b, X_TRANS, dedc, X_TRANS, deda, alpha, 1.0F);

         /* dE/db = (a * dE/dc)^T * \alpha
                  = dE/dc^T * a^T * \alpha */
-        _MatrixMul(dedc, X_TRANS, a, X_TRANS, dedb, alpha, 1.0F);
+        if(!isEfficient || b->isGrad)
+            _MatrixMul(dedc, X_TRANS, a, X_TRANS, dedb, alpha, 1.0F);
     }
 }
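For reference, the four transpose cases guarded above amount to the following (same content as the code comments, written as matrix products; E is the loss and \alpha the scalar from the forward call):

    \begin{aligned}
    c = \alpha\, a b        &:\quad \tfrac{\partial E}{\partial a} = \alpha\,\tfrac{\partial E}{\partial c}\, b^{T}, &\quad \tfrac{\partial E}{\partial b} = \alpha\, a^{T}\,\tfrac{\partial E}{\partial c} \\
    c = \alpha\, a^{T} b    &:\quad \tfrac{\partial E}{\partial a} = \alpha\, b\,\big(\tfrac{\partial E}{\partial c}\big)^{T}, &\quad \tfrac{\partial E}{\partial b} = \alpha\, a\,\tfrac{\partial E}{\partial c} \\
    c = \alpha\, a b^{T}    &:\quad \tfrac{\partial E}{\partial a} = \alpha\,\tfrac{\partial E}{\partial c}\, b, &\quad \tfrac{\partial E}{\partial b} = \alpha\,\big(\tfrac{\partial E}{\partial c}\big)^{T} a \\
    c = \alpha\, a^{T} b^{T} &:\quad \tfrac{\partial E}{\partial a} = \alpha\, b^{T}\big(\tfrac{\partial E}{\partial c}\big)^{T}, &\quad \tfrac{\partial E}{\partial b} = \alpha\,\big(\tfrac{\partial E}{\partial c}\big)^{T} a^{T}
    \end{aligned}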
@@ -582,8 +626,10 @@ we have
 dE/da_i = dE/dc_i * b_i^T * \alpha
 dE/db_i = a_i^T * dE/dc_i * \alpha
 >> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in
+                 an efficient manner
 */
-void XMathGrad::GradMatrixMulBatched(XTensor * node)
+void XMathGrad::GradMatrixMulBatched(XTensor * node, bool isEfficient)
 {
     XLink &income = node->income;
     CheckNTErrors(income.tailNum == 2, "Wrong input tensor number for MULTIPLY!");

@@ -657,8 +703,10 @@ we have
 dE/da = dE/dc * b
 dE/db = dE/dc * a
 >> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in
+                 an efficient manner
 */
-void XMathGrad::GradMultiply(XTensor * node)
+void XMathGrad::GradMultiply(XTensor * node, bool isEfficient)
 {
     XLink &income = node->income;
     CheckNTErrors(income.tailNum == 2, "Wrong input tensor number for MULTIPLY!");

@@ -681,8 +729,12 @@ c = a * b
 where the size of b is equal to dimension n of a, i.e., |b| = a.dimSize[n]
 dE/da = dE/dc * b
 dE/db = (dE/dc * a).reduce(0,...,n-1,n+1,...)
 >> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in
+                 an efficient manner
 */
-void XMathGrad::GradMultiplyDim(XTensor * node)
+void XMathGrad::GradMultiplyDim(XTensor * node, bool isEfficient)
 {
     XLink &income = node->income;
     CheckNTErrors(income.tailNum == 2, "Wrong input tensor number for MULTIPLYDIM!");

@@ -771,8 +823,10 @@ c = -a
 we have
 dE/da = dE/dc * (-1)
 >> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in
+                 an efficient manner
 */
-void XMathGrad::GradNegate(XTensor * node)
+void XMathGrad::GradNegate(XTensor * node, bool isEfficient)
 {
     XLink &income = node->income;
     CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for NEGATE!");

@@ -793,8 +847,10 @@ void XMathGrad::GradNegate(XTensor * node)
 /*
 gradient for normalize
 >> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in
+                 an efficient manner
 */
-void XMathGrad::GradNormalize(XTensor * node)
+void XMathGrad::GradNormalize(XTensor * node, bool isEfficient)
 {
     ShowNTErrors("This is really a bad piece of code!!!");

@@ -887,8 +943,10 @@ c = pow(a,p)
 we have
 dE/da = (dE/dc) * p * a^(p-1)
 >> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in
+                 an efficient manner
 */
-void XMathGrad::GradPower(XTensor * node)
+void XMathGrad::GradPower(XTensor * node, bool isEfficient)
 {
     XLink &income = node->income;
     CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for POWER!");

@@ -916,8 +974,10 @@ c = a * scale + shift
 we have
 dE/da = dE/dc * scale
 >> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in
+                 an efficient manner
 */
-void XMathGrad::GradScaleAndShift(XTensor * node)
+void XMathGrad::GradScaleAndShift(XTensor * node, bool isEfficient)
 {
     XLink &income = node->income;
     CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for SCALEANDSHIFT!");

@@ -941,8 +1001,10 @@ we have
 dE/da = dE/dc
 dE/db = -dE/dc * \beta
 >> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in
+                 an efficient manner
 */
-void XMathGrad::GradSub(XTensor * node)
+void XMathGrad::GradSub(XTensor * node, bool isEfficient)
 {
     XLink &income = node->income;
     CheckNTErrors(income.tailNum == 2, "Wrong input tensor number for SUBSTRACT!");

@@ -966,8 +1028,11 @@ c = a - b * \beta
 where the size of b is equal to dimension n of a, i.e., |b| = a.dimSize[n]
 dE/da = dE/dc
 dE/db = - dE/dc * b.reduce(0,...,n-1,n+1,...) * \beta
 >> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in
+                 an efficient manner
 */
-void XMathGrad::GradSubDim(XTensor * node)
+void XMathGrad::GradSubDim(XTensor * node, bool isEfficient)
 {
     XLink &income = node->income;
     CheckNTErrors(income.tailNum == 2, "Wrong input tensor number for SUBDIM!");
@@ -1063,9 +1128,12 @@ c = a + b * \beta
 we have
 dE/da = dE/dc
 dE/db = dE/dc * \beta
 >> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in
+                 an efficient manner
 */
-void XMathGrad::GradSum(XTensor * node)
+void XMathGrad::GradSum(XTensor * node, bool isEfficient)
 {
     XLink &income = node->income;
     CheckNTErrors(income.tailNum == 2, "Wrong input tensor number for SUM!");

@@ -1074,11 +1142,15 @@ void XMathGrad::GradSum(XTensor * node)
     XTensor * b = income.tails[1];
     DTYPE beta = income.GetParam(0);

-    XNoder::MakeGrad(a);
-    XNoder::MakeGrad(b);
+    if(!isEfficient || a->isGrad){
+        XNoder::MakeGrad(a);
+        _Sum(a->grad, node->grad, a->grad);
+    }

-    _Sum(a->grad, node->grad, a->grad);
-    _Sum(b->grad, node->grad, b->grad, beta);
+    if(!isEfficient || b->isGrad){
+        XNoder::MakeGrad(b);
+        _Sum(b->grad, node->grad, b->grad, beta);
+    }

     node->visitMark = NODE_FINISHED;
 }

@@ -1089,8 +1161,12 @@ c = a + b * \beta
 where the size of b is equal to dimension n of a, i.e., |b| = a.dimSize[n]
 dE/da = dE/dc
 dE/db = dE/dc * b.reduce(0,...,n-1,n+1,...) * \beta
 >> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in
+                 an efficient manner
 */
-void XMathGrad::GradSumDim(XTensor * node)
+void XMathGrad::GradSumDim(XTensor * node, bool isEfficient)
 {
     XLink &income = node->income;
     CheckNTErrors(income.tailNum == 2, "Wrong input tensor number for SUMDIM!");

@@ -1183,9 +1259,12 @@ for
 c = reduceMean(a, dim)
 we have
 dE/da = Unsqueeze(dE/dc) * 1/dimSizeA[dim]
 >> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in
+                 an efficient manner
 */
-void XMathGrad::GradReduceMean(XTensor * node)
+void XMathGrad::GradReduceMean(XTensor * node, bool isEfficient)
 {
     XLink &income = node->income;
     CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for Reduce!");

@@ -1213,9 +1292,12 @@ for
 c = reduceSum(a, dim)
 we have
 dE/da = Unsqueeze(dE/dc) * 1
 >> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in
+                 an efficient manner
 */
-void XMathGrad::GradReduceSum(XTensor * node)
+void XMathGrad::GradReduceSum(XTensor * node, bool isEfficient)
 {
     XLink &income = node->income;
     CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for Reduce!");

@@ -1243,9 +1325,12 @@ c = \sum_i (a_i - b)^2
 we have
 dE/da = Unsqueeze(dE/dc) * 2a
 dE/db = dE/dc * -2 * n * b
 >> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in
+                 an efficient manner
 */
-void XMathGrad::GradReduceSumSquared(XTensor * node)
+void XMathGrad::GradReduceSumSquared(XTensor * node, bool isEfficient)
 {
     XLink &income = node->income;
     CheckNTErrors(income.tailNum == 2, "Wrong input tensor number for Reduce!");

@@ -1292,9 +1377,12 @@ where b is the mean, and n is the size of a
 we have
 dE/da = Unsqueeze(dE/dc) * 2a/n
 dE/db = dE/dc * -2 * b
 >> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in
+                 an efficient manner
 */
-void XMathGrad::GradReduceVariance(XTensor * node)
+void XMathGrad::GradReduceVariance(XTensor * node, bool isEfficient)
 {
     XLink &income = node->income;
     CheckNTErrors(income.tailNum == 2, "Wrong input tensor number for Reduce!");
source/network/XBackwardMath.h

@@ -33,7 +33,7 @@ class XMathGrad
 public:
     /* compute dE/dx of a node */
     static
-    void MakeGrad(XTensor * node);
+    void MakeGrad(XTensor * node, bool isEfficient);

     /* indicates whether the node is for a math operation */
     static

@@ -43,121 +43,121 @@ private:
     /* gradient for absolute */
     static
-    void GradAbsolute(XTensor * node);
+    void GradAbsolute(XTensor * node, bool isEfficient);

     /* gradient for cos */
     static
-    void GradCos(XTensor * node);
+    void GradCos(XTensor * node, bool isEfficient);

     /* gradient for exp */
     static
-    void GradExp(XTensor * node);
+    void GradExp(XTensor * node, bool isEfficient);

     /* gradient for log: c = log(a) */
     static
-    void GradLog(XTensor * node);
+    void GradLog(XTensor * node, bool isEfficient);

     /* gradient for round */
     static
-    void GradRound(XTensor * node);
+    void GradRound(XTensor * node, bool isEfficient);

     /* gradient for sign */
     static
-    void GradSign(XTensor * node);
+    void GradSign(XTensor * node, bool isEfficient);

     /* gradient for sin */
     static
-    void GradSin(XTensor * node);
+    void GradSin(XTensor * node, bool isEfficient);

     /* gradient for tan */
     static
-    void GradTan(XTensor * node);
+    void GradTan(XTensor * node, bool isEfficient);

     /* gradient for clip */
     static
-    void GradClip(XTensor * node);
+    void GradClip(XTensor * node, bool isEfficient);

     /* gradient for Divide */
     static
-    void GradDiv(XTensor * node);
+    void GradDiv(XTensor * node, bool isEfficient);

     /* gradient for DivideDim */
     static
-    void GradDivDim(XTensor * node);
+    void GradDivDim(XTensor * node, bool isEfficient);

     /* gradient for matrix multiply: c = matmul(a, b) * \alpha */
     static
-    void GradMatrixMul(XTensor * node);
+    void GradMatrixMul(XTensor * node, bool isEfficient);

     /* gradient for matrix multiply: c = matmul(a, b) * \alpha */
     static
     void GradMatrixMul(XTensor * a, XTensor * deda, MATRIX_TRANS_TYPE transA,
                        XTensor * b, XTensor * dedb, MATRIX_TRANS_TYPE transB,
-                       XTensor * dedc, DTYPE alpha);
+                       XTensor * dedc, DTYPE alpha, bool isEfficient);

     /* gradient for matrix multiply in batch mode.
        for each batch: c_i = matmul(a_i, b_i) * \alpha */
     static
-    void GradMatrixMulBatched(XTensor * node);
+    void GradMatrixMulBatched(XTensor * node, bool isEfficient);

     /* gradient for multiply (dot production): c = a * b * \alpha */
     static
-    void GradMultiply(XTensor * node);
+    void GradMultiply(XTensor * node, bool isEfficient);

     /* gradient for multiply one dimension: c = a * b * \alpha
        where the size of b is equal to that of one dimension of a */
     static
-    void GradMultiplyDim(XTensor * node);
+    void GradMultiplyDim(XTensor * node, bool isEfficient);

     /* gradient for negate */
     static
-    void GradNegate(XTensor * node);
+    void GradNegate(XTensor * node, bool isEfficient);

     /* gradient for normalize */
     static
-    void GradNormalize(XTensor * node);
+    void GradNormalize(XTensor * node, bool isEfficient);

     /* gradient for power */
     static
-    void GradPower(XTensor * node);
+    void GradPower(XTensor * node, bool isEfficient);

     /* gradient for ScaleAndShift */
     static
-    void GradScaleAndShift(XTensor * node);
+    void GradScaleAndShift(XTensor * node, bool isEfficient);

     /* gradient for Minus */
     static
-    void GradSub(XTensor * node);
+    void GradSub(XTensor * node, bool isEfficient);

     /* gradient for sub with one dimension: c = a - b * \beta
        where the size of b is equal to that of one dimension of a */
     static
-    void GradSubDim(XTensor * node);
+    void GradSubDim(XTensor * node, bool isEfficient);

     /* gradient for sum: c = a + b * \beta */
     static
-    void GradSum(XTensor * node);
+    void GradSum(XTensor * node, bool isEfficient);

     /* gradient for sum with one dimension: c = a + b * \beta
        where the size of b is equal to that of one dimension of a */
     static
-    void GradSumDim(XTensor * node);
+    void GradSumDim(XTensor * node, bool isEfficient);

     /* gradient for reduceMean */
     static
-    void GradReduceMean(XTensor * node);
+    void GradReduceMean(XTensor * node, bool isEfficient);

     /* gradient for reduceSum */
     static
-    void GradReduceSum(XTensor * node);
+    void GradReduceSum(XTensor * node, bool isEfficient);

     /* gradient for reduceSumSquared */
     static
-    void GradReduceSumSquared(XTensor * node);
+    void GradReduceSumSquared(XTensor * node, bool isEfficient);

     /* gradient for reduceVariance */
     static
-    void GradReduceVariance(XTensor * node);
+    void GradReduceVariance(XTensor * node, bool isEfficient);
 };

 }
source/network/XBackwardShape.cpp

@@ -30,7 +30,7 @@
 namespace nts{

 /* compute dE/dx of a node */
-void XShapeGrad::MakeGrad(XTensor * node)
+void XShapeGrad::MakeGrad(XTensor * node, bool isEfficent)
 {
     CheckNTErrors(node->grad != NULL, "No gradient found!");

@@ -38,17 +38,17 @@ void XShapeGrad::MakeGrad(XTensor * node)
     int operID = income.typeID;

     if(operID == SHAPE_MERGE)
-        GradMerge(node);
+        GradMerge(node, isEfficent);
     else if(operID == SHAPE_MERGE_LIST)
-        GradMergeList(node);
+        GradMergeList(node, isEfficent);
     else if(operID == SHAPE_UNSQUEEZE)
-        GradUnsqueeze(node);
+        GradUnsqueeze(node, isEfficent);
     else if(operID == SHAPE_SPLIT)
-        GradSplit(node);
+        GradSplit(node, isEfficent);
     else if(operID == SHAPE_SPLIT_LIST)
-        GradSplitList(node);
+        GradSplitList(node, isEfficent);
     else if(operID == SHAPE_TRANSPOSE)
-        GradTranspose(node);
+        GradTranspose(node, isEfficent);
     else{
         ShowNTErrors("TODO!");
     }

@@ -62,10 +62,10 @@ bool XShapeGrad::IsShapeOP(XTensor * node)
 }

 /* post processing of a node */
-void XShapeGrad::PostProcessing(XTensor * node, int typeID)
+void XShapeGrad::PostProcessing(XTensor * node, int typeID, bool isEfficent)
 {
     if(typeID == SHAPE_SPLIT_LIST)
-        GradSplitListPost(node);
+        GradSplitListPost(node, isEfficent);
 }

 /*

@@ -80,8 +80,10 @@ dE/db_1 = dE/dc_{split_1}
 i.e.,
 dE/da = split(dE/dc)
 >> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in
+                 an efficient manner
 */
-void XShapeGrad::GradMerge(XTensor * node)
+void XShapeGrad::GradMerge(XTensor * node, bool isEfficent)
 {
     XLink &income = node->income;
     XTensor * input = income.tails[0];

@@ -162,8 +164,10 @@ dE/db = dE/dc_{split_1}
 i.e.,
 list(dE/da, dE/db, ...) = split(dE/dc)
 >> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in
+                 an efficient manner
 */
-void XShapeGrad::GradMergeList(XTensor * node)
+void XShapeGrad::GradMergeList(XTensor * node, bool isEfficient)
 {
     XLink &income = node->income;
     CheckNTErrors(income.tailNum > 0, "Wrong input tensor number for MERGE!");

@@ -239,8 +243,10 @@ c = split(a)
 we have
 dE/da = merge(dE/dc)
 >> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in
+                 an efficient manner
 */
-void XShapeGrad::GradSplit(XTensor * node)
+void XShapeGrad::GradSplit(XTensor * node, bool isEfficient)
 {
     XLink &income = node->income;
     XTensor * input = income.tails[0];

@@ -279,8 +285,10 @@ list(c_1, ...) = split(a)
 we have
 dE/da = merge(dE/c_1, ...)
 >> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in
+                 an efficient manner
 */
-void XShapeGrad::GradSplitList(XTensor * node)
+void XShapeGrad::GradSplitList(XTensor * node, bool isEfficient)
 {
     XLink &income = node->income;
     XTensor * input = income.tails[0];

@@ -299,8 +307,10 @@ have been processed. We do this in a post-processing
 manner because we can fuze multiple memory copy jobs
 one time. This is good for system speed up.
 >> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in
+                 an efficient manner
 */
-void XShapeGrad::GradSplitListPost(XTensor * node)
+void XShapeGrad::GradSplitListPost(XTensor * node, bool isEfficient)
 {
     /* we compute the gradient for current node, rather than for
        child node, i.e., we use the outgoing edge here */

@@ -351,8 +361,10 @@ c = unsqueeze(a)
 we have
 dE/da = reduecesum(dE/dc)
 >> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in
+                 an efficient manner
 */
-void XShapeGrad::GradUnsqueeze(XTensor * node)
+void XShapeGrad::GradUnsqueeze(XTensor * node, bool isEfficient)
 {
     XLink &income = node->income;
     CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for UNSQUEEZE!");

@@ -379,8 +391,10 @@ c = Transpose(a)
 we have
 dE/da = Transpose(dE/dc)
 >> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in
+                 an efficient manner
 */
-void XShapeGrad::GradTranspose(XTensor * node)
+void XShapeGrad::GradTranspose(XTensor * node, bool isEfficient)
 {
     XLink &income = node->income;
     CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for TRANSPOSE!");
source/network/XBackwardShape.h

@@ -34,7 +34,7 @@ class XShapeGrad
 public:
     /* compute dE/dx of a node */
     static
-    void MakeGrad(XTensor * node);
+    void MakeGrad(XTensor * node, bool isEfficent);

     /* indicates whether the node is for a shaping operation */
     static

@@ -42,38 +42,38 @@ public:
     /* post processing of a node */
     static
-    void PostProcessing(XTensor * node, int typeId);
+    void PostProcessing(XTensor * node, int typeId, bool isEfficent);

 private:

     /* gradient computation for merge: c = merge(a, b, ...) */
     static
-    void GradMerge(XTensor * node);
+    void GradMerge(XTensor * node, bool isEfficent);

     /* gradient computation for merging a list of tensors : c = merge(list(a, b, ...)) */
     static
-    void GradMergeList(XTensor * node);
+    void GradMergeList(XTensor * node, bool isEfficent);

     /* gradient computation for split: c = split(a) */
     static
-    void GradSplit(XTensor * node);
+    void GradSplit(XTensor * node, bool isEfficent);

     /* gradient computation for spliting. we return the list of the splits : list(c_1, ...) = split(a) */
     static
-    void GradSplitList(XTensor * node);
+    void GradSplitList(XTensor * node, bool isEfficent);

     /* gradient computation for spliting. we return the list of the splits : list(c_1, ...) = split(a).
        this method is called only when all nodes of spliting have been processed. We do this in a post-processing
        manner because we can fuze multiple memory copy jobs one time. This is good for system speed up. */
     static
-    void GradSplitListPost(XTensor * node);
+    void GradSplitListPost(XTensor * node, bool isEfficent);

     /* gradient computation for unsqueezing a tensor : c = unsqueeze(a) */
     static
-    void GradUnsqueeze(XTensor * node);
+    void GradUnsqueeze(XTensor * node, bool isEfficent);

     /* gradient computation for unsqueezing a tensor : c = unsqueeze(a) */
     static
-    void GradTranspose(XTensor * node);
+    void GradTranspose(XTensor * node, bool isEfficent);
 };
source/network/XNet.cpp

@@ -55,6 +55,7 @@ void XNetClearAll()
 XNet::XNet()
 {
     nodes.Clear();
+    isGradEfficient = false;
 }

 /* de-constructor */

@@ -115,6 +116,10 @@ void XNet::Backward(XList &roots, XList &golds, LOSS_FUNCTION_NAME loss)
 {
     Traverse(roots);

+    /* label tensors where the backward computation is neccessary */
+    if(isGradEfficient)
+        MakeEfficientNet();
+
     for(int i = 0; i < nodes.count; i++){
         XTensor * node = (XTensor*)nodes.Get(i);
         node->visitMark = NODE_UNFINISHED;
@@ -154,10 +159,20 @@ void XNet::Backward(XList &roots, XList &golds, LOSS_FUNCTION_NAME loss)
             CheckNTErrors(node->mem->bufUsed < BUF_PITCH, "Illegal access of buffer!");
         }

-        if(node->visitMark == NODE_FINISHED)
-            continue;
-
-        BackwardNode(node);
+        if(node->visitMark != NODE_FINISHED)
+            BackwardNode(node, isGradEfficient);
+
+        if(isGradEfficient){
+            if(!XNoder::IsLeaf(node)){
+                XLink & outgo = node->outgo;
+                for(int i = 0; i < outgo.tailNum; i++){
+                    XTensor * parent = outgo.tails[i];
+                    ClearGrad(parent);
+                }
+            }
+            else
+                ClearGrad(node);
+        }
     }
 }
@@ -179,27 +194,32 @@ void XNet::Backward(XList &roots, LOSS_FUNCTION_NAME loss)
 /*
 backward computation for a given node
 >> node - the node keeps the result of an operation (e.g., activation function)
+>> isEfficient - indicates whether the back-propagation is compuated in an
+                 efficient manner
 */
-void XNet::BackwardNode(XTensor * node)
+void XNet::BackwardNode(XTensor * node, bool isEfficent)
 {
     if(node == NULL || node->visitMark == NODE_FINISHED)
         return;

     if(!XNoder::IsLeaf(node)){
         /* post processing for parent nodes */
-        BackwardNodePost(node);
+        BackwardNodePost(node, isEfficent);

         /* process the current node */
         if(XMathGrad::IsMathOP(node))
-            XMathGrad::MakeGrad(node);
+            XMathGrad::MakeGrad(node, isEfficent);
         else if(XFuncGrad::IsFunc(node))
-            XFuncGrad::MakeGrad(node);
+            XFuncGrad::MakeGrad(node, isEfficent);
         else if(XShapeGrad::IsShapeOP(node))
-            XShapeGrad::MakeGrad(node);
+            XShapeGrad::MakeGrad(node, isEfficent);
         else{
             ShowNTErrors("Wrong node type!");
         }
     }
     else{
         node->visitMark = NODE_FINISHED;
     }
 }

 /*

@@ -207,7 +227,7 @@ backward computation (in post processing) for a given node
 >> node - the node whose parent nodes are not processed yet. So
           we do the job at the child node.
 */
-void XNet::BackwardNodePost(XTensor * node)
+void XNet::BackwardNodePost(XTensor * node, bool isEfficent)
 {
     bool isSplitList = false;
     XLink &outgo = node->outgo;

@@ -217,7 +237,7 @@ void XNet::BackwardNodePost(XTensor * node)
     }

     if(isSplitList)
-        XShapeGrad::PostProcessing(node, SHAPE_SPLIT_LIST);
+        XShapeGrad::PostProcessing(node, SHAPE_SPLIT_LIST, isEfficent);
 }

 /*
@@ -304,4 +324,62 @@ void XNet::Dump(FILE * file)
     }
 }

+/*
+set the flag of gradient-efficient
+>> flag - the flag
+*/
+void XNet::SetGradEfficientFlag(bool flag)
+{
+    isGradEfficient = flag;
+}
+
+/* generate the gradient-efficient flag for every node */
+void XNet::MakeEfficientNet()
+{
+    /* back-propagation from output to input */
+    for(int i = 0; i < nodes.count; i++){
+        XTensor * node = (XTensor*)nodes.Get(i);
+        XLink &income = node->income;
+        for(int j = 0; j < income.tailNum; j++){
+            XTensor * child = income.tails[j];
+            if(child->isGrad || child->isVar){
+                node->SetGradFlag(true);
+                break;
+            }
+        }
+    }
+}
+
+/*
+clear the graident information if the node is no use
+>> node - the node that we want to clear
+*/
+void XNet::ClearGrad(XTensor * node)
+{
+    if(node->isVar)
+        return;
+    if(node->grad == NULL)
+        return;
+    if(node->visitMark != NODE_FINISHED)
+        return;
+
+    XLink &income = node->income;
+    bool finished = true;
+    for(int i = 0; i < income.tailNum; i++){
+        XTensor * child = income.tails[i];
+        if(child->visitMark != NODE_FINISHED){
+            finished = false;
+            break;
+        }
+    }
+
+    if(finished){
+        //fprintf(stderr, "del %d %ld\n", node->id, node->grad->unitNum);
+        delete node->grad;
+        node->grad = NULL;
+    }
+}
+
 }
\ No newline at end of file
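MakeEfficientNet above marks a node as needing a gradient as soon as one of its inputs is a variable or is already marked, so the flag spreads from the parameters toward the output. A standalone toy (deliberately not using NiuTrans types) that mimics this propagation, assuming nodes are visited with inputs before the operations that consume them, as the topological order found by Traverse provides:

    #include <cstdio>
    #include <vector>

    struct ToyNode { bool isVar = false; bool isGrad = false; std::vector<int> inputs; };

    int main()
    {
        /* 0: input data x, 1: parameter w, 2: h = f(x, w), 3: y = g(h) */
        std::vector<ToyNode> graph(4);
        graph[1].isVar = true;
        graph[2].inputs = {0, 1};
        graph[3].inputs = {2};

        /* same rule as XNet::MakeEfficientNet: a node needs a gradient
           if any of its inputs is a variable or already needs one */
        for(ToyNode & n : graph)
            for(int i : n.inputs)
                if(graph[i].isGrad || graph[i].isVar){ n.isGrad = true; break; }

        for(int i = 0; i < (int)graph.size(); i++)
            std::printf("node %d needs grad: %s\n", i, graph[i].isGrad ? "yes" : "no");
        return 0;
    }

With ClearGrad, the gradient buffer of such an intermediate node (node 2 here) can then be freed as soon as every node that consumes it has finished its backward step, which is where the space saving of this commit comes from.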
source/network/XNet.h

@@ -47,6 +47,9 @@ struct XNet
     /* input nodes of the network */
     XList inputs;

+    /* indicates whether the network just keeps the gradient for parameter tensors */
+    bool isGradEfficient;
+
     /* constructor */
     XNet();

@@ -71,10 +74,10 @@ struct XNet
     void Backward(XList &roots, LOSS_FUNCTION_NAME loss = NOLOSS);

     /* backward computation for a given node */
-    void BackwardNode(XTensor * node);
+    void BackwardNode(XTensor * node, bool isEfficent = false);

     /* backward computation (in post processing) for a given node */
-    void BackwardNodePost(XTensor * node);
+    void BackwardNodePost(XTensor * node, bool isEfficent = false);

     /* traverse the net and find the topological order by
        depth-first search (Tarjan's algorithm) */

@@ -89,6 +92,15 @@ struct XNet
     /* dump network information */
     void Dump(FILE * file);

+    /* set the flag of gradient-efficient */
+    void SetGradEfficientFlag(bool flag = true);
+
+    /* generate the gradient-efficient flag for every node */
+    void MakeEfficientNet();
+
+    /* clear the graident information if the node is no use */
+    void ClearGrad(XTensor * node);
+
 };

 /* we make a unique id for every tensor */