NiuTrans.Tensor — Commit f7f33b29
authored Aug 19, 2018 by xuchen
code optimization
parent e223c59c
Showing 26 changed files with 1506 additions and 652 deletions (+1506, -652).
source/network/XBackwardMath.cpp (+632, -531)
source/network/XBackwardMath.h (+60, -54)
source/network/XNet.cpp (+0, -2)
source/sample/transformer/T2TAttention.cpp (+20, -3)
source/sample/transformer/T2TAttention.h (+11, -2)
source/sample/transformer/T2TEmbedding.cpp (+1, -1)
source/sample/transformer/T2TEncoder.cpp (+22, -4)
source/sample/transformer/T2TEncoder.h (+10, -4)
source/sample/transformer/T2TLayerNormal.cpp (+2, -2)
source/sample/transformer/T2TModel.cpp (+23, -4)
source/sample/transformer/T2TModel.h (+4, -1)
source/sample/transformer/T2TUtility.cpp (+3, -1)
source/tensor/core/arithmetic/Div.cpp (+51, -5)
source/tensor/core/arithmetic/Div.h (+1, -1)
source/tensor/core/arithmetic/Multiply.cpp (+48, -2)
source/tensor/core/arithmetic/Multiply.h (+1, -1)
source/tensor/core/arithmetic/Sub.cpp (+5, -7)
source/tensor/core/arithmetic/SubDim.cu (+0, -0)
source/tensor/core/arithmetic/Sum.cpp (+5, -25)
source/tensor/core/getandset/Select.cpp (+2, -2)
source/tensor/core/getandset/SetData.cpp (+100, -0)
source/tensor/core/getandset/SetData.cu (+163, -0)
source/tensor/core/getandset/SetData.cuh (+6, -0)
source/tensor/core/getandset/SetData.h (+6, -0)
source/tensor/test/TSubDim.cpp (+297, -0)
source/tensor/test/TSubDim.h (+33, -0)
source/network/XBackwardMath.cpp

The hunks in this file are shown as the updated code: the commit reorders the gradient routines of XMathGrad and adds a new GradSubDim routine.

@@ -35,57 +35,61 @@ void XMathGrad::MakeGrad(XTensor * node)
    XLink &income = node->income;
    int operID = income.typeID;

(The dispatch previously ran in the order MATH_SUM, MATH_SUMDIM, MATH_MULTIPLY, MATH_MULTIPLYDIM, MATH_MATRIXMUL, MATH_MATRIXMULBATCHED, MATH_LOG, MATH_POWER, MATH_NEGATE, MATH_SCALEANDSHIFT, MATH_DIV, MATH_DIVDIM, MATH_SUB, MATH_SIN, MATH_COS, MATH_TAN, MATH_EXP, MATH_NORMALIZE, MATH_ABSOLUTE, MATH_SIGN, MATH_ROUND, MATH_CLIP, followed by the REDUCE_* cases.)

    if(operID == MATH_ABSOLUTE)
        GradAbsolute(node);
    else if(operID == MATH_COS)
        GradCos(node);
    else if(operID == MATH_EXP)
        GradExp(node);
    else if(operID == MATH_LOG)
        GradLog(node);
    else if(operID == MATH_ROUND)
        GradRound(node);
    else if(operID == MATH_SIGN)
        GradSign(node);
    else if(operID == MATH_SIN)
        GradSin(node);
    else if(operID == MATH_TAN)
        GradTan(node);
    else if(operID == MATH_CLIP)
        GradClip(node);
    else if(operID == MATH_DIV)
        GradDiv(node);
    else if(operID == MATH_DIVDIM)
        GradDivDim(node);
    else if(operID == MATH_MATRIXMUL)
        GradMatrixMul(node);
    else if(operID == MATH_MATRIXMULBATCHED)
        GradMatrixMulBatched(node);
    else if(operID == MATH_MULTIPLY)
        GradMultiply(node);
    else if(operID == MATH_MULTIPLYDIM)
        GradMultiplyDim(node);
    else if(operID == MATH_NEGATE)
        GradNegate(node);
    else if(operID == MATH_NORMALIZE)
        GradNormalize(node);
    else if(operID == MATH_POWER)
        GradPower(node);
    else if(operID == MATH_SCALEANDSHIFT)
        GradScaleAndShift(node);
    else if(operID == MATH_SUB)
        GradSub(node);
    else if(operID == MATH_SUBDIM)
        GradSubDim(node);
    else if(operID == MATH_SUM)
        GradSum(node);
    else if(operID == MATH_SUMDIM)
        GradSumDim(node);
    else if(operID == REDUCE_REDUCEMEAN)
        GradReduceMean(node);
    else if(operID == REDUCE_REDUCESUM)
        GradReduceSum(node);
    else if(operID == REDUCE_REDUCESUMSQUARED)
        GradReduceSumSquared(node);
    else if(operID == REDUCE_REDUCEVARIANCE)
        GradReduceVariance(node);
    else{
        ShowNTErrors("TODO!");
...
@@ -100,200 +104,317 @@ bool XMathGrad::IsMathOP(XTensor * node)
}

/*
gradient for absolute
for
c = |a|
we have
dE/da =  dE/dc    a >= 0
        -dE/dc    a < 0
>> node - the node (c) for backward computation
*/
void XMathGrad::GradAbsolute(XTensor * node)
{
    XLink &income = node->income;
    CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for ABSOLUTE!");

    XTensor * a = income.tails[0];
    XTensor * b = NewTensorBuf(a, a->devID, a->mem);

    XNoder::MakeGrad(a);

    _Sign(a, b);
    _Multiply(node->grad, b, a->grad, 1.0F);

    DelTensorBuf(b);

    node->visitMark = NODE_FINISHED;
}

/*
gradient for cos
for
c = cos(a)
we have
dE/da = dE/dc * -sin(a)
>> node - the node (c) for backward computation
*/
void XMathGrad::GradCos(XTensor * node)
{
    XLink &income = node->income;
    CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for COS!");

    XTensor * a = income.tails[0];
    XTensor * b = NewTensorBuf(a, a->devID, a->mem);

    XNoder::MakeGrad(a);

    _Sin(a, b);
    _ScaleAndShiftMe(b, -1.0F);
    _Multiply(node->grad, b, a->grad, 1.0F);

    DelTensorBuf(b);

    node->visitMark = NODE_FINISHED;
}

/*
gradient for exp
for
c = exp(a)
we have
dE/da = dE/dc * exp(a)
>> node - the node (c) for backward computation
*/
void XMathGrad::GradExp(XTensor * node)
{
    XLink &income = node->income;
    CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for EXP!");

    XTensor * a = income.tails[0];
    XTensor * b = NewTensorBuf(a, a->devID, a->mem);

    XNoder::MakeGrad(a);

    _Exp(a, b);
    _Multiply(node->grad, b, a->grad, 1.0F);

    DelTensorBuf(b);

    node->visitMark = NODE_FINISHED;
}

/*
gradient for log
for
c = log(a)
we have
dE/da = dE/dc * 1/a
>> node - the node (c) for backward computation
*/
void XMathGrad::GradLog(XTensor * node)
{
    XLink &income = node->income;
    CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for LOG!");

    XTensor * a = income.tails[0];

    XNoder::MakeGrad(a);

    _Div(node->grad, a, a->grad, 1.0F);

    node->visitMark = NODE_FINISHED;
}

/*
gradient for round
for
c = round(a)
we have
dE/da = 0
>> node - the node (c) for backward computation
*/
void XMathGrad::GradRound(XTensor * node)
{
    XLink &income = node->income;
    CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for ROUND!");

    // we do nothing here
    // TODO: set grad = 0 if the node is the only child

    node->visitMark = NODE_FINISHED;
}

/*
gradient for sign
for
c = sign(a)
we have
dE/da = 0
>> node - the node (c) for backward computation
*/
void XMathGrad::GradSign(XTensor * node)
{
    XLink &income = node->income;
    CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for SIGN!");

    // we do nothing here
    // TODO: set grad = 0 if the node is the only child

    node->visitMark = NODE_FINISHED;
}

/*
gradient for sin
for
c = sin(a)
we have
dE/da = dE/dc * cos(a)
>> node - the node (c) for backward computation
*/
void XMathGrad::GradSin(XTensor * node)
{
    XLink &income = node->income;
    CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for SIN!");

    XTensor * a = income.tails[0];
    XTensor * b = NewTensorBuf(a, a->devID, a->mem);

    XNoder::MakeGrad(a);

    _Cos(a, b);
    _Multiply(node->grad, b, a->grad, 1.0F);

    DelTensorBuf(b);

    node->visitMark = NODE_FINISHED;
}

/*
gradient for tan
for
c = tan(a)
we have
dE/da = dE/dc * 1/(cos(a))^2
>> node - the node (c) for backward computation
*/
void XMathGrad::GradTan(XTensor * node)
{
    XLink &income = node->income;
    CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for TAN!");

    XTensor * a = income.tails[0];
    XTensor * b = NewTensorBuf(a, a->devID, a->mem);

    XNoder::MakeGrad(a);

    _Cos(a, b);
    _PowerMe(b, -2.0F);
    _Multiply(node->grad, b, a->grad, 1.0F);

    DelTensorBuf(b);

    node->visitMark = NODE_FINISHED;
}

/*
gradient for clip
we have
dE/da = 1    lower < a < upper
dE/da = 0    otherwise
>> node - the node (c) for backward computation
*/
void XMathGrad::GradClip(XTensor * node)
{
    XLink &income = node->income;
    CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for CLIP!");

    XTensor * a = income.tails[0];
    XTensor * b = NewTensorBuf(a, a->devID, a->mem);

    DTYPE lower = income.GetParam(0);
    DTYPE upper = income.GetParam(1);

    XNoder::MakeGrad(a);

    _ClipBackward(node, a, node->grad, a->grad, lower, upper);
    _Sum(a->grad, b, a->grad);

    DelTensorBuf(b);

    node->visitMark = NODE_FINISHED;
}

/*
gradient for divide
for
c = a / b
we have
dE/da = dE/dc / b
dE/db = dE/dc * a / -b^2
>> node - the node (c) for backward computation
*/
void XMathGrad::GradDiv(XTensor * node)
{
    XLink &income = node->income;
    CheckNTErrors(income.tailNum == 2, "Wrong input tensor number for DIVIDE!");

    XTensor * a = income.tails[0];
    XTensor * b = income.tails[1];
    XTensor * ab2 = NewTensorBuf(a, a->devID, a->mem);

    XNoder::MakeGrad(a);
    XNoder::MakeGrad(b);

    CheckNTErrors(XTensor::IsSameShaped(a, b), "Wrong sized input tensors!");

    _Div(node->grad, b, a->grad, 1.0F);

    _Power(b, ab2, -2.0F);
    _Multiply(a, ab2, ab2);
    _ScaleAndShiftMe(ab2, -1.0F);
    _Multiply(node->grad, ab2, b->grad, 1.0F);

    DelTensorBuf(ab2);

    node->visitMark = NODE_FINISHED;
}

/*
gradient for division with one dimension
c = a / b
where the size of b is equal to dimension n of a, i.e., |b| = a.dimSize[n]
dE/da = dE/dc * (1/b)
dE/db = (dE/dc * (-a/b^2)).reduce(0,...,n-1,n+1,...)
*/
void XMathGrad::GradDivDim(XTensor * node)
{
    XLink &income = node->income;
    CheckNTErrors(income.tailNum == 2, "Wrong input tensor number for DIVDIM!");

    XTensor * a = income.tails[0];
    XTensor * b = income.tails[1];
    int n = income.GetParamInt(0);

    XNoder::MakeGrad(a);
    XNoder::MakeGrad(b);

    /* dE/da = dE/dc * (1/b) */
    _DivDim(node->grad, b, a->grad, n, 1.0);

    /* dE/db = dE/dc * dc/db */
    int order = a->order;
    int dimSize[MAX_TENSOR_DIM_NUM];
    memcpy(dimSize, a->dimSize, sizeof(int) * a->order);

    XTensor * aTMP1 = NewTensorBuf(a, a->devID, a->mem);
    XTensor * aTMP2 = NewTensorBuf(a, a->devID, a->mem);
    XTensor * bTMP = NewTensorBuf(b, b->devID, b->mem);
    XTensor * interGradTMP = NewTensorBuf(node->grad, node->devID, node->mem);

    _Negate(a, aTMP1);
    _Power(b, bTMP, -2);
    _MultiplyDim(aTMP1, bTMP, aTMP2, n);
    _Multiply(node->grad, aTMP2, interGradTMP);

    if(n == order - 1){
        int reshapedSize[MAX_TENSOR_DIM_NUM];
        reshapedSize[0] = a->unitNum / dimSize[order - 1];
        reshapedSize[1] = dimSize[order - 1];

        /* we reshape dE/dc * a to a matrix whose column number is equal to the
           size of b. Then we can reduce the matrix into a row vector. */
        interGradTMP->Reshape(2, reshapedSize);

        if(b->outgo.tailNum > 1){
            XTensor * bGradTMP = NewTensorBuf(b->grad, b->devID, b->mem);
            _ReduceSum(interGradTMP, bGradTMP, 0);
            _Sum(b->grad, bGradTMP, b->grad);
            DelTensorBuf(bGradTMP);
        }
        else{
            _ReduceSum(interGradTMP, b->grad, 0);
        }
    }
    else{
...
@@ -311,10 +432,10 @@ void XMathGrad::GradMultiplyDim(XTensor * node)

        /* we reshape dE/dc to a 3D tensor of size (x, y, z) where y = |b|.
           Then reduce along with z and x to obtain dE/db. */
        interGradTMP->Reshape(3, reshapedSize);

        XTensor * interGrad = NewTensorBuf(2, reshapedSize, b->dataType, b->denseRatio, b->devID, b->mem);
        _ReduceSum(interGradTMP, interGrad, 2);

        if(b->outgo.tailNum > 1){
            XTensor * bGradTMP2 = NewTensorBuf(b->grad, b->devID, b->mem);
...
@@ -327,13 +448,17 @@ void XMathGrad::GradMultiplyDim(XTensor * node)

        else{
            _ReduceSum(interGrad, b->grad, 0);
        }

        DelTensorBuf(interGrad);
    }

    DelTensorBuf(aTMP1);
    DelTensorBuf(aTMP2);
    DelTensorBuf(bTMP);
    DelTensorBuf(interGradTMP);

    node->visitMark = NODE_FINISHED;
}

/*
gradient for matrix multiply
for c = matmul(a, b) * \alpha
...
@@ -525,225 +650,364 @@ void XMathGrad::GradMatrixMulBatched(XTensor * node)
}

/*
gradient for multiply (dot production)
for
c = a * b
we have
dE/da = dE/dc * b
dE/db = dE/dc * a
>> node - the node (c) for backward computation
*/
void XMathGrad::GradMultiply(XTensor * node)
{
    XLink &income = node->income;
    CheckNTErrors(income.tailNum == 2, "Wrong input tensor number for MULTIPLY!");

    XTensor * a = income.tails[0];
    XTensor * b = income.tails[1];

    XNoder::MakeGrad(a);
    XNoder::MakeGrad(b);

    CheckNTErrors(XTensor::IsSameShaped(a, b), "Wrong sized input tensors!");

    _Multiply(node->grad, b, a->grad, 1.0F);
    _Multiply(node->grad, a, b->grad, 1.0F);

    node->visitMark = NODE_FINISHED;
}

/*
gradient for multiply with one dimension
c = a * b
where the size of b is equal to dimension n of a, i.e., |b| = a.dimSize[n]
dE/da = dE/dc * b
dE/db = (dE/dc * a).reduce(0,...,n-1,n+1,...)
*/
void XMathGrad::GradMultiplyDim(XTensor * node)
{
    XLink &income = node->income;
    CheckNTErrors(income.tailNum == 2, "Wrong input tensor number for MULTIPLYDIM!");

    XTensor * a = income.tails[0];
    XTensor * b = income.tails[1];
    int n = income.GetParamInt(0);

    XNoder::MakeGrad(a);
    XNoder::MakeGrad(b);

    /* dE/da */
    _MultiplyDim(node->grad, b, a->grad, n, 1.0F);

    /* dE/db */
    int order = a->order;
    int dimSize[MAX_TENSOR_DIM_NUM];
    memcpy(dimSize, a->dimSize, sizeof(int) * a->order);

    XTensor * bGradTMP = NewTensorBuf(node->grad, node->devID, node->mem);
    _Multiply(node->grad, a, bGradTMP);

    if(n == order - 1){
        int reshapedSize[MAX_TENSOR_DIM_NUM];
        reshapedSize[0] = a->unitNum / dimSize[order - 1];
        reshapedSize[1] = dimSize[order - 1];

        /* we reshape dE/dc * a to a matrix whose column number is equal to the
           size of b. Then we can reduce the matrix into a row vector. */
        bGradTMP->Reshape(2, reshapedSize);

        if(b->outgo.tailNum > 1){
            XTensor * bGradTMP2 = NewTensorBuf(b->grad, b->devID, b->mem);
            _ReduceSum(bGradTMP, bGradTMP2, 0);
            _Sum(b->grad, bGradTMP2, b->grad);
            DelTensorBuf(bGradTMP2);
        }
        else{
            _ReduceSum(bGradTMP, b->grad, 0);
        }
    }
    else{
        int reshapedSize[MAX_TENSOR_DIM_NUM];
        reshapedSize[0] = 1;
        reshapedSize[1] = dimSize[n];
        reshapedSize[2] = 1;

        for(int i = 0; i < order; i++){
            if(i < n)
                reshapedSize[0] *= dimSize[i];
        }

        reshapedSize[2] = a->unitNum / (reshapedSize[0] * reshapedSize[1]);

        /* we reshape dE/dc to a 3D tensor of size (x, y, z) where y = |b|.
           Then reduce along with z and x to obtain dE/db. */
        bGradTMP->Reshape(3, reshapedSize);

        XTensor * interGrad = NewTensorBuf(2, reshapedSize, b->dataType, b->denseRatio, b->devID, b->mem);
        _ReduceSum(bGradTMP, interGrad, 2);

        if(b->outgo.tailNum > 1){
            XTensor * bGradTMP2 = NewTensorBuf(b->grad, b->devID, b->mem);
            _ReduceSum(interGrad, bGradTMP2, 0);
            _Sum(b->grad, bGradTMP2, b->grad);
            DelTensorBuf(bGradTMP2);
        }
        else{
            _ReduceSum(interGrad, b->grad, 0);
        }

        DelTensorBuf(interGrad);
    }

    DelTensor(bGradTMP);

    node->visitMark = NODE_FINISHED;
}

/*
gradient for negate
for
c = -a
we have
dE/da = dE/dc * (-1)
>> node - the node (c) for backward computation
*/
void XMathGrad::GradNegate(XTensor * node)
{
    XLink &income = node->income;
    CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for NEGATE!");

    XTensor * a = income.tails[0];
    XTensor * b = NewTensorBuf(a, a->devID, a->mem);

    XNoder::MakeGrad(a);

    _ScaleAndShift(node->grad, b, -1.0F);
    _Sum(a->grad, b, a->grad);

    DelTensorBuf(b);

    node->visitMark = NODE_FINISHED;
}

/*
gradient for normalize
>> node - the node (c) for backward computation
*/
void XMathGrad::GradNormalize(XTensor * node)
{
    XLink &income = node->income;
    CheckNTErrors(income.tailNum == 5, "Wrong input tensor number for NORMALIZE!");

    XTensor * input = income.tails[0];
    XTensor * mean = income.tails[1];
    XTensor * var = income.tails[2];
    XTensor * a = income.tails[3];
    XTensor * b = income.tails[4];
    XTensor * c = NewTensor(var);
    XTensor * d = NewTensor(a);
    XTensor * e = NewTensor(a);
    XTensor * f = NewTensor(a);
    XTensor * g = NewTensor(a);
    XTensor * h = NewTensor(a);
    XTensor * i = NewTensor(a);
    XTensor * j = NewTensor(a);
    XTensor * k = NewTensor(var);
    XTensor * p = NewTensor(var);
    XTensor * q = NewTensor(var);
    XTensor * r = NewTensor(a);
    XTensor * x = NewTensor(mean);
    XTensor * y = NewTensor(mean);
    XTensor * z = NewTensor(mean);
    DTYPE epsilon = income.GetParam(1);
    int dim = income.GetParamInt(0);
    int n = a->GetDim(dim);

    XNoder::MakeGrad(input);
    XNoder::MakeGrad(mean);
    XNoder::MakeGrad(var);
    XNoder::MakeGrad(a);
    XNoder::MakeGrad(b);

    /* dEdinput */
    _ScaleAndShift(var, c, 1.0F, epsilon);
    _Unsqueeze(c, d, dim, n);
    _Power(d, e, -0.5F);
    _Multiply(a, e, f);
    _Multiply(node->grad, f, input->grad, 1.0F);

    /* dEdmean */
    _ScaleAndShift(f, g, -1.0F);
    _ReduceSum(g, x, dim);
    _ReduceSum(node->grad, y, dim);
    _Multiply(y, x, mean->grad, 1.0F);

    /* dEdvar */
    _Unsqueeze(mean, h, dim, n);
    _Sub(input, h, i);
    _Multiply(a, i, j);
    _Power(var, k, -1.5F);
    _ScaleAndShift(k, p, -0.5F);
    _ReduceSum(j, z, dim);
    _Multiply(z, p, q);
    _Multiply(y, q, var->grad, 1.0F);

    /* dEda */
    _Multiply(i, e, r);
    _Multiply(node->grad, r, a->grad, 1.0F);

    /* dEdb */
    _Sum(b->grad, node->grad, b->grad);

    node->visitMark = NODE_FINISHED;

    delete c;
    delete d;
    delete e;
    delete f;
    delete g;
    delete h;
    delete i;
    delete j;
    delete k;
    delete p;
    delete q;
    delete r;
    delete x;
    delete y;
    delete z;
}

/*
gradient for power
for
c = pow(a,p)
we have
dE/da = (dE/dc) * p * a^(p-1)
>> node - the node (c) for backward computation
*/
void XMathGrad::GradPower(XTensor * node)
{
    XLink &income = node->income;
    CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for POWER!");

    XTensor * a = income.tails[0];
    XTensor * b = NewTensorBuf(a, a->devID, a->mem);
    DTYPE p = income.GetParam(0);

    XNoder::MakeGrad(a);

    _Power(a, b, p - 1.0F);
    _ScaleAndShiftMe(b, p);
    _Multiply(node->grad, b, a->grad, 1.0F);

    DelTensor(b);

    node->visitMark = NODE_FINISHED;
}

/*
gradient for ScaleAndShift
for
c = a * scale + shift
we have
dE/da = dE/dc * scale
>> node - the node (c) for backward computation
*/
void XMathGrad::GradScaleAndShift(XTensor * node)
{
    XLink &income = node->income;
    CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for SCALEANDSHIFT!");

    XTensor * a = income.tails[0];
    DTYPE scale = income.GetParam(0);

    XNoder::MakeGrad(a);

    _Sum(a->grad, node->grad, a->grad, scale);

    node->visitMark = NODE_FINISHED;
}

/*
gradient for minus
for
c = a - b * \beta
we have
dE/da = dE/dc
dE/db = -dE/dc * \beta
>> node - the node (c) for backward computation
*/
void XMathGrad::GradSub(XTensor * node)
{
    XLink &income = node->income;
    CheckNTErrors(income.tailNum == 2, "Wrong input tensor number for SUBSTRACT!");

    XTensor * a = income.tails[0];
    XTensor * b = income.tails[1];
    DTYPE beta = income.GetParam(0);

    XNoder::MakeGrad(a);
    XNoder::MakeGrad(b);

    _Sum(a->grad, node->grad, a->grad);
    _Sum(b->grad, node->grad, b->grad, -beta);

    node->visitMark = NODE_FINISHED;
}

/*
gradient for subtraction with one dimension
c = a - b * \beta
where the size of b is equal to dimension n of a, i.e., |b| = a.dimSize[n]
dE/da = dE/dc
dE/db = - dE/dc * b.reduce(0,...,n-1,n+1,...) * \beta
*/
void XMathGrad::GradSubDim(XTensor * node)
{
    XLink &income = node->income;
    CheckNTErrors(income.tailNum == 2, "Wrong input tensor number for SUBDIM!");

    XTensor * a = income.tails[0];
    XTensor * b = income.tails[1];
    int n = income.GetParamInt(0);
    DTYPE beta = income.GetParam(1);

    XNoder::MakeGrad(a);
    XNoder::MakeGrad(b);

    _Sum(a->grad, node->grad, a->grad);

    int order = a->order;
    int dimSize[MAX_TENSOR_DIM_NUM];
    memcpy(dimSize, a->dimSize, sizeof(int) * a->order);

    if(n == order - 1){
        int reshapedSize[MAX_TENSOR_DIM_NUM];
        reshapedSize[0] = a->unitNum / dimSize[order - 1];
        reshapedSize[1] = dimSize[order - 1];

        /* we reshape dE/dc to a matrix whose column number is equal to the
           size of b. Then we can reduce the matrix into a row vector. */
        node->grad->Reshape(2, reshapedSize);

        if(b->outgo.tailNum > 1){
            XTensor * bGradTMP = NewTensorBuf(b->grad, b->devID, b->mem);
            _ReduceSum(node->grad, bGradTMP, 0);
            if(beta != 1.0F)
                _ScaleAndShiftMe(bGradTMP, beta);
            _Sub(b->grad, bGradTMP, b->grad);
            DelTensorBuf(bGradTMP);
        }
        else{
            _ReduceSum(node->grad, b->grad, 0);
            if(beta != 1.0F)
                _ScaleAndShiftMe(b->grad, beta);
            _ScaleAndShiftMe(b->grad, -1.0F);
        }

        node->grad->Reshape(order, dimSize);
    }
    else{
        int reshapedSize[MAX_TENSOR_DIM_NUM];
...
@@ -760,316 +1024,153 @@ void XMathGrad::GradDivDim(XTensor * node)
        /* we reshape dE/dc to a 3D tensor of size (x, y, z) where y = |b|.
           Then reduce along with z and x to obtain dE/db. */
        node->grad->Reshape(3, reshapedSize);

        XTensor * interGrad = NewTensorBuf(2, reshapedSize, b->dataType, b->denseRatio, b->devID, b->mem);
        _ReduceSum(node->grad, interGrad, 2);

        if(b->outgo.tailNum > 1){
            XTensor * bGradTMP = NewTensorBuf(b->grad, b->devID, b->mem);
            _ReduceSum(interGrad, bGradTMP, 0);
            if(beta != 1.0F)
                _ScaleAndShiftMe(bGradTMP, beta);
            _Sub(b->grad, bGradTMP, b->grad);
            DelTensorBuf(bGradTMP);
        }
        else{
            _ReduceSum(interGrad, b->grad, 0);
            if(beta != 1.0F)
                _ScaleAndShiftMe(b->grad, beta);
            _ScaleAndShiftMe(b->grad, -1.0F);
        }

        node->grad->Reshape(order, dimSize);

        DelTensorBuf(interGrad);
    }

    node->visitMark = NODE_FINISHED;
}

/*
gradient for sum
for
c = a + b * \beta
we have
dE/da = dE/dc
dE/db = dE/dc * \beta
>> node - the node (c) for backward computation
*/
void XMathGrad::GradSum(XTensor * node)
{
    XLink &income = node->income;
    CheckNTErrors(income.tailNum == 2, "Wrong input tensor number for SUM!");

    XTensor * a = income.tails[0];
    XTensor * b = income.tails[1];
    DTYPE beta = income.GetParam(0);

    XNoder::MakeGrad(a);
    XNoder::MakeGrad(b);

    _Sum(a->grad, node->grad, a->grad);
    _Sum(b->grad, node->grad, b->grad, beta);

    node->visitMark = NODE_FINISHED;
}

/*
gradient for sum with one dimension
c = a + b * \beta
where the size of b is equal to dimension n of a, i.e., |b| = a.dimSize[n]
dE/da = dE/dc
dE/db = dE/dc * b.reduce(0,...,n-1,n+1,...) * \beta
>> node - the node (c) for backward computation
*/
void XMathGrad::GradSumDim(XTensor * node)
{
    XLink &income = node->income;
    CheckNTErrors(income.tailNum == 2, "Wrong input tensor number for SUMDIM!");

    XTensor * a = income.tails[0];
    XTensor * b = income.tails[1];
    int n = income.GetParamInt(0);
    DTYPE beta = income.GetParam(1);

    XNoder::MakeGrad(a);
    XNoder::MakeGrad(b);

    _Sum(a->grad, node->grad, a->grad);

    int order = a->order;
    int dimSize[MAX_TENSOR_DIM_NUM];
    memcpy(dimSize, a->dimSize, sizeof(int) * a->order);

    if(n == order - 1){
        int reshapedSize[MAX_TENSOR_DIM_NUM];
        reshapedSize[0] = a->unitNum / dimSize[order - 1];
        reshapedSize[1] = dimSize[order - 1];

        /* we reshape dE/dc to a matrix whose column number is equal to the
           size of b. Then we can reduce the matrix into a row vector. */
        node->grad->Reshape(2, reshapedSize);

        if(b->outgo.tailNum > 1){
            XTensor * bGradTMP = NewTensorBuf(b->grad, b->devID, b->mem);
            _ReduceSum(node->grad, bGradTMP, 0);
            if(beta != 1.0F)
                _ScaleAndShiftMe(bGradTMP, beta);
            _Sum(bGradTMP, b->grad, b->grad);
            DelTensorBuf(bGradTMP);
        }
        else{
            _ReduceSum(node->grad, b->grad, 0);
            if(beta != 1.0F)
                _ScaleAndShiftMe(b->grad, beta);
        }

        node->grad->Reshape(order, dimSize);
    }
    else{
        int reshapedSize[MAX_TENSOR_DIM_NUM];
        reshapedSize[0] = 1;
        reshapedSize[1] = dimSize[n];
        reshapedSize[2] = 1;

        for(int i = 0; i < order; i++){
            if(i < n)
                reshapedSize[0] *= dimSize[i];
        }

        reshapedSize[2] = a->unitNum / (reshapedSize[0] * reshapedSize[1]);

        /* we reshape dE/dc to a 3D tensor of size (x, y, z) where y = |b|.
           Then reduce along with z and x to obtain dE/db. */
        node->grad->Reshape(3, reshapedSize);

        XTensor * interGrad = NewTensorBuf(2, reshapedSize, b->dataType, b->denseRatio, b->devID, b->mem);
        _ReduceSum(node->grad, interGrad, 2);

        if(b->outgo.tailNum > 1){
            XTensor * bGradTMP = NewTensorBuf(b->grad, b->devID, b->mem);
            _ReduceSum(interGrad, bGradTMP, 0);
            if(beta != 1.0F)
                _ScaleAndShiftMe(bGradTMP, beta);
            _Sum(bGradTMP, b->grad, b->grad);
            DelTensorBuf(bGradTMP);
        }
        else{
            _ReduceSum(interGrad, b->grad, 0);
            if(beta != 1.0F)
                _ScaleAndShiftMe(b->grad, beta);
        }

        node->grad->Reshape(order, dimSize);

        DelTensorBuf(interGrad);
    }

    node->visitMark = NODE_FINISHED;
}
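For reference, the dimension-wise gradients above follow a standard derivation; a minimal sketch for the GradSumDim case, assuming c = a + \beta b with b broadcast along dimension n of a (the other *Dim routines only change the per-element factor):

\[
c_{i_0 \dots i_{m-1}} = a_{i_0 \dots i_{m-1}} + \beta\, b_{i_n}
\;\Rightarrow\;
\frac{\partial E}{\partial a_{i_0 \dots i_{m-1}}} = \frac{\partial E}{\partial c_{i_0 \dots i_{m-1}}},
\qquad
\frac{\partial E}{\partial b_{j}} = \beta \sum_{\substack{i_0 \dots i_{m-1} \\ i_n = j}} \frac{\partial E}{\partial c_{i_0 \dots i_{m-1}}}
\]

The sum over all indices except the broadcast dimension is exactly why the code reshapes dE/dc into a matrix (or a 3-D tensor when n is an interior dimension) and then calls _ReduceSum over the remaining axes.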
...

source/network/XBackwardMath.h

@@ -40,23 +40,50 @@ public:
    bool IsMathOP(XTensor * node);

private:

    /* gradient for absolute */
    static
    void GradAbsolute(XTensor * node);

    /* gradient for cos */
    static
    void GradCos(XTensor * node);

    /* gradient for exp */
    static
    void GradExp(XTensor * node);

    /* gradient for log: c = log(a) */
    static
    void GradLog(XTensor * node);

    /* gradient for round */
    static
    void GradRound(XTensor * node);

    /* gradient for sign */
    static
    void GradSign(XTensor * node);

    /* gradient for sin */
    static
    void GradSin(XTensor * node);

    /* gradient for tan */
    static
    void GradTan(XTensor * node);

    /* gradient for clip */
    static
    void GradClip(XTensor * node);

    /* gradient for Divide */
    static
    void GradDiv(XTensor * node);

    /* gradient for DivideDim */
    static
    void GradDivDim(XTensor * node);

    /* gradient for matrix multiply: c = matmul(a, b) * \alpha */
    static
...
@@ -73,18 +100,27 @@ private:

    static
    void GradMatrixMulBatched(XTensor * node);

    /* gradient for multiply (dot production): c = a * b * \alpha */
    static
    void GradMultiply(XTensor * node);

    /* gradient for multiply one dimension: c = a * b * \alpha
       where the size of b is equal to that of one dimension of a */
    static
    void GradMultiplyDim(XTensor * node);

    /* gradient for negate */
    static
    void GradNegate(XTensor * node);

    /* gradient for normalize */
    static
    void GradNormalize(XTensor * node);

    /* gradient for power */
    static
    void GradPower(XTensor * node);

    /* gradient for ScaleAndShift */
    static
    void GradScaleAndShift(XTensor * node);
...
@@ -93,13 +129,19 @@ private:

    static
    void GradSub(XTensor * node);

    /* gradient for sub with one dimension: c = a - b * \beta
       where the size of b is equal to that of one dimension of a */
    static
    void GradSubDim(XTensor * node);

    /* gradient for sum: c = a + b * \beta */
    static
    void GradSum(XTensor * node);

    /* gradient for sum with one dimension: c = a + b * \beta
       where the size of b is equal to that of one dimension of a */
    static
    void GradSumDim(XTensor * node);

    /* gradient for reduceMean */
    static
...
@@ -116,42 +158,6 @@ private:

    /* gradient for reduceVariance */
    static
    void GradReduceVariance(XTensor * node);

(The trailing declarations of GradSin, GradCos, GradTan, GradExp, GradNormalize, GradAbsolute, GradSign, GradClip and GradRound that previously followed here are removed; these routines are now declared earlier in the class.)

};

}
...
source/network/XNet.cpp

@@ -137,8 +137,6 @@ void XNet::Backward(XList &roots, XList &golds, LOSS_FUNCTION_NAME loss)
         XTensor * x = income.tails[0];
         XNoder::MakeGrad(x);
         lossGrad.Compute(gold, root, x, NULL, x->grad, funcID, params, loss);
-        //XNoder::MakeGrad(root);
-        //lossGrad.Compute(gold, root, x, root->grad, x->grad, funcID, params, loss);
         root->visitMark = NODE_FINISHED;
     }

     /* we compuate dE/dy (y is the output) if no predefined activation function is used */
...
source/sample/transformer/T2TAttention.cpp

@@ -35,6 +35,8 @@ T2TAttention::T2TAttention()
     dk = -1;
     dv = -1;
     d = -1;
+    isMasked = false;
+    ignored = 0;
 }

 /* deconstructor */
...
@@ -46,13 +48,19 @@ T2TAttention::~T2TAttention()
 initialize the model
 >> argc - number of arguments
 >> argv - list of pointers to the arguments
+>> myIgnored - number of position ignored in attention (from the begining)
+>> myIsMasked - indicates whether the attention is with a mask
 >> myDevID - device id
 >> myMem - the memory pool
 */
-void T2TAttention::InitModel(int argc, const char ** argv, int myDevID, XMem * myMem)
+void T2TAttention::InitModel(int argc, const char ** argv, bool myIsMasked, int myIgnored, int myDevID, XMem * myMem)
 {
     devID = myDevID;
     mem = myMem;
+    isMasked = myIsMasked;
+    ignored = myIgnored;

     float minmax = 0;
...
@@ -82,9 +90,10 @@ make the network
 and H = vector size of each position
 >> q - queries
 >> v - values
+>> maske - as it is
 << return - multi-attention result
 */
-XTensor T2TAttention::Make(XTensor &k, XTensor &q, XTensor &v)
+XTensor T2TAttention::Make(XTensor &k, XTensor &q, XTensor &v, XTensor &mask)
 {
     XTensor k2;
     XTensor q2;
...
@@ -105,10 +114,18 @@ XTensor T2TAttention::Make(XTensor &k, XTensor &q, XTensor &v)
     vheads = Split(v2, v2.order - 1, nhead);

     XTensor att;
+    XTensor dot;
     XTensor scalar;

     /* scalar = softmax(Q * K^T / sqrt(dk)) * V */
-    scalar = Softmax(Linear(BMMul(qheads, X_NOTRANS, kheads, X_TRANS), 1/(float)sqrt((float)dk)), -1);
+    dot = BMMul(qheads, X_NOTRANS, kheads, X_TRANS);
+
+    if(isMasked)
+        dot = dot + mask;
+
+    scalar = Softmax(Linear(dot, 1/(float)sqrt((float)dk)), -1);
+
+    if(ignored > 0)
+        _SetDataDim(&scalar, 0, ignored, scalar.order - 2, 1e-9F);

     att = BMMul(scalar, vheads);

     /* concatenate the heads */
...
source/sample/transformer/T2TAttention.h

@@ -66,6 +66,13 @@ public:
     /* size of input Q, K and V */
     int d;

+    /* indicates whether the attention is masked */
+    bool isMasked;
+
+    /* some positions can be ignored in attention. this is useful in lm where the first position needs
+       special design for the attention model. */
+    int ignored;

 public:

     /* constructor */
     T2TAttention();
...
@@ -74,10 +81,12 @@ public:
     ~T2TAttention();

     /* initialize the model */
-    void InitModel(int argc, const char ** argv, int myDevID = -1, XMem * myMem = NULL);
+    void InitModel(int argc, const char ** argv, bool myIsMasked, int myIgnored, int myDevID = -1, XMem * myMem = NULL);

     /* make the network */
-    XTensor Make(XTensor &k, XTensor &q, XTensor &v);
+    XTensor Make(XTensor &k, XTensor &q, XTensor &v, XTensor &mask);
 };

}
...
source/sample/transformer/T2TEmbedding.cpp

@@ -136,7 +136,7 @@ XTensor T2TEmbedder::Make(XTensor &input)
     wordEmbedding = Linear(MMul(input, w), (float)sqrt((float)d));

     /* we sum over the two embeddings */
     return wordEmbedding + posEmbedding;
 }

}
source/sample/transformer/T2TEncoder.cpp

@@ -46,13 +46,18 @@ AttEncoder::~AttEncoder()
 initialize the model
 >> argc - number of arguments
 >> argv - list of pointers to the arguments
+>> myIsMasked - indicates whether the masked attention is employed
+>> myIgnored - number of positions ignored in attention (from the start)
 >> myDevID - device id
 >> myMem - the memory pool
 */
-void AttEncoder::InitModel(int argc, const char ** argv, int myDevID, XMem * myMem)
+void AttEncoder::InitModel(int argc, const char ** argv, bool myIsMasked, int myIgnored, int myDevID, XMem * myMem)
 {
     devID = myDevID;
     mem = myMem;
+    ignored = myIgnored;

     LoadParamInt(argc, argv, "nlayer", &nlayer, 6);
     LoadParamInt(argc, argv, "hsize", &hSize, DEFAULT_EMBEDDING_SIZE);
...
@@ -72,7 +77,7 @@ void AttEncoder::InitModel(int argc, const char ** argv, int myDevID, XMem * myM

     /* initialize the stacked layers */
     for(int i = 0; i < nlayer; i++){
-        attentions[i].InitModel(argc, argv, myDevID, myMem);
+        attentions[i].InitModel(argc, argv, myIsMasked, myIgnored, myDevID, myMem);
         fnns[i].InitModel(argc, argv, myDevID, myMem);
         attLayerNorms[i].InitModel(argc, argv, myDevID, myMem);
         fnnLayerNorms[i].InitModel(argc, argv, myDevID, myMem);
...
@@ -82,9 +87,11 @@ void AttEncoder::InitModel(int argc, const char ** argv, int myDevID, XMem * myM
 /*
 make the encoding network
 >> input - the input tensor of the encoder
+>> mask - the mask that indicate each position is valid
+>> skipInputRes - indicates whether we skip the residual connection of the first layer
 << return - the output tensor of the encoder
 */
-XTensor AttEncoder::Make(XTensor &input)
+XTensor AttEncoder::Make(XTensor &input, XTensor &mask, bool skipInputRes)
 {
     XTensor x;
...
@@ -96,8 +103,18 @@ XTensor AttEncoder::Make(XTensor &input)
         XTensor fnn;
         XTensor res;

+        if(skipInputRes && i == 0){
+            /* self attention */
+            att = attentions[i].Make(x, x, x, mask);
+
+            /* TODO: dropout */
+
+            /* layer normalization */
+            x = attLayerNorms[i].Make(att);
+        }
+        else{
             /* self attention */
-            att = attentions[i].Make(x, x, x);
+            att = attentions[i].Make(x, x, x, mask);

             /* residual connection */
             res = Sum(att, x);
...
@@ -106,6 +123,7 @@ XTensor AttEncoder::Make(XTensor &input)
             /* layer normalization */
             x = attLayerNorms[i].Make(res);
+        }

         /* fnn */
         fnn = fnns[i].Make(x);
...
source/sample/transformer/T2TEncoder.h (view file @ f7f33b29)

@@ -40,7 +40,7 @@ class T2TEncoder
{
public:
    virtual
-   XTensor Make(XTensor &input) = 0;
+   XTensor Make(XTensor &input, XTensor &mask, bool skipInputRes) = 0;
};

/*
@@ -49,7 +49,7 @@ the encoder based on RNN
class RNNEncoder : T2TEncoder
{
public:
-   XTensor Make(XTensor &input);
+   XTensor Make(XTensor &input, XTensor &mask, bool skipInputRes);
};

@@ -77,6 +77,10 @@ public:
    /* vocabulary size */
    int vSize;

+   /* some positions can be ignored in attention. this is useful in lm where the first
+      position needs special design for the attention model. */
+   int ignored;

    /* embedding of word at each position */
    T2TEmbedder embedder;

@@ -106,10 +110,12 @@ public:
    ~AttEncoder();

    /* initialize the model */
-   void InitModel(int argc, const char ** argv, int myDevID = -1, XMem * myMem = NULL);
+   void InitModel(int argc, const char ** argv, bool myIsMasked, int myIgnored,
+                  int myDevID = -1, XMem * myMem = NULL);

    /* make the encoding network */
-   XTensor Make(XTensor &input);
+   XTensor Make(XTensor &input, XTensor &mask, bool skipInputRes);
};
...
source/sample/transformer/T2TLayerNormal.cpp (view file @ f7f33b29)

@@ -19,6 +19,7 @@
 * $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-31
 */

+#include <math.h>
#include "T2TLayerNormal.h"
#include "T2TUtility.h"
#include "T2TEmbedding.h"

@@ -89,14 +90,13 @@ XTensor T2TLN::Make(XTensor &input)
    /* standard = sqrt(variance) */
    standard = Power(variance, 0.5F);

    /* unsqueeze mean and standard deviation to fit them into
       the same shape of x */
    meanFilled = Unsqueeze(mean, x.order - 1, x.GetDim(-1));
    standardFilled = Unsqueeze(standard, x.order - 1, x.GetDim(-1));

    /* x' = (x - \mu)/standard */
    xn = (x - meanFilled) / standardFilled;

    /* result = x' * w + b */
    return MMul(xn, w) + b;
...
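For reference, the hunk above computes the usual layer normalization; written out in the code's own notation (this equation is an editorial sketch, not part of the commit):

    y = MMul((x - mean) / sqrt(variance), w) + b

where mean and variance are taken over the last dimension of x and unsqueezed back to the shape of x before the subtraction and division, and the product with w is the matrix product MMul.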
source/sample/transformer/T2TModel.cpp (view file @ f7f33b29)

@@ -34,6 +34,7 @@ T2TModel::T2TModel()
    mem = NULL;
    isLM = false;
    isMT = false;
+   nhead = 1;
}

/* de-constructor */

@@ -55,24 +56,27 @@ void T2TModel::InitModel(int argc, const char ** argv)
    LoadParamBool(argc, argv, "mem", &useMem, useMem);
    LoadParamBool(argc, argv, "lm", &isLM, true);
    LoadParamBool(argc, argv, "mt", &isMT, false);
+   LoadParamInt(argc, argv, "nhead", &nhead, 8);

    if(useMem){
        delete mem;
        mem = new XMem(devID);
    }

-   encoder.InitModel(argc, argv, devID, mem);
+   encoder.InitModel(argc, argv, isLM, isLM ? 1 : 0, devID, mem);
    outputLayer.InitModel(argc, argv, devID, mem);
}

/*
make the encoding network
>> input - input tensor
+>> mask - the mask for positions that are/are not involved in computation
+>> skipInputRes - indicates whether we skip the residual connection of the first layer
<< return - encoding result
*/
-XTensor T2TModel::MakeEncoding(XTensor &input)
+XTensor T2TModel::MakeEncoding(XTensor &input, XTensor &mask, bool skipInputRes)
{
-   return encoder.Make(input);
+   return encoder.Make(input, mask, skipInputRes);
}

/*

@@ -85,8 +89,23 @@ void T2TModel::Make(XTensor &input, XTensor &output)
    XTensor encoding;

    if(isLM){
-       encoding = MakeEncoding(input);
+       /* generate mask to see "previous" words only */
+       int len = input.GetDim(input.order - 2);
+       int * dims = new int[input.order + 1];
+       for(int i = 0; i < input.order; i++)
+           dims[i + 1] = input.GetDim(i);
+       dims[0] = nhead;
+       dims[input.order] = len;
+       XTensor mask(input.order + 1, dims, X_FLOAT, 1.0F, input.devID, input.mem);
+
+       /* an upper triangular matrix where the cells of the upper triangle are set to -1e9 */
+       _SetDataLowTri(&mask, 1e9F, -1);
+       _ScaleAndShiftMe(&mask, 1.0F, -1e9F);
+
+       encoding = MakeEncoding(input, mask, true);
        outputLayer.Make(encoding, output);
+
+       delete[] dims;
    }
    else{
        ShowNTErrors("TODO!");
...
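A worked example (not part of the commit) of the mask construction for len = 3; each 3 x 3 slice of the mask evolves as:

    after _SetDataLowTri(&mask, 1e9F, -1)    after _ScaleAndShiftMe(&mask, 1.0F, -1e9F)
      0    0    0                              -1e9  -1e9  -1e9
     1e9   0    0                                0   -1e9  -1e9
     1e9  1e9   0                                0     0   -1e9

Assuming rows index the current position, only strictly earlier positions keep an unpenalized score once this mask enters the attention weights before the softmax.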
source/sample/transformer/T2TModel.h (view file @ f7f33b29)

@@ -55,6 +55,9 @@ public:
    /* indicates whether the model is running for machine translation */
    bool isMT;

+   /* number of heads in the attention model */
+   int nhead;

public:
    /* constructor */
    T2TModel();

@@ -66,7 +69,7 @@ public:
    void InitModel(int argc, const char ** argv);

    /* make the encoding network */
-   XTensor MakeEncoding(XTensor &input);
+   XTensor MakeEncoding(XTensor &input, XTensor &mask, bool skipInputRes);

    /* make the entire network (with the output softmax layer) */
    void Make(XTensor &input, XTensor &output);
...
source/sample/transformer/T2TUtility.cpp (view file @ f7f33b29)

@@ -100,7 +100,9 @@ void ShowParams(int argc, const char ** argv)
{
    fprintf(stderr, "args: \n");
    for(int i = 0; i < argc; i++){
-       if(argv[i][0] == '-'){
+       if(argv[i][1] == 0)
+           continue;
+       if(argv[i][0] == '-' && (argv[i][1] < '1' || argv[i][1] > '9')){
            if(i + 1 < argc && argv[i + 1][0] != '-')
                fprintf(stderr, " %s=%s\n", argv[i], argv[i + 1]);
            else
...
source/tensor/core/arithmetic/Div.cpp (view file @ f7f33b29)

@@ -23,6 +23,7 @@
#include "../../XName.h"
#include "Div.h"
#include "Div.cuh"
+#include "DivDim.h"

namespace nts { // namespace nts(NiuTrans.Tensor)

@@ -138,6 +139,33 @@ void _DivMe(XTensor * a, const XTensor * b, DTYPE alpha, int leadingDim)
}

/*
return a dimension if the division is performed as DivDim (in more details in DivDim.h)
>> a - a tensor
>> b - another tensor for division
*/
int GetDivDimIndex(const XTensor &a, const XTensor &b)
{
    if(a.order < b.order)
        return -1;

    int hitCount = 0;
    int hitDim = -1;
    for(int i = 0; i < b.order; i++){
        if(b.dimSize[b.order - 1 - i] == 1)
            continue;
        else if(b.dimSize[b.order - 1 - i] == a.dimSize[a.order - 1 - i]){
            hitCount++;
            hitDim = a.order - b.order + i;
        }
    }

    if(hitCount == 1)
        return hitDim;
    else
        return -1;
}

/*
element-wise division of two tensors (return a XTensor structure)
make a new tensor c to keep the result and return it

@@ -146,22 +174,40 @@ where i is the index of the item
>> a - tensor a
>> b - tensor b
+>> alpha - the coefficient
>> leadingDim - the dimension along which we perform broadcasting
<< return - the product of the tensors
*/
-XTensor Div(const XTensor &a, const XTensor &b, int leadingDim)
+XTensor Div(const XTensor &a, const XTensor &b, DTYPE alpha, int leadingDim)
{
    XTensor c(&a);
    c.SetTMP();

    int n = GetDivDimIndex(a, b);

    if(n == -1){
        CheckNTErrors(a.dimSize[leadingDim] == b.dimSize[leadingDim], "TODO!");

        /* call _Div function */
        _Div(&a, &b, &c, alpha, leadingDim);

        /* tensor connections */
        XLink::MakeLink(&a, &b, &c, MATH_DIV);
        XLink::AddParamToHead(&c, alpha);
        XLink::AddParamToHeadInt(&c, leadingDim);
    }
    else if(n >= 0 && n < a.order){
        /* call _DivDim function */
        _DivDim(&a, &b, &c, n, alpha);

        /* tensor connections */
        XLink::MakeLink(&a, &b, &c, MATH_DIVDIM);
        XLink::AddParamToHeadInt(&c, n);
        XLink::AddParamToHeadInt(&c, alpha);
    }
    else{
        ShowNTErrors("Something is wrong!");
    }

    return c;
}
...
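A minimal usage sketch of the two dispatch paths (hypothetical shapes; it assumes the usual InitTensor1D/InitTensor2D helpers and is not part of the commit):

    XTensor a, b, b1, c;
    InitTensor2D(&a, 2, 4);     /* a has shape (2, 4) */
    InitTensor2D(&b, 2, 4);     /* same shape: GetDivDimIndex returns -1 */
    c = Div(a, b);              /* element-wise path, _Div is called */

    InitTensor1D(&b1, 4);       /* shape (4) matches the last dimension of a */
    c = Div(a, b1);             /* GetDivDimIndex returns 1, _DivDim broadcasts b1 over the rows of a */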
source/tensor/core/arithmetic/Div.h (view file @ f7f33b29)

@@ -47,7 +47,7 @@ make a new tensor to keep the result and return it
c(i) = a(i)/b(i)
where i is the index of the element
*/
-XTensor Div(const XTensor &a, const XTensor &b, int leadingDim = 0);
+XTensor Div(const XTensor &a, const XTensor &b, DTYPE alpha = 0.0, int leadingDim = 0);

} // namespace nts(NiuTrans.Tensor)
...
source/tensor/core/arithmetic/Multiply.cpp (view file @ f7f33b29)

@@ -23,6 +23,7 @@
#include "../../XName.h"
#include "Multiply.h"
#include "Multiply.cuh"
+#include "MultiplyDim.h"

namespace nts { // namespace nts(NiuTrans.Tensor)

@@ -139,6 +140,33 @@ void _MultiplyMe(XTensor * a, const XTensor * b, DTYPE alpha, int leadingDim)
}

/*
return a dimension if the multiplication is performed as MultiplyDim (in more details in MultiplyDim.h)
>> a - a tensor
>> b - another tensor for multiplication
*/
int GetMultiplyDimIndex(const XTensor &a, const XTensor &b)
{
    if(a.order < b.order)
        return -1;

    int hitCount = 0;
    int hitDim = -1;
    for(int i = 0; i < b.order; i++){
        if(b.dimSize[b.order - 1 - i] == 1)
            continue;
        else if(b.dimSize[b.order - 1 - i] == a.dimSize[a.order - 1 - i]){
            hitCount++;
            hitDim = a.order - b.order + i;
        }
    }

    if(hitCount == 1)
        return hitDim;
    else
        return -1;
}

/*
element-wise product of two tensors (return a XTensor structure)
make a new tensor c to keep the result and return it

@@ -150,19 +178,37 @@ where i is the index of the item
>> leadingDim - the dimension along which we perform broadcasting
<< return - the product of the tensors
*/
-XTensor Multiply(const XTensor &a, const XTensor &b, int leadingDim)
+XTensor Multiply(const XTensor &a, const XTensor &b, DTYPE alpha, int leadingDim)
{
    XTensor c(&a);
    c.SetTMP();

    int n = GetMultiplyDimIndex(a, b);

    if(n == -1){
        CheckNTErrors(a.dimSize[leadingDim] == b.dimSize[leadingDim], "TODO!");

        /* call _Multiply function */
        _Multiply(&a, &b, &c, 0, leadingDim);

        /* tensor connections */
        XLink::MakeLink(&a, &b, &c, MATH_MULTIPLY);
        XLink::AddParamToHead(&c, alpha);
        XLink::AddParamToHeadInt(&c, leadingDim);
    }
    else if(n >= 0 && n < a.order){
        /* call _MultiplyDim function */
        _MultiplyDim(&a, &b, &c, n, alpha);

        /* tensor connections */
        XLink::MakeLink(&a, &b, &c, MATH_MULTIPLYDIM);
        XLink::AddParamToHeadInt(&c, n);
        XLink::AddParamToHeadInt(&c, alpha);
    }
    else{
        ShowNTErrors("Something is wrong!");
    }

    return c;
}
...
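The dispatch mirrors Div. For example (hypothetical shapes, not from the commit), with a of shape (2, 4) and b of shape (4), GetMultiplyDimIndex returns 1 and Multiply(a, b) now routes to _MultiplyDim(&a, &b, &c, 1, alpha) instead of requiring the leading dimensions to match.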
source/tensor/core/arithmetic/Multiply.h (view file @ f7f33b29)

@@ -47,7 +47,7 @@ make a new tensor to keep the result and return it
c(i) = a(i)*b(i)
where i is the index of the element
*/
-XTensor Multiply(const XTensor &a, const XTensor &b, int leadingDim = 0);
+XTensor Multiply(const XTensor &a, const XTensor &b, DTYPE alpha = 0.0, int leadingDim = 0);

} // namespace nts(NiuTrans.Tensor)
...
source/tensor/core/arithmetic/Sub.cpp (view file @ f7f33b29)

@@ -135,15 +135,14 @@ int GetSubDimIndex(const XTensor &a, const XTensor &b)
    if(a.order < b.order)
        return -1;
+   if(XTensor::IsSameShaped(&a, &b))
+       return -1;

    int hitCount = 0;
    int hitDim = -1;
-   for(int i = 0; i < b.order; i++){
-       if(b.dimSize[b.order - 1 - i] == 1)
-           continue;
-       else if(b.dimSize[b.order - 1 - i] == a.dimSize[a.order - 1 - i]){
-           hitCount++;
-           hitDim = a.order - b.order + i;
-       }
-   }
+   for(int i = 0; i < a.order; i++){
+       if(a.dimSize[i] == b.unitNum){
+           hitCount++;
+           hitDim = i;
+       }
+   }

@@ -173,7 +172,6 @@ XTensor Sub(const XTensor &a, const XTensor &b, DTYPE beta)
    /* call _Sub function */
    _Sub(&a, &b, &c, beta);

    /* tensor connections */
    XLink::MakeLink(&a, &b, &c, MATH_SUB);
    XLink::AddParamToHead(&c, beta);
...
source/tensor/core/arithmetic/SubDim.cu (view file @ f7f33b29)
source/tensor/core/arithmetic/Sum.cpp (view file @ f7f33b29)

@@ -137,37 +137,17 @@ return a dimension if the sum is performed as SumDim (in more details in SumDim.h)
*/
int GetSumDimIndex(const XTensor &a, const XTensor &b)
{
-   //if(a.order < b.order)
-   //    return -1;
-   //int hitCount = 0;
-   //int hitDim = -1;
-   //for(int i = 0; i < b.order; i++){
-   //    if(b.dimSize[b.order - 1 - i] == 1)
-   //        continue;
-   //    else if(b.dimSize[b.order - 1 - i] == a.dimSize[a.order - 1 - i]){
-   //        hitCount++;
-   //        hitDim = a.order - b.order + i;
-   //    }
-   //}
-   //if(hitCount == 1)
-   //    return hitDim;
-   //else
-   //    return -1;

    if(a.order < b.order)
        return -1;
+   if(XTensor::IsSameShaped(&a, &b))
+       return -1;

    int hitCount = 0;
    int hitDim = -1;
-   for(int i = 0; i < b.order; i++){
-       if(b.dimSize[b.order - 1 - i] == 1)
-           continue;
-       else if(b.dimSize[b.order - 1 - i] == a.dimSize[a.order - 1 - i]){
-           hitCount++;
-           hitDim = a.order - b.order + i;
-       }
-   }
+   for(int i = 0; i < a.order; i++){
+       if(a.dimSize[i] == b.unitNum){
+           hitCount++;
+           hitDim = i;
+       }
+   }
...
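Under the unitNum-based matching shown above (a sketch of the intended behaviour, not a statement taken from the commit itself): with a of shape (2, 4) and b of shape (2), b.unitNum is 2 and matches dimension 0 of a, so hitDim = 0 and Sum/Sub fall back to SumDim/SubDim along dimension 0. This is precisely the case exercised by the new TSubDim test added later in this commit.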
source/tensor/core/getandset/Select.cpp (view file @ f7f33b29)

@@ -49,7 +49,7 @@ void _SelectRange(const XTensor * a, XTensor * c, int dim, int low, int high)
    for(int i = 0; i < a->order; i++){
        if(i == dim){
-           CheckNTErrors(low > 0 && low < a->dimSize[dim], "Illegal range specified!");
+           CheckNTErrors(low >= 0 && low < a->dimSize[dim], "Illegal range specified!");
            CheckNTErrors(high > 0 && high <= a->dimSize[dim], "Illegal range specified!");
        }
        else{

@@ -101,7 +101,7 @@ XTensor SelectRange(const XTensor &a, int dim, int low, int high)
    for(int i = 0; i < a.order; i++){
        if(i == dim){
-           CheckNTErrors(low > 0 && low < a.dimSize[dim], "Illegal range specified!");
+           CheckNTErrors(low >= 0 && low < a.dimSize[dim], "Illegal range specified!");
            CheckNTErrors(high > 0 && high <= a.dimSize[dim], "Illegal range specified!");
            dimSize[i] = high - low;
        }
...
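In effect the relaxed check lets a range start at index 0: for example, SelectRange(a, 0, 0, 2) (the first two slices along dimension 0) was previously rejected by `low > 0` and is now accepted, while `high` must still be positive and no larger than the dimension size.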
source/tensor/core/getandset/SetData.cpp (view file @ f7f33b29)

@@ -214,6 +214,106 @@ void _SetDataFixedDouble(XTensor * tensor, double p)
}

/*
set data items along a given dimension (and keep the remaining items unchanged)
>> tensor - the tensor whose data array would be initialized
>> beg - the beginning position
>> len - length along the given dimension
>> dim - the dimension along which we set the data
e.g., given a 3 * 3 tensor
    1 2 3
    4 5 6
    7 8 9
when beg = 1, len = 1, dim = 0 and p = 0, we have
    1 2 3
    0 0 0
    7 8 9
i.e., we set all entries of row 1 to 0
*/
void _SetDataDim(XTensor * tensor, int beg, int len, int dim, DTYPE p)
{
    int n = tensor->order;

    CheckNTErrors(tensor->dataType == DEFAULT_DTYPE, "TODO!");
    CheckNTErrors(dim < n && dim > 0, "Illegal dimension!");
    CheckNTErrors(beg >= 0 && beg < tensor->GetDim(dim), "Illegal beginning position!");
    CheckNTErrors(beg + len >= 0 && beg + len < tensor->GetDim(dim), "Illegal length!");

    if(tensor->devID < 0){
        int stride = 1;
        int blockSize = 1;
        int blockNum = 1;
        for(int i = n - 1; i > dim; i--){
            stride *= tensor->GetDim(i);
        }
        blockSize = stride * tensor->GetDim(dim);
        blockNum = tensor->unitNum / blockSize;

        int l = len * stride;

        for(int i = 0; i < blockNum; i++){
            DTYPE * d = (DTYPE*)tensor->data + blockSize * i + beg * stride;
            for(int j = 0; j < l; j++)
                d[j] = p;
        }
    }
    else{
#ifdef USE_CUDA
        _CudaSetDataDim(tensor, beg, len, dim, p);
#endif
    }
}

/*
generate data as lower triangular matrices for the last two dimensions
>> tensor - the tensor whose data to be set
>> p - the value for each entry of the lower triangular matrices
>> shift - the offset from the diagonal
e.g., for a 3 * 3 tensor,
when p = 1 and shift = 0, we have
    1 0 0
    1 1 0
    1 1 1
when p = 2 and shift = -1, we have
    0 0 0
    2 0 0
    2 2 0
*/
void _SetDataLowTri(XTensor * tensor, DTYPE p, int shift)
{
    int n = tensor->order;

    CheckNTErrors(tensor->dataType == DEFAULT_DTYPE, "TODO!");
    CheckNTErrors(n >= 2, "The tensor must have an order no less than 2!");
    CheckNTErrors(tensor->GetDim(n - 1) == tensor->GetDim(n - 2),
                  "The last two dimensions must be of the same size!");

    if(tensor->devID < 0){
        int l = tensor->GetDim(-1);
        int blockNum = 1;
        int blockSize = l * l;
        for(int i = 0; i < n - 2; i++)
            blockNum *= tensor->GetDim(i);

        for(int i = 0; i < blockNum; i++){
            DTYPE * d = (DTYPE*)tensor->data + i * blockSize;
            for(int row = 0; row < l; row++){
                for(int col = 0; col <= row + shift; col++){
                    d[row * l + col] = p;
                }
                for(int col = MAX(0, row + shift + 1); col < l; col++){
                    d[row * l + col] = 0;
                }
            }
        }
    }
    else{
#ifdef USE_CUDA
        _CudaSetDataLowTri(tensor, p, shift);
#endif
    }
}

/*
generate data items with a uniform distribution in [lower, upper]
>> tensor - the tensor whose data array would be initialized
>> lower - lower value of the range
...
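A worked example of the CPU branch (not part of the commit): for a tensor t of shape (2, 3, 4), the call _SetDataDim(&t, 1, 1, 1, 0.0F) gives stride = 4, blockSize = 12 and blockNum = 2, so within each of the two 3 x 4 blocks the four entries at offsets 4..7 (the middle row) are set to 0 and everything else is left untouched.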
source/tensor/core/getandset/SetData.cu (view file @ f7f33b29)

@@ -185,6 +185,169 @@ void KernelSetDataRandDouble(double * d, int size, DTYPE lower, DTYPE variance)
}

/*
/*
set data items along with a given dimension (and keep the remaining items unchanged) - kernel version
>> tensor - the tensor whose data array would be initialized
>> beg - the beginning position
>> len - length of the segment to be set
>> blockSize - size of a data block
>> blockNum - number of data blocks
*/
__global__
void KernelSetDataDim(DTYPE * d, int beg, int len, int blockSize, int blockNum, DTYPE p)
{
/* offset in each block */
int i = blockDim.x * blockIdx.x + threadIdx.x;
/* block id */
int j = blockDim.y * blockIdx.y + threadIdx.y;
if(i >= blockSize || j > blockNum)
return;
if(i < beg || i >= beg + len)
return;
d[blockSize * j + i] = p;
}
/*
set data items along with a given dimension (and keep the remaining items unchanged) - cuda version
>> tensor - the tensor whose data array would be initialized
>> beg - the beginning position
>> len - length along with the given dimension
>> dim - the dimension along which we set the data
e.g., given a 3 * 3 tensor
1 2 3
4 5 6
7 8 9
when beg = 1, len = 1, dim = 0 and p = 0, we have
1 2 3
0 0 0
7 8 9
i.e., we set all entries of row 1 to 0
*/
void _CudaSetDataDim(XTensor * tensor, int beg, int len, int dim, DTYPE p)
{
int n = tensor->order;
CheckNTErrors(tensor->dataType == DEFAULT_DTYPE, "TODO!");
CheckNTErrors(dim < n && dim > 0, "Illegal dimension!");
CheckNTErrors(beg >= 0 && beg < tensor->GetDim(dim), "Illegal beginning position!");
CheckNTErrors(beg + len >= 0 && beg + len < tensor->GetDim(dim), "Illegal length!");
int stride = 1;
int blockSize = 1;
int blockNum = 1;
for(int i = n - 1; i > dim; i--){
stride *= tensor->GetDim(i);
}
blockSize = stride * tensor->GetDim(dim);
blockNum = tensor->unitNum / blockSize;
int cudaGrids[3];
int cudaBlocks[3];
GDevs.GetCudaThread2D(tensor->devID, blockSize, blockNum, MAX_INT, cudaGrids, cudaBlocks);
dim3 blocks(cudaGrids[0], cudaGrids[1]);
dim3 threads(cudaBlocks[0], cudaBlocks[1]);
int devIDBackup;
ProtectCudaDev(tensor->devID, devIDBackup);
KernelSetDataDim<<<blocks, threads >>>((DTYPE*)tensor->data, beg * stride, len * stride, blockSize, blockNum, p);
BacktoCudaDev(tensor->devID, devIDBackup);
}
/*
set lower triangular matrices for each block
>> d - pointer to the data array
>> l - row number (or column number) of each block, i.e.,
       a block is an l * l matrix
>> blockSize - size of each block (blockSize = l * l)
>> blockNum - number of the blocks
>> p - the value for each entry of the lower triangular matrices
>> shift - the offset from the diagonal
e.g., for a 3 * 3 tensor,
when p = 1 and shift = 0, we have
1 0 0
1 1 0
1 1 1
when p = 2 and shift = -1, we have
0 0 0
2 0 0
2 2 0
*/
__global__
void _KernelSetDataLowTri(DTYPE * d, int l, int blockSize, int blockNum, DTYPE p, int shift)
{
/* offset in each block */
int i = blockDim.x * blockIdx.x + threadIdx.x;
/* block id */
int j = blockDim.y * blockIdx.y + threadIdx.y;
if(i >= blockSize || j > blockNum)
return;
int row = i / l;
int col = i % l;
DTYPE * d2 = d + blockSize * j + row * l + col;
if(col <= row + shift)
*d2 = p;
else
*d2 = 0;
}
/*
generate data as lower triangular matrices for the last two dimensions (cuda version)
>> tensor - the tensor whose data to be set
>> p - the value for each entry of the lower triangular matrices
>> shift - the offset from the diagonal
e.g., for a 3 * 3 tensor,
when p = 1 and shift = 0, we have
1 0 0
1 1 0
1 1 1
when p = 2 and shift = -1, we have
0 0 0
2 0 0
2 2 0
*/
void _CudaSetDataLowTri(XTensor * tensor, DTYPE p, int shift)
{
int n = tensor->order;
CheckNTErrors(tensor->dataType == DEFAULT_DTYPE, "TODO!");
CheckNTErrors(n >= 2, "The tensor must have a order no less than 2!");
CheckNTErrors(tensor->GetDim(n - 1) == tensor->GetDim(n - 2),
"The last two dimensions must be of the same size!");
int l = tensor->GetDim(-1);
int blockNum = 1;
int blockSize = l * l;
for(int i = 0; i < n - 2; i++)
blockNum *= tensor->GetDim(i);
int cudaGrids[3];
int cudaBlocks[3];
GDevs.GetCudaThread2D(tensor->devID, blockSize, blockNum, MAX_INT, cudaGrids, cudaBlocks);
dim3 blocks(cudaGrids[0], cudaGrids[1]);
dim3 threads(cudaBlocks[0], cudaBlocks[1]);
int devIDBackup;
ProtectCudaDev(tensor->devID, devIDBackup);
_KernelSetDataLowTri<<<blocks, threads >>>((DTYPE*)tensor->data, l, blockSize, blockNum, p, shift);
BacktoCudaDev(tensor->devID, devIDBackup);
}
/*
generate data items with a uniform distribution in [lower, upper]
>> tensor - the tensor whose data array would be initialized
>> lower - lower value of the range
...
source/tensor/core/getandset/SetData.cuh (view file @ f7f33b29)

@@ -37,6 +37,12 @@ void _CudaSetDataFixedFloat(XTensor * tensor, float p);

/* generate data items with a fixed value p (in double) */
void _CudaSetDataFixedDouble(XTensor * tensor, double p);

+/* set data items along a given dimension (and keep the remaining items unchanged) */
+void _CudaSetDataDim(XTensor * tensor, int beg, int len, int dim, DTYPE p);
+
+/* generate data as lower triangular matrices for the last two dimensions (cuda version) */
+void _CudaSetDataLowTri(XTensor * tensor, DTYPE p, int shift);

/* generate data items with a uniform distribution in [lower, upper] */
void _CudaSetDataRand(XTensor * tensor, DTYPE lower, DTYPE upper);
...
source/tensor/core/getandset/SetData.h (view file @ f7f33b29)

@@ -45,6 +45,12 @@ void _SetDataFixedFloat(XTensor * tensor, float p);

/* generate data items with a fixed value p (in double) */
void _SetDataFixedDouble(XTensor * tensor, double p);

+/* set data items along a given dimension (and keep the remaining items unchanged) */
+void _SetDataDim(XTensor * tensor, int beg, int len, int dim, DTYPE p);
+
+/* generate data as lower triangular matrices for the last two dimensions */
+void _SetDataLowTri(XTensor * tensor, DTYPE p, int shift);

/* generate data items with a uniform distribution in [lower, upper] */
void _SetDataRand(XTensor * tensor, DTYPE lower, DTYPE upper);
...
source/tensor/test/TSubDim.cpp (new file, 0 → 100644, view file @ f7f33b29)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Lin Ye (email: linye2015@outlook.com) 2018-08-13
*/
#include "TSubDim.h"
#include "../core/arithmetic/SubDim.h"
#include "../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)

/*
case 1: tensor subtraction c = a - b * \beta
where the size of b is equal to the n-th dimension of a,
i.e., a is subtracted with b by broadcasting
*/
bool TestSubDim1()
{
    /* a tensor of size (2, 4) */
    int aOrder = 2;
    int * aDimSize = new int[aOrder];
    aDimSize[0] = 2;
    aDimSize[1] = 4;

    int aUnitNum = 1;
    for(int i = 0; i < aOrder; i++)
        aUnitNum *= aDimSize[i];

    /* a tensor of size (2) */
    int bOrder = 1;
    int * bDimSize = new int[bOrder];
    bDimSize[0] = 2;

    int bUnitNum = 1;
    for(int i = 0; i < bOrder; i++)
        bUnitNum *= bDimSize[i];

    DTYPE aData[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
                          {4.0F, 5.0F, 6.0F, 7.0F} };
    DTYPE bData[2] = {1.0F, -1.0F};
    DTYPE answer[2][4] = { {-1.0F, 0.0F, 1.0F, 2.0F},
                           {5.0F, 6.0F, 7.0F, 8.0F} };

    /* CPU test */
    bool cpuTest = true;

    /* create tensors */
    XTensor * a = NewTensor(aOrder, aDimSize);
    XTensor * b = NewTensor(bOrder, bDimSize);
    XTensor * c = NewTensor(aOrder, aDimSize);
    XTensor * cMe = NewTensor(aOrder, aDimSize);
    XTensor cUser;

    /* initialize variables */
    a->SetData(aData, aUnitNum);
    cMe->SetData(aData, aUnitNum);
    b->SetData(bData, bUnitNum);
    c->SetZeroAll();

    /* call SubDim function */
    _SubDim(a, b, c, 0);
    _SubDim(cMe, b, 0);
    cUser = SubDim(*a, *b, 0);

    /* check results */
    cpuTest = c->CheckData(answer, aUnitNum) &&
              cMe->CheckData(answer, aUnitNum) &&
              cUser.CheckData(answer, aUnitNum);

#ifdef USE_CUDA
    /* GPU test */
    bool gpuTest = true;

    /* create tensor */
    XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
    XTensor * bGPU = NewTensor(bOrder, bDimSize, X_FLOAT, 1.0F, 0);
    XTensor * cGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
    XTensor * cMeGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
    XTensor cUserGPU;

    /* Initialize variables */
    aGPU->SetData(aData, aUnitNum);
    cMeGPU->SetData(aData, aUnitNum);
    bGPU->SetData(bData, bUnitNum);
    cGPU->SetZeroAll();

    /* call sub function */
    _SubDim(aGPU, bGPU, cGPU, 0);
    _SubDim(cMeGPU, bGPU, 0);
    cUserGPU = SubDim(*aGPU, *bGPU, 0);

    /* check results */
    gpuTest = cGPU->CheckData(answer, aUnitNum) &&
              cMeGPU->CheckData(answer, aUnitNum) &&
              cUserGPU.CheckData(answer, aUnitNum);

    /* destroy variables */
    delete a;
    delete b;
    delete c;
    delete cMe;
    delete aGPU;
    delete bGPU;
    delete cGPU;
    delete cMeGPU;
    delete[] aDimSize;
    delete[] bDimSize;

    return cpuTest && gpuTest;
#else
    /* destroy variables */
    delete a;
    delete b;
    delete c;
    delete cMe;
    delete[] aDimSize;
    delete[] bDimSize;

    return cpuTest;
#endif // USE_CUDA
}

/*
case 2: tensor subtraction c = a - b * \beta
where the size of b is equal to the n-th dimension of a,
i.e., a is subtracted with b by broadcasting
*/
bool TestSubDim2()
{
    /* a tensor of size (2, 4) */
    int aOrder = 2;
    int * aDimSize = new int[aOrder];
    aDimSize[0] = 2;
    aDimSize[1] = 4;

    int aUnitNum = 1;
    for(int i = 0; i < aOrder; i++)
        aUnitNum *= aDimSize[i];

    /* a tensor of size (2, 2) */
    int bOrder = 2;
    int * bDimSize = new int[bOrder];
    bDimSize[0] = 2;
    bDimSize[1] = 2;

    int bUnitNum = 1;
    for(int i = 0; i < bOrder; i++)
        bUnitNum *= bDimSize[i];

    DTYPE aData[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
                          {4.0F, 5.0F, 6.0F, 7.0F} };
    DTYPE bData[2][2] = { {1.0F, -1.0F},
                          {-1.0F, 1.0F} };
    DTYPE answer[2][4] = { {-1.0F, 2.0F, 3.0F, 2.0F},
                           {3.0F, 6.0F, 7.0F, 6.0F} };

    /* CPU test */
    bool cpuTest = true;

    /* create tensors */
    XTensor * a = NewTensor(aOrder, aDimSize);
    XTensor * b = NewTensor(bOrder, bDimSize);
    XTensor * c = NewTensor(aOrder, aDimSize);
    XTensor * cMe = NewTensor(aOrder, aDimSize);
    XTensor cUser;

    /* initialize variables */
    a->SetData(aData, aUnitNum);
    cMe->SetData(aData, aUnitNum);
    b->SetData(bData, bUnitNum);
    c->SetZeroAll();

    /* call SubDim function */
    _SubDim(a, b, c, 1);
    _SubDim(cMe, b, 1);
    cUser = SubDim(*a, *b, 1);

    /* check results */
    cpuTest = c->CheckData(answer, aUnitNum) &&
              cMe->CheckData(answer, aUnitNum) &&
              cUser.CheckData(answer, aUnitNum);

#ifdef USE_CUDA
    /* GPU test */
    bool gpuTest = true;

    /* create tensor */
    XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
    XTensor * bGPU = NewTensor(bOrder, bDimSize, X_FLOAT, 1.0F, 0);
    XTensor * cGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
    XTensor * cMeGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
    XTensor cUserGPU;

    /* Initialize variables */
    aGPU->SetData(aData, aUnitNum);
    cMeGPU->SetData(aData, aUnitNum);
    bGPU->SetData(bData, bUnitNum);
    cGPU->SetZeroAll();

    /* call sub function */
    _SubDim(aGPU, bGPU, cGPU, 1);
    _SubDim(cMeGPU, bGPU, 1);
    cUserGPU = SubDim(*aGPU, *bGPU, 1);

    /* check results */
    gpuTest = cGPU->CheckData(answer, aUnitNum) &&
              cMeGPU->CheckData(answer, aUnitNum) &&
              cUserGPU.CheckData(answer, aUnitNum);

    /* destroy variables */
    delete a;
    delete b;
    delete c;
    delete cMe;
    delete aGPU;
    delete bGPU;
    delete cGPU;
    delete cMeGPU;
    delete[] aDimSize;
    delete[] bDimSize;

    return cpuTest && gpuTest;
#else
    /* destroy variables */
    delete a;
    delete b;
    delete c;
    delete cMe;
    delete[] aDimSize;
    delete[] bDimSize;

    return cpuTest;
#endif // USE_CUDA
}

/* other cases */
/*
TODO!!
*/

/* test for SubDim Function */
bool TestSubDim()
{
    XPRINT(0, stdout, "[TEST SUBDIM] tensor subtraction c = a - b * beta by broadcasting\n");
    bool returnFlag = true, caseFlag = true;

    /* case 1 test */
    caseFlag = TestSubDim1();

    if(!caseFlag){
        returnFlag = false;
        XPRINT(0, stdout, ">> case 1 failed!\n");
    }
    else
        XPRINT(0, stdout, ">> case 1 passed!\n");

    /* case 2 test */
    caseFlag = TestSubDim2();

    if(!caseFlag){
        returnFlag = false;
        XPRINT(0, stdout, ">> case 2 failed!\n");
    }
    else
        XPRINT(0, stdout, ">> case 2 passed!\n");

    /* other cases test */
    /*
    TODO!!
    */

    if(returnFlag){
        XPRINT(0, stdout, ">> All Passed!\n");
    }
    else
        XPRINT(0, stdout, ">> Failed!\n");

    XPRINT(0, stdout, "\n");

    return returnFlag;
}

} // namespace nts(NiuTrans.Tensor)
source/tensor/test/TSubDim.h (new file, 0 → 100644, view file @ f7f33b29)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Lin Ye (email: linye2015@outlook.com) 2018-08-13
*/
#ifndef __TEST_SUBDIM_H__
#define __TEST_SUBDIM_H__
#include "../core/arithmetic/SubDim.h"
namespace nts { // namespace nts(NiuTrans.Tensor)

/* test for SubDim Function */
bool TestSubDim();

} // namespace nts(NiuTrans.Tensor)
#endif // __TEST_SUBDIM_H__