Commit 3a3b625a by xiaotong

backward propagation for activation functions

parent 28f66400
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* backward computation for activation functions
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-18
* Dingdang won 5 games in the GO training yesterday, hahaha ...
*/
#include "XNoder.h"
#include "XBackwardFunc.h"
#include "../tensor/XName.h"
#include "../tensor/function/FHeader.h"
namespace nts{
/* compute dE/dx of a node */
void XFuncGrad::MakeGrad(XTensor * node)
{
    XLink &income = node->income;
    int operID = income.typeID;

    CheckNTErrors(node->grad != NULL, "No gradient found!");
    CheckNTErrors(income.tailNum == 1, "Too many input tensors for the function!");

    XTensor * input = income.tails[0];
    XTensor * output = node;

    XNoder::MakeGrad(input);

    if(operID == FUNC_HARDTANH)
        _HardTanHBackward(NULL, output, input, output->grad, input->grad, NOLOSS);
    else if(operID == FUNC_IDENTITY)
        _IdentityBackward(NULL, output, input, output->grad, input->grad, NOLOSS);
    else if(operID == FUNC_LOGSOFTMAX){
        int leadDim = income.GetParamInt(0);
        _LogSoftmaxBackward(NULL, output, input, output->grad, input->grad, leadDim, NOLOSS);
    }
    else if(operID == FUNC_RECTIFY)
        _RectifyBackward(NULL, output, input, output->grad, input->grad, NOLOSS);
    else if(operID == FUNC_SIGMOID)
        _SigmoidBackward(NULL, output, input, output->grad, input->grad, NOLOSS);
    else if(operID == FUNC_SOFTMAX){
        int leadDim = income.GetParamInt(0);
        _SoftmaxBackward(NULL, output, input, output->grad, input->grad, leadDim, NOLOSS);
    }
    else{
        ShowNTErrors("Wrong activation function type!");
    }
}
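/* For reference, assuming the textbook definitions of these activations,
   the branches above compute, in the NOLOSS case,
       HardTanH:    dE/dx   = dE/dy                    if -1 <= x <= 1, 0 otherwise
       Identity:    dE/dx   = dE/dy
       Rectify:     dE/dx   = dE/dy                    if x >= 0, 0 otherwise
       Sigmoid:     dE/dx   = dE/dy * y * (1 - y)
       Softmax:     dE/dx_i = y_i * (dE/dy_i - sum_j dE/dy_j * y_j)
       LogSoftmax:  dE/dx_i = dE/dy_i - exp(y_i) * sum_j dE/dy_j
   where the Softmax and LogSoftmax sums run along leadDim. */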
/* indicates whether the node is for an activation function */
bool XFuncGrad::IsFunc(XTensor * node)
{
    XLink &income = node->income;
    return (income.typeID & FUNCTION_BASE) != 0;
}
}
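The bit test in IsFunc relies on two assumptions about the operator IDs in ../tensor/XName.h: every FUNC_* ID has the FUNCTION_BASE bit set, and no non-function ID does. A minimal sketch of such a layout (the values are illustrative, not the library's actual definitions):

/* illustrative ID layout only -- see ../tensor/XName.h for the real values */
enum {
    FUNCTION_BASE   = 0x00010000,
    FUNC_HARDTANH   = FUNCTION_BASE + 1,
    FUNC_IDENTITY   = FUNCTION_BASE + 2,
    FUNC_LOGSOFTMAX = FUNCTION_BASE + 3,
    FUNC_RECTIFY    = FUNCTION_BASE + 4,
    FUNC_SIGMOID    = FUNCTION_BASE + 5,
    FUNC_SOFTMAX    = FUNCTION_BASE + 6
};

Under a layout like this, (typeID & FUNCTION_BASE) != 0 holds exactly for the six activation-function operators handled in MakeGrad above.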
......@@ -29,6 +29,19 @@
namespace nts{
/* this class computes the gradient for activation functions given a node */
class XFuncGrad
{
public:
    /* compute dE/dx of a node */
    static
    void MakeGrad(XTensor * node);

    /* indicates whether the node is for an activation function */
    static
    bool IsFunc(XTensor * node);
};
}
#endif
\ No newline at end of file
......@@ -23,6 +23,7 @@
#include "XNoder.h"
#include "XBackwardLoss.h"
#include "XBackwardMath.h"
#include "XBackwardFunc.h"
#include "../tensor/XName.h"
namespace nts{
......@@ -143,6 +144,11 @@ void XNet::BackwardNode(XTensor * node)
    if(!XNoder::IsLeaf(node)){
        if(XMathGrad::IsMathOP(node))
            XMathGrad::MakeGrad(node);
        else if(XFuncGrad::IsFunc(node))
            XFuncGrad::MakeGrad(node);
        else{
            ShowNTErrors("Wrong node type!");
        }
    }

    node->visitMark = NODE_FINISHED;
......
......@@ -208,7 +208,7 @@ Obviously C = A * B performs normal matrix multiplication if A = y * z and B = x
<< return - the result of matrix multiplication
*/
XTensor MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA, const XTensor &b, MATRIX_TRANS_TYPE transposedB,
                  DTYPE alpha, DTYPE beta, XPRunner * parallelRunner)
{
CheckNTErrors(&a != &NULLTensor && &b != &NULLTensor, "Empty input tensors!");
CheckNTErrors(a.dataType == b.dataType, "Input tensors should have the same data type!");
......
......@@ -47,9 +47,9 @@ dE/dx = dE/dy * dy/dx = dE/dy
>> dedx - dE/dx
>> lossName - type of loss function, e.g., cross entropy
*/
void IdentityBackward(XTensor * gold, XTensor * y, XTensor * x,
                      XTensor * dedy, XTensor * dedx,
                      LOSS_FUNCTION_NAME lossName)
void _IdentityBackward(XTensor * gold, XTensor * y, XTensor * x,
                       XTensor * dedy, XTensor * dedx,
                       LOSS_FUNCTION_NAME lossName)
{
    CheckNTErrors((gold == NULL || XTensor::IsIdentical(gold, y)),
                  "The tensors must be of the same size!");
......
......@@ -33,9 +33,9 @@ void _Identity(const XTensor * x, XTensor * y);
/* de/dx */
extern "C"
void IdentityBackward(XTensor * gold, XTensor * y, XTensor * x,
                      XTensor * dedy, XTensor * dedx,
                      LOSS_FUNCTION_NAME lossName);
void _IdentityBackward(XTensor * gold, XTensor * y, XTensor * x,
                       XTensor * dedy, XTensor * dedx,
                       LOSS_FUNCTION_NAME lossName);
} // namespace nts(NiuTrans.Tensor)
......
......@@ -76,16 +76,16 @@ rectified: y = 0 if x < 0
>> dedx - dE/dx
>> lossName - type of loss function, e.g., cross entropy
*/
void RectifyBackward(XTensor * gold, XTensor * y, XTensor * x,
                     XTensor * dedy, XTensor * dedx,
                     LOSS_FUNCTION_NAME lossName)
void _RectifyBackward(XTensor * gold, XTensor * y, XTensor * x,
                      XTensor * dedy, XTensor * dedx,
                      LOSS_FUNCTION_NAME lossName)
{
    CheckNTErrors((gold == NULL || XTensor::IsIdentical(gold, y)),
                  "The tensors must be of the same size!");

#ifdef USE_CUDA
    if(x->devID >= 0 || y->devID >= 0){
        CudaRectifyBackward(gold, y, x, dedy, dedx, lossName);
        _CudaRectifyBackward(gold, y, x, dedy, dedx, lossName);
        return;
    }
#endif
......
......@@ -126,9 +126,9 @@ rectify : y = s if s >= 0
>> oBeg - where to start in the model output (along the leading dimension)
>> parallelRunner - parallel processing module
*/
void CudaRectifyBackward(XTensor * gold, XTensor * y, XTensor * x,
                         XTensor * dedy, XTensor * dedx,
                         LOSS_FUNCTION_NAME lossName)
void _CudaRectifyBackward(XTensor * gold, XTensor * y, XTensor * x,
                          XTensor * dedy, XTensor * dedx,
                          LOSS_FUNCTION_NAME lossName)
{
    if(x->dataType == DEFAULT_DTYPE && y->dataType == DEFAULT_DTYPE){
......
......@@ -35,9 +35,9 @@ void _CudaRectify(const XTensor * input, XTensor * output);
/* de/dx (Cuda version) */
extern "C"
void CudaRectifyBackward(XTensor * gold, XTensor * y, XTensor * x,
                         XTensor * dedy, XTensor * dedx,
                         LOSS_FUNCTION_NAME lossName);
void _CudaRectifyBackward(XTensor * gold, XTensor * y, XTensor * x,
                          XTensor * dedy, XTensor * dedx,
                          LOSS_FUNCTION_NAME lossName);
#endif // USE_CUDA
......
......@@ -33,9 +33,9 @@ void _Rectify(const XTensor * x, XTensor * y);
/* de/dx */
extern "C"
void RectifyBackward(XTensor * gold, XTensor * y, XTensor * x,
                     XTensor * dedy, XTensor * dedx,
                     LOSS_FUNCTION_NAME lossName);
void _RectifyBackward(XTensor * gold, XTensor * y, XTensor * x,
                      XTensor * dedy, XTensor * dedx,
                      LOSS_FUNCTION_NAME lossName);
} // namespace nts(NiuTrans.Tensor)
......
......@@ -68,16 +68,16 @@ sigmoid: y = 1/(1+exp(-x))
>> dedx - dE/dx
>> lossName - type of loss function, e.g., cross entropy
*/
void SigmoidBackward(XTensor * gold, XTensor * y, XTensor * x,
                     XTensor * dedy, XTensor * dedx,
                     LOSS_FUNCTION_NAME lossName)
void _SigmoidBackward(XTensor * gold, XTensor * y, XTensor * x,
                      XTensor * dedy, XTensor * dedx,
                      LOSS_FUNCTION_NAME lossName)
{
    CheckNTErrors((gold == NULL || XTensor::IsIdentical(gold, y)),
                  "The tensors must be of the same size!");

#ifdef USE_CUDA
    if(x->devID >= 0 || y->devID >= 0){
        CudaSigmoidBackward(gold, y, x, dedy, dedx, lossName);
        _CudaSigmoidBackward(gold, y, x, dedy, dedx, lossName);
        return;
    }
#endif
......
......@@ -122,9 +122,9 @@ sigmoid: y = 1/(1+exp(-x))
>> dedx - dE/dx
>> lossName - type of loss function, e.g., cross entropy
*/
void CudaSigmoidBackward(XTensor * gold, XTensor * y, XTensor * x,
                         XTensor * dedy, XTensor * dedx,
                         LOSS_FUNCTION_NAME lossName)
void _CudaSigmoidBackward(XTensor * gold, XTensor * y, XTensor * x,
                          XTensor * dedy, XTensor * dedx,
                          LOSS_FUNCTION_NAME lossName)
{
    if(x->dataType == DEFAULT_DTYPE && y->dataType == DEFAULT_DTYPE){
        /* calculate dE/dy */
......
......@@ -35,9 +35,9 @@ void _CudaSigmoid(const XTensor * input, XTensor * output);
/* de/dx (Cuda version) */
extern "C"
void CudaSigmoidBackward(XTensor * gold, XTensor * y, XTensor * x,
                         XTensor * dedy, XTensor * dedx,
                         LOSS_FUNCTION_NAME lossName);
void _CudaSigmoidBackward(XTensor * gold, XTensor * y, XTensor * x,
                          XTensor * dedy, XTensor * dedx,
                          LOSS_FUNCTION_NAME lossName);
#endif // USE_CUDA
......
......@@ -33,9 +33,9 @@ void _Sigmoid(const XTensor * x, XTensor * y);
/* de/dx */
extern "C"
void SigmoidBackward(XTensor * gold, XTensor * y, XTensor * x,
                     XTensor * dedy, XTensor * dedx,
                     LOSS_FUNCTION_NAME lossName);
void _SigmoidBackward(XTensor * gold, XTensor * y, XTensor * x,
                      XTensor * dedy, XTensor * dedx,
                      LOSS_FUNCTION_NAME lossName);
} // namespace nts(NiuTrans.Tensor)
......
......@@ -154,10 +154,10 @@ See more details in LogSoftmaxBackward(...)
>> lossName - type of loss function, e.g., cross entropy
>> leadDim - leading dimension (along which we perform reduction)
*/
void SoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
                     XTensor * dedy, XTensor * dedx,
                     int leadDim,
                     LOSS_FUNCTION_NAME lossName)
void _SoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
                      XTensor * dedy, XTensor * dedx,
                      int leadDim,
                      LOSS_FUNCTION_NAME lossName)
{
    CheckNTErrors((dedx->isSparse == false), "The gradient tensor must be dense!");
    CheckNTErrors((gold != NULL), "Incorrect x gold standard tensor!");
......@@ -165,7 +165,7 @@ void SoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
    int leadDimRDI = y->order - leadDim - 1;

#ifdef USE_CUDA
    if(y->devID >= 0){
        CudaSoftmaxBackward(gold, y, x, dedy, dedx, leadDim, lossName);
        _CudaSoftmaxBackward(gold, y, x, dedy, dedx, leadDim, lossName);
        return;
    }
#endif
......
......@@ -230,10 +230,10 @@ See more details in SoftmaxBackward
>> lossName - type of loss function, e.g., cross entropy
>> leadDim - leading dimension (along which we perform reduction)
*/
void CudaSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
                         XTensor * dedy, XTensor * dedx,
                         int leadDim,
                         LOSS_FUNCTION_NAME lossName)
void _CudaSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
                          XTensor * dedy, XTensor * dedx,
                          int leadDim,
                          LOSS_FUNCTION_NAME lossName)
{
    CheckNTErrors((x->devID >= 0), "Backward computation of log softmax must be run on GPUs.");
    CheckNTErrors((x->devID == y->devID), "Matrices used in log softmax are not on the same GPU.");
......
......@@ -39,10 +39,10 @@ void _CudaSoftmaxSumMax(const XTensor * x, XTensor * y, int leadDim, XTensor * s
/* de/dx (Cuda version) */
extern "C"
void CudaSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
                         XTensor * dedy, XTensor * dedx,
                         int leadDim,
                         LOSS_FUNCTION_NAME lossName);
void _CudaSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
                          XTensor * dedy, XTensor * dedx,
                          int leadDim,
                          LOSS_FUNCTION_NAME lossName);
#endif // USE_CUDA
......
......@@ -33,10 +33,10 @@ void _Softmax(const XTensor * x, XTensor * y, int leadDim);
/* de/dx */
extern "C"
void SoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
                     XTensor * dedy, XTensor * dedx,
                     int leadDim,
                     LOSS_FUNCTION_NAME lossName);
void _SoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
                      XTensor * dedy, XTensor * dedx,
                      int leadDim,
                      LOSS_FUNCTION_NAME lossName);
} // namespace nts(NiuTrans.Tensor)
......
......@@ -142,7 +142,7 @@ bool TestIdentity2()
_Identity(x, y);
/* call IdentityBackward function */
IdentityBackward(g, y, x, dedy, dedx, CROSSENTROPY);
_IdentityBackward(g, y, x, dedy, dedx, CROSSENTROPY);
/* check result */
cpuTest = y->CheckData(yAnswer, unitNum, 1e-4F)
......@@ -171,7 +171,7 @@ bool TestIdentity2()
_Identity(xGPU, yGPU);
/* call IdentityBackward function */
IdentityBackward(gGPU, yGPU, xGPU, dedyGPU, dedxGPU, CROSSENTROPY);
_IdentityBackward(gGPU, yGPU, xGPU, dedyGPU, dedxGPU, CROSSENTROPY);
/* check result */
gpuTest = yGPU->CheckData(yAnswer, unitNum, 1e-4F)
......
......@@ -147,7 +147,7 @@ bool TestRectify2()
_Rectify(x, y);
/* call RectifyBackward function */
RectifyBackward(gold, y, x, dedy, dedx, CROSSENTROPY);
_RectifyBackward(gold, y, x, dedy, dedx, CROSSENTROPY);
/* check results */
cpuTest = y->CheckData(yAnswer, unitNum, 1e-4F)
......@@ -176,7 +176,7 @@ bool TestRectify2()
_Rectify(xGPU, yGPU);
/* call rectifybackward function */
RectifyBackward(goldGPU, yGPU, xGPU, dedyGPU, dedxGPU, CROSSENTROPY);
_RectifyBackward(goldGPU, yGPU, xGPU, dedyGPU, dedxGPU, CROSSENTROPY);
/* check results */
gpuTest = yGPU->CheckData(yAnswer, unitNum, 1e-4F)
......
......@@ -141,7 +141,7 @@ bool TestSigmoid2()
_Sigmoid(x, y);
/* call SigmoidBackward function */
SigmoidBackward(g, y, x, dedy, dedx, CROSSENTROPY);
_SigmoidBackward(g, y, x, dedy, dedx, CROSSENTROPY);
/* check result */
cpuTest = y->CheckData(yAnswer, unitNum, 1e-4F)
......@@ -170,7 +170,7 @@ bool TestSigmoid2()
_Sigmoid(xGPU, yGPU);
/* call SigmoidBackward function */
SigmoidBackward(gGPU, yGPU, xGPU, dedyGPU, dedxGPU, CROSSENTROPY);
_SigmoidBackward(gGPU, yGPU, xGPU, dedyGPU, dedxGPU, CROSSENTROPY);
/* check result */
gpuTest = yGPU->CheckData(yAnswer, unitNum, 1e-4F)
......
......@@ -142,7 +142,7 @@ bool TestSoftmax2()
_Softmax(x, y, 1);
/* call SoftmaxBackward function */
SoftmaxBackward(g, y, x, dedy, dedx, 1, CROSSENTROPY);
_SoftmaxBackward(g, y, x, dedy, dedx, 1, CROSSENTROPY);
/* check result */
cpuTest = y->CheckData(yAnswer, unitNum, 1e-4F)
......@@ -170,7 +170,7 @@ bool TestSoftmax2()
_Softmax(xGPU, yGPU, 1);
/* call SoftmaxBackward function */
SoftmaxBackward(gGPU, yGPU, xGPU, dedyGPU, dedxGPU, 1, CROSSENTROPY);
_SoftmaxBackward(gGPU, yGPU, xGPU, dedyGPU, dedxGPU, 1, CROSSENTROPY);
/* check result */
gpuTest = yGPU->CheckData(yAnswer, unitNum, 1e-4F)
......