Commit 3a3b625a by xiaotong

backward propagation for activation functions

parent 28f66400
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* backward computation for activation functions
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-18
* Dingdang won 5 games in the GO training yesterday, hahaha ...
*/
#include "XNoder.h"
#include "XBackwardFunc.h"
#include "../tensor/XName.h"
#include "../tensor/function/FHeader.h"
namespace nts{
/* compute dE/dx of a node */
void XFuncGrad::MakeGrad(XTensor * node)
{
XLink &income = node->income;
int operID = income.typeID;
CheckNTErrors(node->grad != NULL, "No gradient found!");
CheckNTErrors(income.tailNum == 1, "Too many input tensors for the function!");
XTensor * input = income.tails[0];
XTensor * output = node;
XNoder::MakeGrad(input);
if(operID == FUNC_HARDTANH)
_HardTanHBackward(NULL, output, input, output->grad, input->grad, NOLOSS);
else if(operID == FUNC_IDENTITY)
_IdentityBackward(NULL, output, input, output->grad, input->grad, NOLOSS);
else if(operID == FUNC_LOGSOFTMAX){
int leadDim = income.GetParamInt(0);
_LogSoftmaxBackward(NULL, output, input, output->grad, input->grad, leadDim, NOLOSS);
}
else if(operID == FUNC_RECTIFY)
_RectifyBackward(NULL, output, input, output->grad, input->grad, NOLOSS);
else if(operID == FUNC_SIGMOID)
_SigmoidBackward(NULL, output, input, output->grad, input->grad, NOLOSS);
else if(operID == FUNC_SOFTMAX){
int leadDim = income.GetParamInt(0);
_SoftmaxBackward(NULL, output, input, output->grad, input->grad, leadDim, NOLOSS);
}
else{
ShowNTErrors("Wrong activation function type!");
}
}
/* indicates whether the node is for an activation function */
bool XFuncGrad::IsFunc(XTensor * node)
{
XLink &income = node->income;
return (income.typeID & FUNCTION_BASE) != 0;
}
}
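MakeGrad above always passes NOLOSS and a NULL gold tensor, so each backward routine only has to apply the chain rule to the gradient already stored in output->grad. For the element-wise activations this reduces to the standard local derivatives below (a reference sketch of the math, assuming the usual hard-tanh clipping to [-1, 1]; it is not a transcription of the library's kernels):

\frac{\partial E}{\partial x_i} = \frac{\partial E}{\partial y_i} \cdot \frac{\partial y_i}{\partial x_i}, \qquad
\frac{\partial y_i}{\partial x_i} =
\begin{cases}
1 & \text{identity} \\
\mathbf{1}[-1 < x_i < 1] & \text{hardtanh} \\
\mathbf{1}[x_i > 0] & \text{rectify (ReLU)}
\end{cases}

Softmax and log-softmax are not element-wise along the reduction dimension, which is why those two branches also read leadDim from the incoming edge and pass it on.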
@@ -29,6 +29,19 @@
 namespace nts{

+/* this class computes the gradient for activation functions given a node */
+class XFuncGrad
+{
+public:
+    /* compute dE/dx of a node */
+    static
+    void MakeGrad(XTensor * node);
+
+    /* indicates whether the node is for an activation function */
+    static
+    bool IsFunc(XTensor * node);
+};
+
 }

 #endif
\ No newline at end of file
@@ -23,6 +23,7 @@
 #include "XNoder.h"
 #include "XBackwardLoss.h"
 #include "XBackwardMath.h"
+#include "XBackwardFunc.h"
 #include "../tensor/XName.h"

 namespace nts{
@@ -143,6 +144,11 @@ void XNet::BackwardNode(XTensor * node)
     if(!XNoder::IsLeaf(node)){
         if(XMathGrad::IsMathOP(node))
             XMathGrad::MakeGrad(node);
+        else if(XFuncGrad::IsFunc(node))
+            XFuncGrad::MakeGrad(node);
+        else{
+            ShowNTErrors("Wrong node type!");
+        }
     }

     node->visitMark = NODE_FINISHED;
......
@@ -208,7 +208,7 @@ Obviously C = A * B performs normal matrix multiplication if A = y * z and B = x
 << return - the result of matrix multiplication
 */
 XTensor MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA, const XTensor &b, MATRIX_TRANS_TYPE transposedB,
                   DTYPE alpha, DTYPE beta, XPRunner * parallelRunner)
 {
     CheckNTErrors(&a != &NULLTensor && &b != &NULLTensor, "Empty input tensors!");
     CheckNTErrors(a.dataType == b.dataType, "Input tensors should have the same data type!");
......
@@ -47,9 +47,9 @@ dE/dx = dE/dy * dy/dx = dE/dy
 >> dedx - dE/dx
 >> lossName - type of loss function, e.g., cross entropy
 */
-void IdentityBackward(XTensor * gold, XTensor * y, XTensor * x,
-                      XTensor * dedy, XTensor * dedx,
-                      LOSS_FUNCTION_NAME lossName)
+void _IdentityBackward(XTensor * gold, XTensor * y, XTensor * x,
+                       XTensor * dedy, XTensor * dedx,
+                       LOSS_FUNCTION_NAME lossName)
 {
     CheckNTErrors((gold == NULL || XTensor::IsIdentical(gold, y)),
                   "The tensors must be of the same size!");
......
@@ -33,9 +33,9 @@ void _Identity(const XTensor * x, XTensor * y);
 /* de/dx */
 extern "C"
-void IdentityBackward(XTensor * gold, XTensor * y, XTensor * x,
-                      XTensor * dedy, XTensor * dedx,
-                      LOSS_FUNCTION_NAME lossName);
+void _IdentityBackward(XTensor * gold, XTensor * y, XTensor * x,
+                       XTensor * dedy, XTensor * dedx,
+                       LOSS_FUNCTION_NAME lossName);

 } // namespace nts(NiuTrans.Tensor)
......
@@ -76,16 +76,16 @@ rectified: y = 0 if x < 0
 >> dedx - dE/dx
 >> lossName - type of loss function, e.g., cross entropy
 */
-void RectifyBackward(XTensor * gold, XTensor * y, XTensor * x,
-                     XTensor * dedy, XTensor * dedx,
-                     LOSS_FUNCTION_NAME lossName)
+void _RectifyBackward(XTensor * gold, XTensor * y, XTensor * x,
+                      XTensor * dedy, XTensor * dedx,
+                      LOSS_FUNCTION_NAME lossName)
 {
     CheckNTErrors((gold == NULL || XTensor::IsIdentical(gold, y)),
                   "The tensors must be of the same size!");

 #ifdef USE_CUDA
     if(x->devID >= 0 || y->devID >= 0){
-        CudaRectifyBackward(gold, y, x, dedy, dedx, lossName);
+        _CudaRectifyBackward(gold, y, x, dedy, dedx, lossName);
         return;
     }
 #endif
......
@@ -126,9 +126,9 @@ rectify : y = s if s >= 0
 >> oBeg - where to start in the model output (along the leading dimension)
 >> parallelRunner - parallel processing module
 */
-void CudaRectifyBackward(XTensor * gold, XTensor * y, XTensor * x,
-                         XTensor * dedy, XTensor * dedx,
-                         LOSS_FUNCTION_NAME lossName)
+void _CudaRectifyBackward(XTensor * gold, XTensor * y, XTensor * x,
+                          XTensor * dedy, XTensor * dedx,
+                          LOSS_FUNCTION_NAME lossName)
 {
     if(x->dataType == DEFAULT_DTYPE && y->dataType == DEFAULT_DTYPE){
......
@@ -35,9 +35,9 @@ void _CudaRectify(const XTensor * input, XTensor * output);
 /* de/dx (Cuda version) */
 extern "C"
-void CudaRectifyBackward(XTensor * gold, XTensor * y, XTensor * x,
-                         XTensor * dedy, XTensor * dedx,
-                         LOSS_FUNCTION_NAME lossName);
+void _CudaRectifyBackward(XTensor * gold, XTensor * y, XTensor * x,
+                          XTensor * dedy, XTensor * dedx,
+                          LOSS_FUNCTION_NAME lossName);

 #endif // USE_CUDA
......
@@ -33,9 +33,9 @@ void _Rectify(const XTensor * x, XTensor * y);
 /* de/dx */
 extern "C"
-void RectifyBackward(XTensor * gold, XTensor * y, XTensor * x,
-                     XTensor * dedy, XTensor * dedx,
-                     LOSS_FUNCTION_NAME lossName);
+void _RectifyBackward(XTensor * gold, XTensor * y, XTensor * x,
+                      XTensor * dedy, XTensor * dedx,
+                      LOSS_FUNCTION_NAME lossName);

 } // namespace nts(NiuTrans.Tensor)
......
@@ -68,16 +68,16 @@ sigmoid: y = 1/(1+exp(-x))
 >> dedx - dE/dx
 >> lossName - type of loss function, e.g., cross entropy
 */
-void SigmoidBackward(XTensor * gold, XTensor * y, XTensor * x,
-                     XTensor * dedy, XTensor * dedx,
-                     LOSS_FUNCTION_NAME lossName)
+void _SigmoidBackward(XTensor * gold, XTensor * y, XTensor * x,
+                      XTensor * dedy, XTensor * dedx,
+                      LOSS_FUNCTION_NAME lossName)
 {
     CheckNTErrors((gold == NULL || XTensor::IsIdentical(gold, y)),
                   "The tensors must be of the same size!");

 #ifdef USE_CUDA
     if(x->devID >= 0 || y->devID >= 0){
-        CudaSigmoidBackward(gold, y, x, dedy, dedx, lossName);
+        _CudaSigmoidBackward(gold, y, x, dedy, dedx, lossName);
         return;
     }
 #endif
......
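For the sigmoid routines renamed above, the local derivative can be expressed through the output y alone, which keeps the backward computation cheap (standard math, shown here as a sketch rather than the kernel's exact formulation):

y = \frac{1}{1 + e^{-x}}, \qquad
\frac{\partial y}{\partial x} = y\,(1 - y), \qquad
\frac{\partial E}{\partial x} = \frac{\partial E}{\partial y}\; y\,(1 - y)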
@@ -122,9 +122,9 @@ sigmoid: y = 1/(1+exp(-x))
 >> dedx - dE/dx
 >> lossName - type of loss function, e.g., cross entropy
 */
-void CudaSigmoidBackward(XTensor * gold, XTensor * y, XTensor * x,
-                         XTensor * dedy, XTensor * dedx,
-                         LOSS_FUNCTION_NAME lossName)
+void _CudaSigmoidBackward(XTensor * gold, XTensor * y, XTensor * x,
+                          XTensor * dedy, XTensor * dedx,
+                          LOSS_FUNCTION_NAME lossName)
 {
     if(x->dataType == DEFAULT_DTYPE && y->dataType == DEFAULT_DTYPE){
         /* calculate dE/dy */
......
@@ -35,9 +35,9 @@ void _CudaSigmoid(const XTensor * input, XTensor * output);
 /* de/dx (Cuda version) */
 extern "C"
-void CudaSigmoidBackward(XTensor * gold, XTensor * y, XTensor * x,
-                         XTensor * dedy, XTensor * dedx,
-                         LOSS_FUNCTION_NAME lossName);
+void _CudaSigmoidBackward(XTensor * gold, XTensor * y, XTensor * x,
+                          XTensor * dedy, XTensor * dedx,
+                          LOSS_FUNCTION_NAME lossName);

 #endif // USE_CUDA
......
@@ -33,9 +33,9 @@ void _Sigmoid(const XTensor * x, XTensor * y);
 /* de/dx */
 extern "C"
-void SigmoidBackward(XTensor * gold, XTensor * y, XTensor * x,
-                     XTensor * dedy, XTensor * dedx,
-                     LOSS_FUNCTION_NAME lossName);
+void _SigmoidBackward(XTensor * gold, XTensor * y, XTensor * x,
+                      XTensor * dedy, XTensor * dedx,
+                      LOSS_FUNCTION_NAME lossName);

 } // namespace nts(NiuTrans.Tensor)
......
@@ -154,10 +154,10 @@ See more details in LogSoftmaxBackward(...)
 >> lossName - type of loss function, e.g., cross entropy
 >> leadDim - leading dimension (along which we perform reduction)
 */
-void SoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
-                     XTensor * dedy, XTensor * dedx,
-                     int leadDim,
-                     LOSS_FUNCTION_NAME lossName)
+void _SoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
+                      XTensor * dedy, XTensor * dedx,
+                      int leadDim,
+                      LOSS_FUNCTION_NAME lossName)
 {
     CheckNTErrors((dedx->isSparse == false), "The gradient tensor must be dense!");
     CheckNTErrors((gold != NULL), "Incorrect x gold standard tensor!");
@@ -165,7 +165,7 @@ void SoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
     int leadDimRDI = y->order - leadDim - 1;

 #ifdef USE_CUDA
     if(y->devID >= 0){
-        CudaSoftmaxBackward(gold, y, x, dedy, dedx, leadDim, lossName);
+        _CudaSoftmaxBackward(gold, y, x, dedy, dedx, leadDim, lossName);
         return;
     }
 #endif
......
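For softmax the backward is not element-wise: along leadDim the Jacobian couples all components, and with a cross-entropy loss against a gold distribution t (with \sum_j t_j = 1) it collapses to a simple difference. A standard derivation, independent of how the CPU and CUDA kernels organise the computation:

y_k = \frac{e^{x_k}}{\sum_j e^{x_j}}, \qquad
\frac{\partial E}{\partial x_k} = \sum_j \frac{\partial E}{\partial y_j}\, y_j (\delta_{jk} - y_k)
= y_k \Big( \frac{\partial E}{\partial y_k} - \sum_j \frac{\partial E}{\partial y_j}\, y_j \Big),

and with E = -\sum_j t_j \log y_j this reduces to \frac{\partial E}{\partial x_k} = y_k - t_k, which is why _SoftmaxBackward takes both the gold tensor and leadDim.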
@@ -230,10 +230,10 @@ See more details in SoftmaxBackward
 >> lossName - type of loss function, e.g., cross entropy
 >> leadDim - leading dimension (along which we perform reduction)
 */
-void CudaSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
-                         XTensor * dedy, XTensor * dedx,
-                         int leadDim,
-                         LOSS_FUNCTION_NAME lossName)
+void _CudaSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
+                          XTensor * dedy, XTensor * dedx,
+                          int leadDim,
+                          LOSS_FUNCTION_NAME lossName)
 {
     CheckNTErrors((x->devID >= 0), "Backward computation of log softmax must be run on GPUs.");
     CheckNTErrors((x->devID == y->devID), "Matrices used in log softmax are not on the same GPU.");
......
@@ -39,10 +39,10 @@ void _CudaSoftmaxSumMax(const XTensor * x, XTensor * y, int leadDim, XTensor * s
 /* de/dx (Cuda version) */
 extern "C"
-void CudaSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
-                         XTensor * dedy, XTensor * dedx,
-                         int leadDim,
-                         LOSS_FUNCTION_NAME lossName);
+void _CudaSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
+                          XTensor * dedy, XTensor * dedx,
+                          int leadDim,
+                          LOSS_FUNCTION_NAME lossName);

 #endif // USE_CUDA
......
@@ -33,10 +33,10 @@ void _Softmax(const XTensor * x, XTensor * y, int leadDim);
 /* de/dx */
 extern "C"
-void SoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
-                     XTensor * dedy, XTensor * dedx,
-                     int leadDim,
-                     LOSS_FUNCTION_NAME lossName);
+void _SoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
+                      XTensor * dedy, XTensor * dedx,
+                      int leadDim,
+                      LOSS_FUNCTION_NAME lossName);

 } // namespace nts(NiuTrans.Tensor)
......
@@ -142,7 +142,7 @@ bool TestIdentity2()
     _Identity(x, y);

     /* call IdentityBackward function */
-    IdentityBackward(g, y, x, dedy, dedx, CROSSENTROPY);
+    _IdentityBackward(g, y, x, dedy, dedx, CROSSENTROPY);

     /* check result */
     cpuTest = y->CheckData(yAnswer, unitNum, 1e-4F)
@@ -171,7 +171,7 @@ bool TestIdentity2()
     _Identity(xGPU, yGPU);

     /* call IdentityBackward function */
-    IdentityBackward(gGPU, yGPU, xGPU, dedyGPU, dedxGPU, CROSSENTROPY);
+    _IdentityBackward(gGPU, yGPU, xGPU, dedyGPU, dedxGPU, CROSSENTROPY);

     /* check result */
     gpuTest = yGPU->CheckData(yAnswer, unitNum, 1e-4F)
......
@@ -147,7 +147,7 @@ bool TestRectify2()
     _Rectify(x, y);

     /* call RectifyBackward function */
-    RectifyBackward(gold, y, x, dedy, dedx, CROSSENTROPY);
+    _RectifyBackward(gold, y, x, dedy, dedx, CROSSENTROPY);

     /* check results */
     cpuTest = y->CheckData(yAnswer, unitNum, 1e-4F)
@@ -176,7 +176,7 @@ bool TestRectify2()
     _Rectify(xGPU, yGPU);

     /* call rectifybackward function */
-    RectifyBackward(goldGPU, yGPU, xGPU, dedyGPU, dedxGPU, CROSSENTROPY);
+    _RectifyBackward(goldGPU, yGPU, xGPU, dedyGPU, dedxGPU, CROSSENTROPY);

     /* check results */
     gpuTest = yGPU->CheckData(yAnswer, unitNum, 1e-4F)
......
@@ -141,7 +141,7 @@ bool TestSigmoid2()
     _Sigmoid(x, y);

     /* call SigmoidBackward function */
-    SigmoidBackward(g, y, x, dedy, dedx, CROSSENTROPY);
+    _SigmoidBackward(g, y, x, dedy, dedx, CROSSENTROPY);

     /* check result */
     cpuTest = y->CheckData(yAnswer, unitNum, 1e-4F)
@@ -170,7 +170,7 @@ bool TestSigmoid2()
     _Sigmoid(xGPU, yGPU);

     /* call SigmoidBackward function */
-    SigmoidBackward(gGPU, yGPU, xGPU, dedyGPU, dedxGPU, CROSSENTROPY);
+    _SigmoidBackward(gGPU, yGPU, xGPU, dedyGPU, dedxGPU, CROSSENTROPY);

     /* check result */
     gpuTest = yGPU->CheckData(yAnswer, unitNum, 1e-4F)
......
@@ -142,7 +142,7 @@ bool TestSoftmax2()
     _Softmax(x, y, 1);

     /* call SoftmaxBackward function */
-    SoftmaxBackward(g, y, x, dedy, dedx, 1, CROSSENTROPY);
+    _SoftmaxBackward(g, y, x, dedy, dedx, 1, CROSSENTROPY);

     /* check result */
     cpuTest = y->CheckData(yAnswer, unitNum, 1e-4F)
@@ -170,7 +170,7 @@ bool TestSoftmax2()
     _Softmax(xGPU, yGPU, 1);

     /* call SoftmaxBackward function */
-    SoftmaxBackward(gGPU, yGPU, xGPU, dedyGPU, dedxGPU, 1, CROSSENTROPY);
+    _SoftmaxBackward(gGPU, yGPU, xGPU, dedyGPU, dedxGPU, 1, CROSSENTROPY);

     /* check result */
     gpuTest = yGPU->CheckData(yAnswer, unitNum, 1e-4F)
......