Commit daf4765a by linye

update hardtanh

parent fe868e5c
@@ -17,7 +17,7 @@
/*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-25
* $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-04 float16 added
* $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-12 float16 added
*/
#include "HardTanH.h"
@@ -105,14 +105,15 @@ dy/dx = 1 if -1 <= x <= 1
>> x - input of the function
>> size - size of y/x
*/
template <class T>
__global__
void KernelHardtanhBackward(DTYPE * dedy, DTYPE * dedx, DTYPE * gold, DTYPE * y, DTYPE * x, int size)
void KernelHardtanhBackward(T * dedy, T * dedx, T * gold, T * y, T * x, int size)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size){
DTYPE s = x[i];
if(s > (DTYPE)1.0 || s < (DTYPE)-1.0)
T s = x[i];
if(s > (T)1.0 || s < (T)-1.0)
dedx[i] = 0;
else
dedx[i] = dedy[i];
@@ -142,21 +143,24 @@ void _CudaHardTanHBackward(XTensor * gold, XTensor * y, XTensor * x,
XTensor * dedy, XTensor * dedx,
LOSS_FUNCTION_NAME lossName)
{
if(x->dataType == DEFAULT_DTYPE && y->dataType == DEFAULT_DTYPE){
CheckNTErrors(((x->dataType == DEFAULT_DTYPE && y->dataType == DEFAULT_DTYPE) ||
(x->dataType == X_FLOAT16 && y->dataType == X_FLOAT16)),
"Input vectors are not in default type.");
/* calculate dE/dy */
if (lossName == CROSSENTROPY)
_CudaCrossEntropyBackward(dedy, y, gold);
else if (lossName != NOLOSS)
_CudaLossBackward(dedy, gold, y, lossName);
int gridSize[3], blockSize[3];
GDevs.GetCudaThread(x->devID, x->unitNum, gridSize, blockSize);
int devIDBackup;
ProtectCudaDev(x->devID, devIDBackup);
if(x->dataType == DEFAULT_DTYPE && y->dataType == DEFAULT_DTYPE){
/* dE/dx = dE/dy * dy/dx */
KernelHardtanhBackward<<<dim3(gridSize[0]),dim3(blockSize[0])>>>
((DTYPE*)dedy->data,
@@ -164,11 +168,18 @@ void _CudaHardTanHBackward(XTensor * gold, XTensor * y, XTensor * x,
gold == NULL ? NULL : (DTYPE*)gold->data,
(DTYPE*)y->data, (DTYPE*)x->data,
x->unitNum);
BacktoCudaDev(x->devID, devIDBackup);
}
else
ShowNTErrors("TODO!");
else if (x->dataType == X_FLOAT16 && y->dataType == X_FLOAT16) {
/* dE/dx = dE/dy * dy/dx */
KernelHardtanhBackward<<<dim3(gridSize[0]), dim3(blockSize[0])>>>
((half*)dedy->data,
(half*)dedx->data,
gold == NULL ? NULL : (half*)gold->data,
(half*)y->data, (half*)x->data,
x->unitNum);
}
BacktoCudaDev(x->devID, devIDBackup);
}
#endif
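For reference, the rule implemented by both instantiations of KernelHardtanhBackward follows from the hardtanh definition: y = 1 for x > 1, y = x for -1 <= x <= 1, and y = -1 for x < -1, so dy/dx is 1 inside the interval and 0 outside, and dE/dx is either dE/dy or 0. Below is a minimal host-side float sketch of the function and its derivative; these helpers are illustrative only and are not part of this commit or the repository.

/* hardtanh and its derivative, plain float reference (hypothetical helpers, not part of the repository) */
float HardTanHRef(float x)
{
    if (x > 1.0F)
        return 1.0F;          /* clipped above: dy/dx = 0 */
    else if (x < -1.0F)
        return -1.0F;         /* clipped below: dy/dx = 0 */
    else
        return x;             /* identity inside [-1, 1]: dy/dx = 1 */
}

float HardTanHGradRef(float x)
{
    /* dy/dx = 1 if -1 <= x <= 1, 0 otherwise, matching the kernel's zeroing of dedx */
    return (x > 1.0F || x < -1.0F) ? 0.0F : 1.0F;
}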
@@ -222,8 +222,9 @@ backward computation for squared error (Cuda kernel)
>> y - model output (in vector)
>> size - size of the vector (dedy)
*/
template <class T>
__global__
void KernelLossBackwardSquaredError(DTYPE * dedy, DTYPE * t, DTYPE * y, int size)
void KernelLossBackwardSquaredError(T * dedy, T * t, T * y, int size)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
@@ -242,8 +243,9 @@ backward computation of blocks for squared error (Cuda kernel)
>> lenInBlock - number of items in a block for computation
>> size - size of the vector (dedy)
*/
template <class T>
__global__
void KernelLossBackwardSquaredErrorBlock(DTYPE * dedy, DTYPE * t, DTYPE * y,
void KernelLossBackwardSquaredErrorBlock(T * dedy, T * t, T * y,
int blockSize, int begInBlock, int lenInBlock, int size)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
@@ -265,8 +267,9 @@ backward computation for cross entropy (Cuda kernel)
>> y - model output (in vector)
>> size - size of the vector (dedy)
*/
template <class T>
__global__
void KernelLossBackwardCrossEntropy(DTYPE * dedy, DTYPE * t, DTYPE * y, int tBeg, int tLen, int yBeg, int blockNum, int stride, int dimensionSize)
void KernelLossBackwardCrossEntropy(T * dedy, T * t, T * y, int tBeg, int tLen, int yBeg, int blockNum, int stride, int dimensionSize)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i > stride * dimensionSize * blockNum)
@@ -297,8 +300,9 @@ backward computation for cross entropy (Cuda kernel)
>> lenInBlock - number of items in a block for computation
>> size - size of the vector (dedy)
*/
template <class T>
__global__
void KernelLossBackwardCrossEntropyBlock(DTYPE * dedy, DTYPE * t, DTYPE * y,
void KernelLossBackwardCrossEntropyBlock(T * dedy, T * t, T * y,
int blockSize, int begInBlock, int lenInBlock, int size)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
@@ -337,14 +341,8 @@ void _CudaLossBackward(XTensor * dedy, XTensor * t, XTensor * y,
CheckNTErrors(((dedy->devID == t->devID) && (dedy->devID == y->devID)),
"Tensor must be on the same device!");
CheckNTErrors((t->order > leadDim), "Illegal leading dimension!");
CheckNTErrors((t->dataType == DEFAULT_DTYPE &&
y->dataType == DEFAULT_DTYPE &&
dedy->dataType == DEFAULT_DTYPE),
"Input vectors are not in default type.");
CheckNTErrors((dedy->devID >= 0 && t->devID >= 0 && y->devID >= 0),
"The backward compuation must be performed on GPUs.");
CheckNTErrors((dedy->devID == t->devID && dedy->devID == y->devID),
"The vectors must be on the same GPU.");
CheckNTErrors((tBeg == yBeg), "TODO!");
@@ -376,51 +374,105 @@ void _CudaLossBackward(XTensor * dedy, XTensor * t, XTensor * y,
dim3 blocks(cudaGridSize[0]);
dim3 threads(cudaBlockSize[0]);
DTYPE * tp = (DTYPE*)t->data;
DTYPE * yp = (DTYPE*)y->data;
DTYPE * dedyp = (DTYPE*)dedy->data;
int devIDBackup;
ProtectCudaDev(y->devID, devIDBackup);
/*
squared error
loss = sum_{i} 0.5*(t_i - y_i)^2, where t_i is the gold standard and y_i is the model output
dloss/dy_i = y_i - t_i
*/
if(LFName == SQUAREDERROR){
if(t->isSparse){
ShowNTErrors("TODO!");
}
else if(size == y->unitNum){
KernelLossBackwardSquaredError<<<blocks, threads>>>(dedyp, tp, yp, y->unitNum);
}
else{
KernelLossBackwardSquaredErrorBlock<<<blocks, threads>>>(dedyp, tp, yp, blockSize, tBeg * stride, tLen * stride, y->unitNum);
if (t->dataType == DEFAULT_DTYPE &&
y->dataType == DEFAULT_DTYPE &&
dedy->dataType == DEFAULT_DTYPE) {
DTYPE * tp = (DTYPE*)t->data;
DTYPE * yp = (DTYPE*)y->data;
DTYPE * dedyp = (DTYPE*)dedy->data;
int devIDBackup;
ProtectCudaDev(y->devID, devIDBackup);
/*
squared error
loss = sum_{i} 0.5*(t_i - y_i)^2, where t_i is the gold standard and y_i is the model output
dloss/dy_i = y_i - t_i
*/
if (LFName == SQUAREDERROR) {
if (t->isSparse) {
ShowNTErrors("TODO!");
}
else if (size == y->unitNum) {
KernelLossBackwardSquaredError<<<blocks, threads>>>(dedyp, tp, yp, y->unitNum);
}
else {
KernelLossBackwardSquaredErrorBlock<<<blocks, threads>>>(dedyp, tp, yp, blockSize, tBeg * stride, tLen * stride, y->unitNum);
}
}
}
/*
cross entropy
loss = sum_{i} (-t_i * log(y_i)), where t and y are distributions
dloss/dy_i = -t_i / y_i
*/
else if(LFName == CROSSENTROPY){
if(t->isSparse){
ShowNTErrors("TODO!");
}
else if(size == y->unitNum){
KernelLossBackwardCrossEntropy<<<blocks, threads>>>(dedyp, tp, yp, tBeg, tLen, yBeg, blockNum, stride, dimensionSize);
/*
cross entropy
loss = sum_{i} (-t_i * log(y_i)), where t and y are distributions
dloss/dy_i = -t_i / y_i
*/
else if (LFName == CROSSENTROPY) {
if (t->isSparse) {
ShowNTErrors("TODO!");
}
else if (size == y->unitNum) {
KernelLossBackwardCrossEntropy<<<blocks, threads>>>(dedyp, tp, yp, tBeg, tLen, yBeg, blockNum, stride, dimensionSize);
}
else {
KernelLossBackwardCrossEntropyBlock<<<blocks, threads>>>(dedyp, tp, yp, blockSize, tBeg * stride, tLen * stride, y->unitNum);
}
}
BacktoCudaDev(y->devID, devIDBackup);
}
else if (t->dataType == X_FLOAT16 &&
y->dataType == X_FLOAT16 &&
dedy->dataType == X_FLOAT16) {
half * tp = (half*)t->data;
half * yp = (half*)y->data;
half * dedyp = (half*)dedy->data;
int devIDBackup;
ProtectCudaDev(y->devID, devIDBackup);
/*
squared error
loss = sum_{i} 0.5*(t_i - y_i)^2, where t_i is the gold standard and y_i is the model output
dloss/dy_i = y_i - t_i
*/
if (LFName == SQUAREDERROR) {
if (t->isSparse) {
ShowNTErrors("TODO!");
}
else if (size == y->unitNum) {
KernelLossBackwardSquaredError<<<blocks, threads>>>(dedyp, tp, yp, y->unitNum);
}
else {
KernelLossBackwardSquaredErrorBlock<<<blocks, threads>>>(dedyp, tp, yp, blockSize, tBeg * stride, tLen * stride, y->unitNum);
}
}
else{
KernelLossBackwardCrossEntropyBlock<<<blocks, threads>>>(dedyp, tp, yp, blockSize, tBeg * stride, tLen * stride, y->unitNum);
/*
cross entropy
loss = sum_{i} (-t_i * log(y_i)), where t and y are distributions
dloss/dy_i = -t_i / y_i
*/
else if (LFName == CROSSENTROPY) {
if (t->isSparse) {
ShowNTErrors("TODO!");
}
else if (size == y->unitNum) {
KernelLossBackwardCrossEntropy<<<blocks, threads>>>(dedyp, tp, yp, tBeg, tLen, yBeg, blockNum, stride, dimensionSize);
}
else {
KernelLossBackwardCrossEntropyBlock<<<blocks, threads>>>(dedyp, tp, yp, blockSize, tBeg * stride, tLen * stride, y->unitNum);
}
}
BacktoCudaDev(y->devID, devIDBackup);
}
else{
ShowNTErrors("TODO");
}
BacktoCudaDev(y->devID, devIDBackup);
}
#endif
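The gradients the kernels above compute can be read directly from the loss definitions in the comments: squared error, loss = sum_{i} 0.5*(t_i - y_i)^2, gives dloss/dy_i = y_i - t_i, and cross entropy, loss = sum_{i} (-t_i * log(y_i)), gives dloss/dy_i = -t_i / y_i. A per-element float sketch of the two gradients follows; these helpers are illustrative only and are not part of this commit or the repository.

/* per-element loss gradients, plain float reference (hypothetical helpers, not part of the repository) */
float SquaredErrorGradRef(float t, float y)
{
    /* loss = 0.5 * (t - y)^2  ->  dloss/dy = y - t */
    return y - t;
}

float CrossEntropyGradRef(float t, float y)
{
    /* loss = -t * log(y)  ->  dloss/dy = -t / y */
    return -t / y;
}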
@@ -82,9 +82,9 @@ bool Test()
//wrong = !TestCrossEntropy() || wrong;
//wrong = !TestDropout() || wrong;
//wrong = !TestHardTanH() || wrong;
wrong = !TestHardTanH() || wrong;
//wrong = !TestIdentity() || wrong;
wrong = !TestLogSoftmax() || wrong;
//wrong = !TestLogSoftmax() || wrong;
//wrong = !TestLoss() || wrong;
//wrong = !TestRectify() || wrong;
//wrong = !TestSigmoid() || wrong;