Commit daf4765a by linye

update hardtanh

parent fe868e5c
@@ -17,7 +17,7 @@
 /*
  * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-25
- * $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-04 float16 added
+ * $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-12 float16 added
  */
 #include "HardTanH.h"
@@ -105,14 +105,15 @@ dy/dx = 1 if -1 <= x <= 1
 >> x - x of the function
 >> size - size of y/x
 */
+template <class T>
 __global__
-void KernelHardtanhBackward(DTYPE * dedy, DTYPE * dedx, DTYPE * gold, DTYPE * y, DTYPE * x, int size)
+void KernelHardtanhBackward(T * dedy, T * dedx, T * gold, T * y, T * x, int size)
 {
     int i = blockDim.x * blockIdx.x + threadIdx.x;
 
     if (i < size){
-        DTYPE s = x[i];
-        if(s > (DTYPE)1.0 || s < (DTYPE)-1.0)
+        T s = x[i];
+        if(s > (T)1.0 || s < (T)-1.0)
             dedx[i] = 0;
         else
             dedx[i] = dedy[i];
@@ -142,21 +143,24 @@ void _CudaHardTanHBackward(XTensor * gold, XTensor * y, XTensor * x,
                            XTensor * dedy, XTensor * dedx,
                            LOSS_FUNCTION_NAME lossName)
 {
-    if(x->dataType == DEFAULT_DTYPE && y->dataType == DEFAULT_DTYPE){
+    CheckNTErrors(((x->dataType == DEFAULT_DTYPE && y->dataType == DEFAULT_DTYPE) ||
+                   (x->dataType == X_FLOAT16 && y->dataType == X_FLOAT16)),
+                  "Input vectors are not in default type.");
 
     /* calculate dE/dy */
-    if(lossName == CROSSENTROPY)
+    if (lossName == CROSSENTROPY)
         _CudaCrossEntropyBackward(dedy, y, gold);
-    else if(lossName != NOLOSS)
+    else if (lossName != NOLOSS)
         _CudaLossBackward(dedy, gold, y, lossName);
 
     int gridSize[3], blockSize[3];
     GDevs.GetCudaThread(x->devID, x->unitNum, gridSize, blockSize);
 
     int devIDBackup;
     ProtectCudaDev(x->devID, devIDBackup);
 
+    if(x->dataType == DEFAULT_DTYPE && y->dataType == DEFAULT_DTYPE){
         /* dE/dx = dE/dy * dy/dx */
         KernelHardtanhBackward<<<dim3(gridSize[0]),dim3(blockSize[0])>>>
                               ((DTYPE*)dedy->data,
@@ -164,11 +168,18 @@ void _CudaHardTanHBackward(XTensor * gold, XTensor * y, XTensor * x,
                                gold == NULL ? NULL : (DTYPE*)gold->data,
                                (DTYPE*)y->data, (DTYPE*)x->data,
                                x->unitNum);
-
-        BacktoCudaDev(x->devID, devIDBackup);
     }
-    else
-        ShowNTErrors("TODO!");
+    else if (x->dataType == X_FLOAT16 && y->dataType == X_FLOAT16) {
+        /* dE/dx = dE/dy * dy/dx */
+        KernelHardtanhBackward<<<dim3(gridSize[0]), dim3(blockSize[0])>>>
+                              ((half*)dedy->data,
+                               (half*)dedx->data,
+                               gold == NULL ? NULL : (half*)gold->data,
+                               (half*)y->data, (half*)x->data,
+                               x->unitNum);
+    }
+
+    BacktoCudaDev(x->devID, devIDBackup);
 }
 #endif
...
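The hunks above turn KernelHardtanhBackward into a template over the element type, so one kernel body serves both DTYPE (float) and half. Below is a small self-contained sketch of that pattern. It is illustrative only (hypothetical names, not the NiuTensor sources), assumes CUDA with cuda_fp16.h available, and does the comparison in float so the same body also compiles for T = half.

// hardtanh_backward_sketch.cu -- illustrative only, not part of the commit
#include <cuda_fp16.h>
#include <cstdio>

/* dE/dx = dE/dy inside [-1, 1], 0 outside; T may be float or half */
template <class T>
__global__ void KernelHardtanhBackwardSketch(T * dedy, T * dedx, T * x, int size)
{
    int i = blockDim.x * blockIdx.x + threadIdx.x;
    if (i < size) {
        float s = (float)x[i];   /* promote to float so the comparison is valid for half too */
        dedx[i] = (s > 1.0F || s < -1.0F) ? (T)0.0F : dedy[i];
    }
}

int main()
{
    const int n = 8;
    float hx[n], hdedy[n], hdedx[n];
    for (int i = 0; i < n; i++) { hx[i] = -2.0F + 0.5F * i; hdedy[i] = 1.0F; }

    float *dx, *ddedy, *ddedx;
    cudaMalloc(&dx, n * sizeof(float));
    cudaMalloc(&ddedy, n * sizeof(float));
    cudaMalloc(&ddedx, n * sizeof(float));
    cudaMemcpy(dx, hx, n * sizeof(float), cudaMemcpyHostToDevice);
    cudaMemcpy(ddedy, hdedy, n * sizeof(float), cudaMemcpyHostToDevice);

    /* float instantiation; a float16 caller would pass half* and get the <half> instantiation */
    KernelHardtanhBackwardSketch<float><<<1, n>>>(ddedy, ddedx, dx, n);
    cudaMemcpy(hdedx, ddedx, n * sizeof(float), cudaMemcpyDeviceToHost);

    for (int i = 0; i < n; i++)
        printf("x = % .1f  dE/dx = %.1f\n", hx[i], hdedx[i]);

    cudaFree(dx); cudaFree(ddedy); cudaFree(ddedx);
    return 0;
}

Gradients come out as 0 for |x| > 1 and pass dE/dy through otherwise, which is exactly the rule stated in the kernel comment above.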
@@ -222,8 +222,9 @@ backward compuation for squared error (Cuda kernel)
 >> y - model output (in vector)
 >> size - size of the vector (dedy)
 */
+template <class T>
 __global__
-void KernelLossBackwardSquaredError(DTYPE * dedy, DTYPE * t, DTYPE * y, int size)
+void KernelLossBackwardSquaredError(T * dedy, T * t, T * y, int size)
 {
     int i = blockDim.x * blockIdx.x + threadIdx.x;
@@ -242,8 +243,9 @@ backward compuation of blocks for squared error (Cuda kernel)
 >> lenInBlock - number of items in a block for computation
 >> size - size of the vector (dedy)
 */
+template <class T>
 __global__
-void KernelLossBackwardSquaredErrorBlock(DTYPE * dedy, DTYPE * t, DTYPE * y,
+void KernelLossBackwardSquaredErrorBlock(T * dedy, T * t, T * y,
                                          int blockSize, int begInBlock, int lenInBlock, int size)
 {
     int i = blockDim.x * blockIdx.x + threadIdx.x;
@@ -265,8 +267,9 @@ backward compuation for cross entropy (Cuda kernel)
 >> y - model output (in vector)
 >> size - size of the vector (dedy)
 */
+template <class T>
 __global__
-void KernelLossBackwardCrossEntropy(DTYPE * dedy, DTYPE * t, DTYPE * y, int tBeg, int tLen, int yBeg, int blockNum, int stride, int dimensionSize)
+void KernelLossBackwardCrossEntropy(T * dedy, T * t, T * y, int tBeg, int tLen, int yBeg, int blockNum, int stride, int dimensionSize)
 {
     int i = blockDim.x * blockIdx.x + threadIdx.x;
     if (i > stride * dimensionSize * blockNum)
@@ -297,8 +300,9 @@ backward compuation for cross entropy (Cuda kernel)
 >> lenInBlock - number of items in a block for computation
 >> size - size of the vector (dedy)
 */
+template <class T>
 __global__
-void KernelLossBackwardCrossEntropyBlock(DTYPE * dedy, DTYPE * t, DTYPE * y,
+void KernelLossBackwardCrossEntropyBlock(T * dedy, T * t, T * y,
                                          int blockSize, int begInBlock, int lenInBlock, int size)
 {
     int i = blockDim.x * blockIdx.x + threadIdx.x;
@@ -337,14 +341,8 @@ void _CudaLossBackward(XTensor * dedy, XTensor * t, XTensor * y,
     CheckNTErrors(((dedy->devID == t->devID) && (dedy->devID == y->devID)),
                   "Tensor must be on the same device!");
     CheckNTErrors((t->order > leadDim), "Illegal leading dimension!");
-    CheckNTErrors((t->dataType == DEFAULT_DTYPE &&
-                   y->dataType == DEFAULT_DTYPE &&
-                   dedy->dataType == DEFAULT_DTYPE),
-                  "Input vectors are not in default type.");
     CheckNTErrors((dedy->devID >= 0 && t->devID >= 0 && y->devID >= 0),
                   "The backward compuation must be performed on GPUs.");
     CheckNTErrors((dedy->devID == t->devID && dedy->devID == y->devID),
                   "The vectors must be on the same GPU.");
     CheckNTErrors((tBeg == yBeg), "TODO!");
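The check removed in this hunk used to pin _CudaLossBackward to DEFAULT_DTYPE; with it gone, the function can branch on the tensors' dataType and launch the same templated kernels with either DTYPE or half pointers, as the following hunk shows. A stripped-down sketch of that run-time dispatch (the names and the enum are made up for illustration and are not NiuTensor API):

// dispatch_sketch.cu -- illustrative only
#include <cuda_fp16.h>

enum SketchDataType { SKETCH_FLOAT, SKETCH_FLOAT16 };   /* stand-ins for X_FLOAT / X_FLOAT16 */

/* one kernel body, compiled once per element type */
template <class T>
__global__ void KernelScaleSketch(T * a, int n)
{
    int i = blockDim.x * blockIdx.x + threadIdx.x;
    if (i < n)
        a[i] = (T)((float)a[i] * 2.0F);   /* promote to float so the body is valid for half too */
}

/* the run-time branch only picks the pointer type; the kernel source stays single */
void ScaleSketch(void * data, SketchDataType dataType, int n)
{
    dim3 blocks((n + 255) / 256), threads(256);
    if (dataType == SKETCH_FLOAT)
        KernelScaleSketch<float><<<blocks, threads>>>((float*)data, n);
    else
        KernelScaleSketch<half><<<blocks, threads>>>((half*)data, n);
}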
@@ -376,51 +374,105 @@ void _CudaLossBackward(XTensor * dedy, XTensor * t, XTensor * y,
     dim3 blocks(cudaGridSize[0]);
     dim3 threads(cudaBlockSize[0]);
 
-    DTYPE * tp = (DTYPE*)t->data;
-    DTYPE * yp = (DTYPE*)y->data;
-    DTYPE * dedyp = (DTYPE*)dedy->data;
-
-    int devIDBackup;
-    ProtectCudaDev(y->devID, devIDBackup);
-
-    /*
-    squared error
-    loss = sum_{i} 0.5*(t_i - y_i)^2, where t_i is the gold standard and y_i is the model output
-    dloss/dy_i = y_i - t_i
-    */
-    if(LFName == SQUAREDERROR){
-        if(t->isSparse){
-            ShowNTErrors("TODO!");
-        }
-        else if(size == y->unitNum){
-            KernelLossBackwardSquaredError<<<blocks, threads>>>(dedyp, tp, yp, y->unitNum);
-        }
-        else{
-            KernelLossBackwardSquaredErrorBlock<<<blocks, threads>>>(dedyp, tp, yp, blockSize, tBeg * stride, tLen * stride, y->unitNum);
-        }
-    }
-
-    /*
-    cross entropy
-    loss = sum_{i} (-t_i * log(y_i)), where t and y are distributions
-    dloss/dy_i = -t_i / y_i
-    */
-    else if(LFName == CROSSENTROPY){
-        if(t->isSparse){
-            ShowNTErrors("TODO!");
-        }
-        else if(size == y->unitNum){
-            KernelLossBackwardCrossEntropy<<<blocks, threads>>>(dedyp, tp, yp, tBeg, tLen, yBeg, blockNum, stride, dimensionSize);
-        }
-        else{
-            KernelLossBackwardCrossEntropyBlock<<<blocks, threads>>>(dedyp, tp, yp, blockSize, tBeg * stride, tLen * stride, y->unitNum);
-        }
-    }
+    if (t->dataType == DEFAULT_DTYPE &&
+        y->dataType == DEFAULT_DTYPE &&
+        dedy->dataType == DEFAULT_DTYPE) {
+        DTYPE * tp = (DTYPE*)t->data;
+        DTYPE * yp = (DTYPE*)y->data;
+        DTYPE * dedyp = (DTYPE*)dedy->data;
+
+        int devIDBackup;
+        ProtectCudaDev(y->devID, devIDBackup);
+
+        /*
+        squared error
+        loss = sum_{i} 0.5*(t_i - y_i)^2, where t_i is the gold standard and y_i is the model output
+        dloss/dy_i = y_i - t_i
+        */
+        if (LFName == SQUAREDERROR) {
+            if (t->isSparse) {
+                ShowNTErrors("TODO!");
+            }
+            else if (size == y->unitNum) {
+                KernelLossBackwardSquaredError << <blocks, threads >> >(dedyp, tp, yp, y->unitNum);
+            }
+            else {
+                KernelLossBackwardSquaredErrorBlock << <blocks, threads >> >(dedyp, tp, yp, blockSize, tBeg * stride, tLen * stride, y->unitNum);
+            }
+        }
+
+        /*
+        cross entropy
+        loss = sum_{i} (-t_i * log(y_i)), where t and y are distributions
+        dloss/dy_i = -t_i / y_i
+        */
+        else if (LFName == CROSSENTROPY) {
+            if (t->isSparse) {
+                ShowNTErrors("TODO!");
+            }
+            else if (size == y->unitNum) {
+                KernelLossBackwardCrossEntropy << <blocks, threads >> >(dedyp, tp, yp, tBeg, tLen, yBeg, blockNum, stride, dimensionSize);
+            }
+            else {
+                KernelLossBackwardCrossEntropyBlock << <blocks, threads >> >(dedyp, tp, yp, blockSize, tBeg * stride, tLen * stride, y->unitNum);
+            }
+        }
+
+        BacktoCudaDev(y->devID, devIDBackup);
+    }
+    else if (t->dataType == X_FLOAT16 &&
+             y->dataType == X_FLOAT16 &&
+             dedy->dataType == X_FLOAT16) {
+        half * tp = (half*)t->data;
+        half * yp = (half*)y->data;
+        half * dedyp = (half*)dedy->data;
+
+        int devIDBackup;
+        ProtectCudaDev(y->devID, devIDBackup);
+
+        /*
+        squared error
+        loss = sum_{i} 0.5*(t_i - y_i)^2, where t_i is the gold standard and y_i is the model output
+        dloss/dy_i = y_i - t_i
+        */
+        if (LFName == SQUAREDERROR) {
+            if (t->isSparse) {
+                ShowNTErrors("TODO!");
+            }
+            else if (size == y->unitNum) {
+                KernelLossBackwardSquaredError << <blocks, threads >> >(dedyp, tp, yp, y->unitNum);
+            }
+            else {
+                KernelLossBackwardSquaredErrorBlock << <blocks, threads >> >(dedyp, tp, yp, blockSize, tBeg * stride, tLen * stride, y->unitNum);
+            }
+        }
+
+        /*
+        cross entropy
+        loss = sum_{i} (-t_i * log(y_i)), where t and y are distributions
+        dloss/dy_i = -t_i / y_i
+        */
+        else if (LFName == CROSSENTROPY) {
+            if (t->isSparse) {
+                ShowNTErrors("TODO!");
+            }
+            else if (size == y->unitNum) {
+                KernelLossBackwardCrossEntropy << <blocks, threads >> >(dedyp, tp, yp, tBeg, tLen, yBeg, blockNum, stride, dimensionSize);
+            }
+            else {
+                KernelLossBackwardCrossEntropyBlock << <blocks, threads >> >(dedyp, tp, yp, blockSize, tBeg * stride, tLen * stride, y->unitNum);
+            }
+        }
+
+        BacktoCudaDev(y->devID, devIDBackup);
+    }
     else{
         ShowNTErrors("TODO");
     }
-
-    BacktoCudaDev(y->devID, devIDBackup);
 }
 #endif
...
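The derivative comments in the loss hunks above are easy to sanity-check by hand: for squared error, d(0.5*(t_i - y_i)^2)/dy_i = y_i - t_i, and for cross entropy, d(-t_i * log(y_i))/dy_i = -t_i / y_i. A tiny host-only check, independent of the NiuTensor code:

// loss_grad_check.cpp -- illustrative only
#include <cstdio>

int main()
{
    const int n = 3;
    float t[n] = { 0.0F, 1.0F, 0.0F };   /* gold standard (one-hot) */
    float y[n] = { 0.2F, 0.7F, 0.1F };   /* model output */

    for (int i = 0; i < n; i++) {
        float dSquared = y[i] - t[i];    /* d(0.5*(t - y)^2)/dy = y - t  */
        float dCross   = -t[i] / y[i];   /* d(-t * log(y))/dy   = -t / y */
        printf("i = %d  squared-error grad = % .2f  cross-entropy grad = % .4f\n",
               i, dSquared, dCross);
    }
    return 0;
}

For i = 1 this gives 0.7 - 1 = -0.3 and -1/0.7 ≈ -1.4286; the zero gold entries contribute 0.2 and 0.1 to the squared-error gradient and 0 to the cross-entropy gradient.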
@@ -82,9 +82,9 @@ bool Test()
     //wrong = !TestCrossEntropy() || wrong;
     //wrong = !TestDropout() || wrong;
-    //wrong = !TestHardTanH() || wrong;
+    wrong = !TestHardTanH() || wrong;
     //wrong = !TestIdentity() || wrong;
-    wrong = !TestLogSoftmax() || wrong;
+    //wrong = !TestLogSoftmax() || wrong;
     //wrong = !TestLoss() || wrong;
     //wrong = !TestRectify() || wrong;
     //wrong = !TestSigmoid() || wrong;
...
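The last hunk switches the test driver from TestLogSoftmax to TestHardTanH so the new float16 path gets exercised. When a float16 result is checked against a float32 reference, the comparison needs a tolerance on the order of half precision (about three decimal digits; machine epsilon 2^-10 ≈ 9.8e-4). A hedged sketch of such a check, not taken from the NiuTensor test code (assumes CUDA 9 or later so the cuda_fp16.h conversions are callable on the host, compiled with nvcc):

// half_tolerance_sketch.cu -- illustrative only
#include <cuda_fp16.h>
#include <cmath>
#include <cstdio>

/* compare a float32 reference against values that went through float16 storage */
bool CheckWithHalfTolerance(const float * ref, const float * viaHalf, int n, float tol)
{
    for (int i = 0; i < n; i++)
        if (std::fabs(ref[i] - viaHalf[i]) > tol * (1.0F + std::fabs(ref[i])))
            return false;
    return true;
}

int main()
{
    /* round-trip one value through half to see the precision loss the tolerance must absorb */
    float v = 0.3333333F;
    float r = __half2float(__float2half(v));
    printf("float32: %.7f  via float16: %.7f  abs diff: %.7f\n", v, r, std::fabs(v - r));

    bool ok = CheckWithHalfTolerance(&v, &r, 1, 1e-3F);
    printf("within half tolerance: %s\n", ok ? "yes" : "no");
    return 0;
}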