Commit 182633ac by xuchen

Merge branch 'xuchen' into xiaotong-working

parents afa887d4 4336f2f9
...@@ -29,10 +29,18 @@ const char * GetOPName(int type) ...@@ -29,10 +29,18 @@ const char * GetOPName(int type)
if ((type & MATH_BASE) != 0){ if ((type & MATH_BASE) != 0){
if (type == MATH_ABSOLUTE) if (type == MATH_ABSOLUTE)
return "M_ABSOLUTE"; return "M_ABSOLUTE";
else if (type == MATH_CEIL)
return "M_CEIL";
else if (type == MATH_EXP) else if (type == MATH_EXP)
return "M_EXP"; return "M_EXP";
else if (type == MATH_FLOOR)
return "M_FLOOR";
else if (type == MATH_LOG) else if (type == MATH_LOG)
return "M_LOG"; return "M_LOG";
else if (type == MATH_SQRT)
return "M_SQRT";
else if (type == MATH_SQUARE)
return "M_SQUARE";
else if (type == MATH_SIN) else if (type == MATH_SIN)
return "M_SIN"; return "M_SIN";
else if (type == MATH_COS) else if (type == MATH_COS)
...@@ -113,7 +121,9 @@ const char * GetOPName(int type) ...@@ -113,7 +121,9 @@ const char * GetOPName(int type)
return "S_TOPK"; return "S_TOPK";
} }
else if ((type & FUNCTION_BASE) != 0){ else if ((type & FUNCTION_BASE) != 0){
if (type == FUNC_HARDTANH) if (type == FUNC_DROPOUT)
return "F_DROPOUT";
else if (type == FUNC_HARDTANH)
return "F_HARDTANH"; return "F_HARDTANH";
else if (type == FUNC_IDENTITY) else if (type == FUNC_IDENTITY)
return "F_IDENTITY"; return "F_IDENTITY";
......
...@@ -32,9 +32,13 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -32,9 +32,13 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#define MATH_BASE 0x00001000 #define MATH_BASE 0x00001000
#define MATH_ABSOLUTE MATH_BASE + 1 #define MATH_ABSOLUTE MATH_BASE + 1
#define MATH_EXP MATH_ABSOLUTE + 1 #define MATH_CEIL MATH_ABSOLUTE + 1
#define MATH_LOG MATH_EXP + 1 #define MATH_EXP MATH_CEIL + 1
#define MATH_SIN MATH_LOG + 1 #define MATH_FLOOR MATH_EXP + 1
#define MATH_LOG MATH_FLOOR + 1
#define MATH_SQRT MATH_LOG + 1
#define MATH_SQUARE MATH_SQRT + 1
#define MATH_SIN MATH_SQUARE + 1
#define MATH_COS MATH_SIN + 1 #define MATH_COS MATH_SIN + 1
#define MATH_TAN MATH_COS + 1 #define MATH_TAN MATH_COS + 1
#define MATH_ROUND MATH_TAN + 1 #define MATH_ROUND MATH_TAN + 1
...@@ -88,7 +92,8 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -88,7 +92,8 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* activation functions */ /* activation functions */
#define FUNCTION_BASE DATA_BASE * 2 #define FUNCTION_BASE DATA_BASE * 2
#define FUNC_HARDTANH FUNCTION_BASE + 1 #define FUNC_DROPOUT FUNCTION_BASE + 1
#define FUNC_HARDTANH FUNC_DROPOUT + 1
#define FUNC_IDENTITY FUNC_HARDTANH + 1 #define FUNC_IDENTITY FUNC_HARDTANH + 1
#define FUNC_LOGSOFTMAX FUNC_IDENTITY + 1 #define FUNC_LOGSOFTMAX FUNC_IDENTITY + 1
#define FUNC_RECTIFY FUNC_LOGSOFTMAX + 1 #define FUNC_RECTIFY FUNC_LOGSOFTMAX + 1
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-31
*/
#include <math.h> #include <math.h>
#include "../../XName.h" #include "../../XName.h"
#include "Unary.h" #include "Unary.h"
...@@ -5,9 +26,18 @@ ...@@ -5,9 +26,18 @@
namespace nts{ namespace nts{
DTYPE square(DTYPE x)
{
return x * x;
}
DTYPE round(DTYPE r)
{
return (r > 0.0) ? (DTYPE)floor(r + 0.5) : (DTYPE)ceil(r - 0.5);
}
#ifdef USE_CUDA #ifdef USE_CUDA
/* define three marco separately, specify the respective function names */ /* define three marco separately, specify the respective function names (GPU mode) */
#define _SIMPLE_UNARY_FUNCTION(_funcName, _cudaFuncName, origFunc) \ #define _SIMPLE_UNARY_FUNCTION(_funcName, _cudaFuncName, origFunc) \
void _funcName(const XTensor * a, XTensor * b) \ void _funcName(const XTensor * a, XTensor * b) \
{ \ { \
...@@ -45,14 +75,35 @@ _SIMPLE_UNARY_FUNCTION(_Absolute, _CudaAbsolute, fabs) ...@@ -45,14 +75,35 @@ _SIMPLE_UNARY_FUNCTION(_Absolute, _CudaAbsolute, fabs)
_SIMPLE_UNARY_FUNCTION_ME(_AbsoluteMe, _Absolute) _SIMPLE_UNARY_FUNCTION_ME(_AbsoluteMe, _Absolute)
SIMPLE_UNARY_FUNCTION(Absolute, _Absolute, MATH_ABSOLUTE) SIMPLE_UNARY_FUNCTION(Absolute, _Absolute, MATH_ABSOLUTE)
_SIMPLE_UNARY_FUNCTION(_Ceil, _CudaCeil, ceil)
_SIMPLE_UNARY_FUNCTION_ME(_CeilMe, _Ceil)
SIMPLE_UNARY_FUNCTION(Ceil, _Ceil, MATH_CEIL)
_SIMPLE_UNARY_FUNCTION(_Exp, _CudaExp, exp) _SIMPLE_UNARY_FUNCTION(_Exp, _CudaExp, exp)
_SIMPLE_UNARY_FUNCTION_ME(_ExpMe, _Exp) _SIMPLE_UNARY_FUNCTION_ME(_ExpMe, _Exp)
SIMPLE_UNARY_FUNCTION(Exp, _Exp, MATH_EXP) SIMPLE_UNARY_FUNCTION(Exp, _Exp, MATH_EXP)
_SIMPLE_UNARY_FUNCTION(_Floor, _CudaFloor, floor)
_SIMPLE_UNARY_FUNCTION_ME(_FloorMe, _Floor)
SIMPLE_UNARY_FUNCTION(Floor, _Floor, MATH_FLOOR)
_SIMPLE_UNARY_FUNCTION(_Log, _CudaLog, log) _SIMPLE_UNARY_FUNCTION(_Log, _CudaLog, log)
_SIMPLE_UNARY_FUNCTION_ME(_LogMe, _Log) _SIMPLE_UNARY_FUNCTION_ME(_LogMe, _Log)
SIMPLE_UNARY_FUNCTION(Log, _Log, MATH_LOG) SIMPLE_UNARY_FUNCTION(Log, _Log, MATH_LOG)
_SIMPLE_UNARY_FUNCTION(_Round, _CudaRound, round)
_SIMPLE_UNARY_FUNCTION_ME(_RoundMe, _Round)
SIMPLE_UNARY_FUNCTION(Round, _Round, MATH_ROUND)
_SIMPLE_UNARY_FUNCTION(_Sqrt, _CudaSqrt, sqrt)
_SIMPLE_UNARY_FUNCTION_ME(_SqrtMe, _Sqrt)
SIMPLE_UNARY_FUNCTION(Sqrt, _Sqrt, MATH_SQRT)
_SIMPLE_UNARY_FUNCTION(_Square, _CudaSquare, square)
_SIMPLE_UNARY_FUNCTION_ME(_SquareMe, _Square)
SIMPLE_UNARY_FUNCTION(Square, _Square, MATH_SQUARE)
_SIMPLE_UNARY_FUNCTION(_Sin, _CudaSin, sin) _SIMPLE_UNARY_FUNCTION(_Sin, _CudaSin, sin)
_SIMPLE_UNARY_FUNCTION_ME(_SinMe, _Sin) _SIMPLE_UNARY_FUNCTION_ME(_SinMe, _Sin)
SIMPLE_UNARY_FUNCTION(Sin, _Sin, MATH_SIN) SIMPLE_UNARY_FUNCTION(Sin, _Sin, MATH_SIN)
...@@ -65,11 +116,8 @@ _SIMPLE_UNARY_FUNCTION(_Tan, _CudaTan, tan) ...@@ -65,11 +116,8 @@ _SIMPLE_UNARY_FUNCTION(_Tan, _CudaTan, tan)
_SIMPLE_UNARY_FUNCTION_ME(_TanMe, _Tan) _SIMPLE_UNARY_FUNCTION_ME(_TanMe, _Tan)
SIMPLE_UNARY_FUNCTION(Tan, _Tan, MATH_TAN) SIMPLE_UNARY_FUNCTION(Tan, _Tan, MATH_TAN)
/*_SIMPLE_UNARY_FUNCTION(_Round, _CudaRound, round)
_SIMPLE_UNARY_FUNCTION_ME(_RoundMe, _Round)
SIMPLE_UNARY_FUNCTION(Round, _Round, MATH_ROUND)*/
#else #else
/* define three marco separately, specify the respective function names */ /* define three marco separately, specify the respective function names (CPU mode) */
#define _SIMPLE_UNARY_FUNCTION(_funcName, origFunc) \ #define _SIMPLE_UNARY_FUNCTION(_funcName, origFunc) \
void _funcName(const XTensor * a, XTensor * b) \ void _funcName(const XTensor * a, XTensor * b) \
{ \ { \
...@@ -102,14 +150,35 @@ _SIMPLE_UNARY_FUNCTION(_Absolute, fabs) ...@@ -102,14 +150,35 @@ _SIMPLE_UNARY_FUNCTION(_Absolute, fabs)
_SIMPLE_UNARY_FUNCTION_ME(_AbsoluteMe, _Absolute) _SIMPLE_UNARY_FUNCTION_ME(_AbsoluteMe, _Absolute)
SIMPLE_UNARY_FUNCTION(Absolute, _Absolute, MATH_ABSOLUTE) SIMPLE_UNARY_FUNCTION(Absolute, _Absolute, MATH_ABSOLUTE)
_SIMPLE_UNARY_FUNCTION(_Ceil, ceil)
_SIMPLE_UNARY_FUNCTION_ME(_CeilMe, _Ceil)
SIMPLE_UNARY_FUNCTION(Ceil, _Ceil, MATH_CEIL)
_SIMPLE_UNARY_FUNCTION(_Exp, exp) _SIMPLE_UNARY_FUNCTION(_Exp, exp)
_SIMPLE_UNARY_FUNCTION_ME(_ExpMe, _Exp) _SIMPLE_UNARY_FUNCTION_ME(_ExpMe, _Exp)
SIMPLE_UNARY_FUNCTION(Exp, _Exp, MATH_EXP) SIMPLE_UNARY_FUNCTION(Exp, _Exp, MATH_EXP)
_SIMPLE_UNARY_FUNCTION(_Floor, floor)
_SIMPLE_UNARY_FUNCTION_ME(_FloorMe, _Floor)
SIMPLE_UNARY_FUNCTION(Floor, _Floor, MATH_FLOOR)
_SIMPLE_UNARY_FUNCTION(_Log, log) _SIMPLE_UNARY_FUNCTION(_Log, log)
_SIMPLE_UNARY_FUNCTION_ME(_LogMe, _Log) _SIMPLE_UNARY_FUNCTION_ME(_LogMe, _Log)
SIMPLE_UNARY_FUNCTION(Log, _Log, MATH_LOG) SIMPLE_UNARY_FUNCTION(Log, _Log, MATH_LOG)
_SIMPLE_UNARY_FUNCTION(_Round, round)
_SIMPLE_UNARY_FUNCTION_ME(_RoundMe, _Round)
SIMPLE_UNARY_FUNCTION(Round, _Round, MATH_ROUND)
_SIMPLE_UNARY_FUNCTION(_Sqrt, sqrt)
_SIMPLE_UNARY_FUNCTION_ME(_SqrtMe, _Sqrt)
SIMPLE_UNARY_FUNCTION(Sqrt, _Sqrt, MATH_SQRT)
_SIMPLE_UNARY_FUNCTION(_Square, square)
_SIMPLE_UNARY_FUNCTION_ME(_SquareMe, _Square)
SIMPLE_UNARY_FUNCTION(Square, _Square, MATH_SQUARE)
_SIMPLE_UNARY_FUNCTION(_Sin, sin) _SIMPLE_UNARY_FUNCTION(_Sin, sin)
_SIMPLE_UNARY_FUNCTION_ME(_SinMe, _Sin) _SIMPLE_UNARY_FUNCTION_ME(_SinMe, _Sin)
SIMPLE_UNARY_FUNCTION(Sin, _Sin, MATH_SIN) SIMPLE_UNARY_FUNCTION(Sin, _Sin, MATH_SIN)
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-31
*/
#include <math.h> #include <math.h>
#include "../../XDevice.h" #include "../../XDevice.h"
#include "../../XName.h" #include "../../XName.h"
#include "Unary.h"
#include "Unary.cuh" #include "Unary.cuh"
namespace nts { namespace nts {
__device__
DTYPE CudaSquare(DTYPE x)
{
return x * x;
}
__device__
DTYPE CudaRound(DTYPE r)
{
return (r > 0.0) ? (DTYPE)floor(r + 0.5) : (DTYPE)ceil(r - 0.5);
}
#define SIMPLE_UNARY_FUNCTION_GPU(funcName, origFunc) \ #define SIMPLE_UNARY_FUNCTION_GPU(funcName, origFunc) \
__global__ \ __global__ \
void Kernel##funcName(DTYPE * a, DTYPE * b, int size) \ void Kernel##funcName(DTYPE * a, DTYPE * b, int size) \
...@@ -15,7 +49,7 @@ void Kernel##funcName(DTYPE * a, DTYPE * b, int size) \ ...@@ -15,7 +49,7 @@ void Kernel##funcName(DTYPE * a, DTYPE * b, int size) \
b[i] = (DTYPE)origFunc(a[i]); \ b[i] = (DTYPE)origFunc(a[i]); \
} \ } \
__global__ \ __global__ \
void Kernel##funcName(__half * a, __half * b, int size) \ void Kernel##funcName(__half * a, __half * b, int size) \
{ \ { \
return; \ return; \
} \ } \
...@@ -37,12 +71,12 @@ void _Cuda##funcName(const XTensor * a, XTensor * b) \ ...@@ -37,12 +71,12 @@ void _Cuda##funcName(const XTensor * a, XTensor * b) \
ProtectCudaDev(a->devID, devIDBackup); \ ProtectCudaDev(a->devID, devIDBackup); \
\ \
if (a->dataType == DEFAULT_DTYPE) { \ if (a->dataType == DEFAULT_DTYPE) { \
Kernel##funcName << <blocks, threads >> > \ Kernel##funcName<<<blocks, threads>>> \
((DTYPE*)a->data, (DTYPE*)b->data, a->unitNum); \ ((DTYPE*)a->data, (DTYPE*)b->data, a->unitNum); \
} \ } \
else if (a->dataType == X_FLOAT16) { \ else if (a->dataType == X_FLOAT16) { \
Kernel##funcName << <blocks, threads >> > \ Kernel##funcName<<<blocks, threads>>> \
((__half*)a->data, (__half*)b->data, a->unitNum); \ ((__half*)a->data, (__half*)b->data, a->unitNum); \
} \ } \
else { \ else { \
ShowNTErrors("TODO!"); \ ShowNTErrors("TODO!"); \
...@@ -52,11 +86,16 @@ void _Cuda##funcName(const XTensor * a, XTensor * b) \ ...@@ -52,11 +86,16 @@ void _Cuda##funcName(const XTensor * a, XTensor * b) \
} \ } \
SIMPLE_UNARY_FUNCTION_GPU(Absolute, fabs) SIMPLE_UNARY_FUNCTION_GPU(Absolute, fabs)
SIMPLE_UNARY_FUNCTION_GPU(Ceil, ceil)
SIMPLE_UNARY_FUNCTION_GPU(Exp, exp) SIMPLE_UNARY_FUNCTION_GPU(Exp, exp)
SIMPLE_UNARY_FUNCTION_GPU(Floor, floor)
SIMPLE_UNARY_FUNCTION_GPU(Log, log) SIMPLE_UNARY_FUNCTION_GPU(Log, log)
SIMPLE_UNARY_FUNCTION_GPU(Round, CudaRound)
SIMPLE_UNARY_FUNCTION_GPU(Sqrt, sqrt)
SIMPLE_UNARY_FUNCTION_GPU(Square, CudaSquare)
SIMPLE_UNARY_FUNCTION_GPU(Sin, sin) SIMPLE_UNARY_FUNCTION_GPU(Sin, sin)
SIMPLE_UNARY_FUNCTION_GPU(Cos, cos) SIMPLE_UNARY_FUNCTION_GPU(Cos, cos)
SIMPLE_UNARY_FUNCTION_GPU(Tan, tan) SIMPLE_UNARY_FUNCTION_GPU(Tan, tan)
//SIMPLE_UNARY_FUNCTION_GPU(Round, round)
} }
\ No newline at end of file
...@@ -38,6 +38,15 @@ void KernelAbsolute(__half * a, __half * b, int size); ...@@ -38,6 +38,15 @@ void KernelAbsolute(__half * a, __half * b, int size);
/* set each entry to its absolute value */ /* set each entry to its absolute value */
void _CudaAbsolute(const XTensor * a, XTensor * b); void _CudaAbsolute(const XTensor * a, XTensor * b);
/* set each entry to its ceil value (CUDA Kernel) */
__global__
void KernelCeil(DTYPE * a, DTYPE * b, int size);
/* set each entry to its ceil value (CUDA Kernel) with float16 data type*/
__global__
void KernelCeil(__half * a, __half * b, int size);
/* set each entry to its ceil value */
void _CudaCeil(const XTensor * a, XTensor * b);
/* set each entry to its exponent value (CUDA Kernel) */ /* set each entry to its exponent value (CUDA Kernel) */
__global__ __global__
void KernelExp(DTYPE * a, DTYPE * b, int size); void KernelExp(DTYPE * a, DTYPE * b, int size);
...@@ -47,6 +56,15 @@ void KernelExp(__half * a, __half * b, int size); ...@@ -47,6 +56,15 @@ void KernelExp(__half * a, __half * b, int size);
/* set each entry to its exponent value */ /* set each entry to its exponent value */
void _CudaExp(const XTensor * a, XTensor * b); void _CudaExp(const XTensor * a, XTensor * b);
/* set each entry to its floor value (CUDA Kernel) */
__global__
void KernelFloor(DTYPE * a, DTYPE * b, int size);
/* set each entry to its floor value (CUDA Kernel) with float16 data type*/
__global__
void KernelFloor(__half * a, __half * b, int size);
/* set each entry to its floor value */
void _CudaFloor(const XTensor * a, XTensor * b);
/* set each entry to its logarithm value (CUDA Kernel) */ /* set each entry to its logarithm value (CUDA Kernel) */
__global__ __global__
void KernelLog(DTYPE * a, DTYPE * b, int size); void KernelLog(DTYPE * a, DTYPE * b, int size);
...@@ -56,6 +74,34 @@ void KernelLog(__half * a, __half * b, int size); ...@@ -56,6 +74,34 @@ void KernelLog(__half * a, __half * b, int size);
/* set each entry to its logarithm value */ /* set each entry to its logarithm value */
void _CudaLog(const XTensor * a, XTensor * b); void _CudaLog(const XTensor * a, XTensor * b);
/* set each entry to its round value (CUDA Kernel) */
__global__
void KernelRound(DTYPE * a, DTYPE * b, int size);
/* set each entry to its round value (CUDA Kernel) with float16 data type*/
__global__
void KernelRound(__half * a, __half * b, int size);
/* set each entry to its round value */
void _CudaRound(const XTensor * a, XTensor * b);
/* set each entry to its sqrt value (CUDA Kernel) */
__global__
void KernelSqrt(DTYPE * a, DTYPE * b, int size);
/* set each entry to its sqrt value (CUDA Kernel) with float16 data type*/
__global__
void KernelSqrt(__half * a, __half * b, int size);
/* set each entry to its sqrt value */
void _CudaSqrt(const XTensor * a, XTensor * b);
/* set each entry to its square value (CUDA Kernel) */
__global__
void KernelSquare(DTYPE * a, DTYPE * b, int size);
/* set each entry to its square value (CUDA Kernel) with float16 data type*/
__global__
void KernelSquare(__half * a, __half * b, int size);
/* set each entry to its square value */
void _CudaSquare(const XTensor * a, XTensor * b);
/* set each entry to its sine value (CUDA Kernel) */ /* set each entry to its sine value (CUDA Kernel) */
__global__ __global__
void KernelSin(DTYPE * a, DTYPE * b, int size); void KernelSin(DTYPE * a, DTYPE * b, int size);
...@@ -83,15 +129,6 @@ void KernelTan(__half * a, __half * b, int size); ...@@ -83,15 +129,6 @@ void KernelTan(__half * a, __half * b, int size);
/* set each entry to its tangent value */ /* set each entry to its tangent value */
void _CudaTan(const XTensor * a, XTensor * b); void _CudaTan(const XTensor * a, XTensor * b);
/* set each entry to its round value (CUDA Kernel) */
//__global__
//void KernelRound(DTYPE * a, DTYPE * b, int size);
/* set each entry to its round value (CUDA Kernel) with float16 data type*/
//__global__
//void KernelRound(__half * a, __half * b, int size);
/* set each entry to its round value */
//void _CudaRound(const XTensor * a, XTensor * b);
#endif // USE_CUDA #endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -28,95 +28,103 @@ namespace nts{ ...@@ -28,95 +28,103 @@ namespace nts{
/* set every entry to its absolute value */ /* set every entry to its absolute value */
void _Absolute(const XTensor * a, XTensor * b); void _Absolute(const XTensor * a, XTensor * b);
/* /* set every entry to its absolute value (do it on site)
set every entry to its absolute value (do it on site) keep the result in the input tensor a and return nothing */
keep the result in the input tensor a and return nothing
*/
void _AbsoluteMe(XTensor * a); void _AbsoluteMe(XTensor * a);
/* /* set every entry to its absolute value (return a XTensor structure)
set every entry to its absolute value (return a XTensor structure) make a new tensor to keep the result and return it */
make a new tensor to keep the result and return it
*/
XTensor Absolute(const XTensor & a); XTensor Absolute(const XTensor & a);
/* set every entry to its ceil value */
void _Ceil(const XTensor * a, XTensor * b);
/* set every entry to its ceil value (do it on site)
keep the result in the input tensor a and return nothing */
void _CeilMe(XTensor * a);
/* set every entry to its ceil value (return a XTensor structure)
make a new tensor to keep the result and return it */
XTensor Ceil(const XTensor & a);
/* set every entry to its exponent value */ /* set every entry to its exponent value */
void _Exp(const XTensor * a, XTensor * b); void _Exp(const XTensor * a, XTensor * b);
/* /* set every entry to its exponent value (do it on site)
set every entry to its exponent value (do it on site) keep the result in the input tensor a and return nothing */
keep the result in the input tensor a and return nothing
*/
void _ExpMe(XTensor * a); void _ExpMe(XTensor * a);
/* /* set every entry to its exponent value (return a XTensor structure)
set every entry to its exponent value (return a XTensor structure) make a new tensor to keep the result and return it */
make a new tensor to keep the result and return it
*/
XTensor Exp(const XTensor & a); XTensor Exp(const XTensor & a);
/* set every entry to its floor value */
void _Floor(const XTensor * a, XTensor * b);
/* set every entry to its floor value (do it on site)
keep the result in the input tensor a and return nothing */
void _FloorMe(XTensor * a);
/* set every entry to its floor value (return a XTensor structure)
make a new tensor to keep the result and return it */
XTensor Floor(const XTensor & a);
/* set every entry to its logarithm value */ /* set every entry to its logarithm value */
void _Log(const XTensor * a, XTensor * b); void _Log(const XTensor * a, XTensor * b);
/* /* set every entry to its logarithm value (do it on site)
set every entry to its logarithm value (do it on site) keep the result in the input tensor a and return nothing */
keep the result in the input tensor a and return nothing
*/
void _LogMe(XTensor * a); void _LogMe(XTensor * a);
/* /* set every entry to its logarithm value (return a XTensor structure)
set every entry to its logarithm value (return a XTensor structure) make a new tensor to keep the result and return it */
make a new tensor to keep the result and return it
*/
XTensor Log(const XTensor & a); XTensor Log(const XTensor & a);
/* set every entry to its round value */
void _Round(const XTensor * a, XTensor * b);
/* set every entry to its round value (do it on site)
keep the result in the input tensor a and return nothing */
void _RoundMe(XTensor * a);
/* set every entry to its round value (return a XTensor structure)
make a new tensor to keep the result and return it */
XTensor Round(const XTensor & a);
/* set every entry to its sqrt value */
void _Sqrt(const XTensor * a, XTensor * b);
/* set every entry to its sqrt value (do it on site)
keep the result in the input tensor a and return nothing */
void _SqrtMe(XTensor * a);
/* set every entry to its sqrt value (return a XTensor structure)
make a new tensor to keep the result and return it */
XTensor Sqrt(const XTensor & a);
/* set every entry to its square value */
void _Square(const XTensor * a, XTensor * b);
/* set every entry to its square value (do it on site)
keep the result in the input tensor a and return nothing */
void _SquareMe(XTensor * a);
/* set every entry to its square value (return a XTensor structure)
make a new tensor to keep the result and return it */
XTensor Square(const XTensor & a);
/* set every entry to its sine value */ /* set every entry to its sine value */
void _Sin(const XTensor * a, XTensor * b); void _Sin(const XTensor * a, XTensor * b);
/* /* set every entry to its sine value (do it on site)
set every entry to its sine value (do it on site) keep the result in the input tensor a and return nothing */
keep the result in the input tensor a and return nothing
*/
void _SinMe(XTensor * a); void _SinMe(XTensor * a);
/* /* set every entry to its sine value (return a XTensor structure)
set every entry to its sine value (return a XTensor structure) make a new tensor to keep the result and return it */
make a new tensor to keep the result and return it
*/
XTensor Sin(const XTensor & a); XTensor Sin(const XTensor & a);
/* set every entry to its cosine value */ /* set every entry to its cosine value */
void _Cos(const XTensor * a, XTensor * b); void _Cos(const XTensor * a, XTensor * b);
/* /* set every entry to its cosine value (do it on site)
set every entry to its cosine value (do it on site) keep the result in the input tensor a and return nothing */
keep the result in the input tensor a and return nothing
*/
void _CosMe(XTensor * a); void _CosMe(XTensor * a);
/* /* set every entry to its cosine value (return a XTensor structure)
set every entry to its cosine value (return a XTensor structure) make a new tensor to keep the result and return it */
make a new tensor to keep the result and return it
*/
XTensor Cos(const XTensor & a); XTensor Cos(const XTensor & a);
/* set every entry to its tangent value */ /* set every entry to its tangent value */
void _Tan(const XTensor * a, XTensor * b); void _Tan(const XTensor * a, XTensor * b);
/* /* set every entry to its tangent value (do it on site)
set every entry to its tangent value (do it on site) keep the result in the input tensor a and return nothing */
keep the result in the input tensor a and return nothing
*/
void _TanMe(XTensor * a); void _TanMe(XTensor * a);
/* /* set every entry to its tangent value (return a XTensor structure)
set every entry to its tangent value (return a XTensor structure) make a new tensor to keep the result and return it */
make a new tensor to keep the result and return it
*/
XTensor Tan(const XTensor & a); XTensor Tan(const XTensor & a);
/* set every entry to its round value */
//void _Round(const XTensor * a, XTensor * b);
/*
set every entry to its round value (do it on site)
keep the result in the input tensor a and return nothing
*/
//void _RoundMe(XTensor * a);
/*
set every entry to its round value (return a XTensor structure)
make a new tensor to keep the result and return it
*/
//XTensor Round(const XTensor & a);
} }
#endif //end __UNARY_H__ #endif //end __UNARY_H__
\ No newline at end of file
...@@ -480,8 +480,8 @@ void KernelReduceSumFast(__half * input, __half * output, ...@@ -480,8 +480,8 @@ void KernelReduceSumFast(__half * input, __half * output,
if data storage is discontinuius ,use this way to reduce if data storage is discontinuius ,use this way to reduce
*/ */
__global__ __global__
void KernelReduceSumDiscontinuousStorage(DTYPE * input, DTYPE * output, int stride, int blockNum, void KernelReduceSumDiscontinuousStorage(DTYPE * input, DTYPE * output, int stride, int strideNum,
int strideNum, DTYPE * shift, DTYPE power, bool isExp) int blockNum, DTYPE * shift, DTYPE power, bool isExp)
{ {
__shared__ DTYPE bias[MAX_CUDA_THREAD_NUM_PER_BLOCK]; __shared__ DTYPE bias[MAX_CUDA_THREAD_NUM_PER_BLOCK];
int idx = blockDim.x * blockIdx.x + threadIdx.x; int idx = blockDim.x * blockIdx.x + threadIdx.x;
......
/* NiuTrans.Tensor - an open-source tensor library
 * Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-09-12
*/
#include "../XName.h"
#include <math.h>
#include <time.h>
#include "Dropout.h"
#include "Dropout.cuh"
#include "../core/arithmetic/Multiply.h"
#include "../core/math/ScaleAndShift.h"
namespace nts{ // namespace nts(NiuTrans.Tensor
/*
generate a random bernoulli number
*/
/* draw one Bernoulli sample: returns (DTYPE)1.0 with probability (1 - prob),
   and (DTYPE)0.0 otherwise (uses the C library rand(); seed it via srand first) */
DTYPE RandomBernoulli(DTYPE prob)
{
    const DTYPE u = (DTYPE)rand() / (DTYPE)RAND_MAX;
    if (u > prob)
        return (DTYPE)1.0;
    return (DTYPE)0.0;
}
/*
dropout function
During training, randomly zeroes some of the elements of the input tensor
with probability p using samples from a Bernoulli distribution.
The elements to zero are randomized on every forward call.
This has proven to be an effective technique for regularization and
preventing the co-adaptation of neurons as described in the paper
"Improving neural networks by preventing co-adaptation of feature detectors".
Furthermore, the outputs are scaled by a factor of \frac{1}{1-p} during training.
This means that during evaluation the module simply computes an identity function.
>> x - input tensor
>> y - output tensor
>> prob - probability to set an element zero
*/
/* CPU/GPU dispatching implementation: builds a Bernoulli mask on the host,
   then either hands off to the CUDA path or applies mask * scale on CPU.
   NOTE(review): mask generation uses the host RNG even for GPU tensors, so the
   whole mask array is built on the CPU every call — presumably acceptable here. */
void _Dropout(const XTensor *x, XTensor *y, unsigned int seed, DTYPE prob)
{
CheckNTErrors(prob >= 0.0 && prob <= 1.0, "The probability must be 0-1!");
/* inverted-dropout scaling: surviving activations are scaled by 1/(1-p).
   NOTE(review): prob == 1.0 passes the check above but divides by zero — confirm callers never pass 1.0 */
DTYPE scaleFactor = (DTYPE)1.0 / ((DTYPE)1.0 - prob);
/* generate a mask tensor again with special probability */
srand(seed);
int unitNum = x->unitNum;
DTYPE * maskArray = new DTYPE[unitNum];
for (int i = 0; i < unitNum; i++)
maskArray[i] = RandomBernoulli(prob);
/* mask lives in a buffer tensor shaped like x; SetData uploads the host array */
XTensor * maskTensor = NewTensorBuf(x, x->devID, x->mem);
maskTensor->SetData(maskArray, unitNum);
#ifdef USE_CUDA
/* GPU path: fused multiply-and-scale kernel, then release host/buffer resources */
if(x->devID >=0 || y->devID >= 0){
_CudaDropout(x, y, maskTensor, scaleFactor);
DelTensorBuf(maskTensor);
delete[] maskArray;
return;
}
#endif
/* CPU path: y = (x * mask) * scaleFactor, via an intermediate buffer tensor.
   Buffers are released in reverse order of allocation — keep this ordering. */
XTensor * inter = NewTensorBuf(x, x->devID, x->mem);
_Multiply(x, maskTensor, inter);
_ScaleAndShift(inter, y, scaleFactor, 0);
DelTensorBuf(inter);
DelTensorBuf(maskTensor);
delete[] maskArray;
}
/*
dropout function (return a XTensor structure)
make a new tensor to keep the result and return it
During training, randomly zeroes some of the elements of the input tensor
with probability p using samples from a Bernoulli distribution.
The elements to zero are randomized on every forward call.
This has proven to be an effective technique for regularization and
preventing the co-adaptation of neurons as described in the paper
"Improving neural networks by preventing co-adaptation of feature detectors".
Furthermore, the outputs are scaled by a factor of \frac{1}{1-p} during training.
This means that during evaluation the module simply computes an identity function.
>> x - input tensor
>> y - output tensor
>> prob - probability to set an element zero
*/
/* operator-style dropout: returns a new tensor y = x * mask * 1/(1-prob).
   Unlike _Dropout, the seed is taken from time(NULL), so results differ per call.
   NOTE(review): no range check on prob here (unlike _Dropout), and prob == 1.0
   divides by zero — confirm callers validate prob. */
XTensor Dropout(const XTensor &x, DTYPE prob)
{
XTensor y(&x);
y.SetTMP();
/* inverted-dropout scale factor applied to surviving activations */
DTYPE scaleFactor = (DTYPE)1.0 / ((DTYPE)1.0 - prob);
/* generate a mask tensor again with special probability */
srand((unsigned int)time(NULL));
int unitNum = x.unitNum;
DTYPE * maskArray = new DTYPE[unitNum];
for (int i = 0; i < unitNum; i++)
maskArray[i] = RandomBernoulli(prob);
/* host-generated mask uploaded into a tensor shaped like x */
XTensor maskTensor(&x);
maskTensor.SetData(maskArray, unitNum);
/* y = (x * mask) * scaleFactor, using the operator-style API */
XTensor inter;
inter = Multiply(x, maskTensor);
y = ScaleAndShift(inter, scaleFactor, 0);
delete[] maskArray;
/* graph linking is intentionally disabled for now — left commented out */
///* tensor connection */
//XLink::MakeLink(&x, NULL, &y, FUNC_DROPOUT);
//XLink::AddParamToHead(&y, prob);
return y;
}
/*
backward computation of dropout function
dE/dx = dE/dy * dy/dx
>> y - output of the dropout function
>> x - input of the dropout function
>> dedy - dE/dy
>> dedx - dE/dx
>> prob - probability to set an element zero
*/
/* backward pass of dropout: dE/dx = dE/dy * mask * 1/(1-prob).
   The mask is REGENERATED from the given seed, so the caller must pass the
   same seed that was used in the forward _Dropout call for gradients to match. */
void _DropoutBackward(const XTensor * y, const XTensor * x,
const XTensor * dedy, XTensor * dedx,
unsigned int seed, DTYPE prob)
{
if(x->dataType == DEFAULT_DTYPE && y->dataType == DEFAULT_DTYPE)
{
int unitNum = y->unitNum;
/* same inverted-dropout scale as the forward pass */
DTYPE scaleFactor = (DTYPE)1.0F / ((DTYPE)1.0F - prob);
/* generate a mask tensor again with special probability */
srand(seed);
DTYPE * maskArray = new DTYPE[unitNum];
for (int i = 0; i < unitNum; i++)
maskArray[i] = RandomBernoulli(prob);
XTensor * maskTensor = NewTensorBuf(x, x->devID, x->mem);
maskTensor->SetData(maskArray, unitNum);
#ifdef USE_CUDA
/* GPU path: kernel applies dedy * mask * scale, then resources are released */
if(x->devID >= 0 || y->devID >= 0){
_CudaDropoutBackward(y, x, dedy, dedx, maskTensor, scaleFactor);
DelTensorBuf(maskTensor);
delete[] maskArray;
return;
}
#endif
/* CPU path: elementwise product over the raw data pointers */
DTYPE * dedyp = (DTYPE*)dedy->data;
DTYPE * dedxp = (DTYPE*)dedx->data;
/* dE/dx = dE/dy * dy/dx */
for(int i = 0; i < unitNum; i++)
dedxp[i] = dedyp[i] * maskArray[i] * scaleFactor;
DelTensorBuf(maskTensor);
delete[] maskArray;
}
else
ShowNTErrors("TODO!");
}
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
 * Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-09-12
*/
#include "Dropout.h"
#include "Dropout.cuh"
#include "Loss.cuh"
#include "../XDevice.h"
#ifdef USE_CUDA
// the CUDA stuff
#include <cuda_runtime.h>
#include <cublas_v2.h>
#include <cuda.h>
#endif
namespace nts{ // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/*
dropout function (Cuda kernel)
>> x - input data pointer
>> y - output data pointer
>> m - mask indicator to set zero
>> s - the scale factor
>> size - size of input/output
*/
/* elementwise dropout kernel: y[i] = x[i] * m[i] * s for each of `size` entries;
   one thread per element, extra threads return immediately */
__global__
void KernelDropoutCompute(DTYPE * x, DTYPE * y, DTYPE * m, DTYPE s, int size)
{
    int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= size)
        return;
    y[idx] = x[idx] * m[idx] * s;
}
/*
dropout function (Cuda version)
>> x - input tensor
>> y - output tensor
>> mask - mask tensor to set 0
>> scaleFactor - the scale factor
*/
/*
dropout function (Cuda version): y = x * mask * scaleFactor
>> x - input tensor
>> y - output tensor
>> mask - mask tensor to set 0
>> scaleFactor - the scale factor (1/(1-prob), inverted dropout)
*/
void _CudaDropout(const XTensor * x, XTensor * y, const XTensor * mask, DTYPE scaleFactor)
{
    if(x->dataType == DEFAULT_DTYPE && y->dataType == DEFAULT_DTYPE){
        /* fix: the message used to say "rectify" — copy-paste from another activation */
        CheckNTErrors(!x->isSparse && !y->isSparse, "the dropout function does not support sparse matrices.");
        /* fix: the old check (x->unitNum && y->unitNum) only required both sizes
           to be non-zero; the stated contract is that the lengths are EQUAL */
        CheckNTErrors(x->unitNum == y->unitNum, "we require two vectors with the same length.");

        int gridSize[3], blockSize[3];
        GDevs.GetCudaThread(x->devID, x->unitNum, gridSize, blockSize);

        /* run on x's device, restoring the previous device afterwards */
        int devIDBackup;
        ProtectCudaDev(x->devID, devIDBackup);

        KernelDropoutCompute<<<dim3(gridSize[0]), dim3(blockSize[0])>>>
                            ((DTYPE*)x->data, (DTYPE*)y->data, (DTYPE*)mask->data,
                             scaleFactor, x->unitNum);

        BacktoCudaDev(x->devID, devIDBackup);
    }
    else
        ShowNTErrors("TODO!");
}
/*
backward computation of dropout function (Cuda kernel)
dE/dx = dE/dy * dy/dx, where dy/dx is the mask times the scale factor
>> dedy - dE/dy
>> dedx - dE/dx
>> m - mask indicator to set zero
>> s - the scale factor
>> size - size of input/output
*/
__global__
void KernelDropoutBackward(DTYPE * dedy, DTYPE * dedx,
                           DTYPE * m, DTYPE s, int size)
{
    int idx = blockDim.x * blockIdx.x + threadIdx.x;

    /* threads past the end of the array have nothing to do */
    if (idx >= size)
        return;

    dedx[idx] = dedy[idx] * m[idx] * s;
}
/*
backward computation of dropout function (Cuda version)
dE/dx = dE/dy * dy/dx
>> y - output of the dropout function
>> x - input of the dropout function
>> dedy - dE/dy
>> dedx - dE/dx
>> mask - mask tensor to set 0
>> scaleFactor - the scale factor
*/
void _CudaDropoutBackward(const XTensor * y, const XTensor * x,
                          const XTensor * dedy, XTensor * dedx,
                          const XTensor * mask, DTYPE scaleFactor)
{
    /* only the default data type is implemented so far */
    if(x->dataType != DEFAULT_DTYPE || y->dataType != DEFAULT_DTYPE){
        ShowNTErrors("TODO!");
        return;
    }

    int cudaGrids[3], cudaBlocks[3];
    GDevs.GetCudaThread(x->devID, x->unitNum, cudaGrids, cudaBlocks);

    int devIDBackup;
    ProtectCudaDev(x->devID, devIDBackup);

    /* dE/ds = dE/dy * dy/ds */
    KernelDropoutBackward<<<dim3(cudaGrids[0]), dim3(cudaBlocks[0])>>>
                          ((DTYPE*)dedy->data, (DTYPE*)dedx->data,
                           (DTYPE*)mask->data, scaleFactor, x->unitNum);

    BacktoCudaDev(x->devID, devIDBackup);
}
#endif
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-09-12
*/
#ifndef __DROPOUT_CUH__
#define __DROPOUT_CUH__
#include "../XTensor.h"
#include "Loss.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/* dropout function (Cuda version): y = x * mask * scaleFactor */
void _CudaDropout(const XTensor * x, XTensor * y, const XTensor * mask, DTYPE scaleFactor);

/* de/dx (Cuda version)
   fixed: the mask parameter was named "r" here but "mask" in the
   definition; use one name for consistency */
void _CudaDropoutBackward(const XTensor * y, const XTensor * x,
                          const XTensor * dedy, XTensor * dedx,
                          const XTensor * mask, DTYPE scaleFactor);
#endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor)
#endif // __DROPOUT_CUH__
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-09-12
*/
#ifndef __DROPOUT_H__
#define __DROPOUT_H__
#include "../XTensor.h"
#include "Loss.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
/* dropout function: fill y with a randomly-masked copy of x;
   "seed" drives the random mask, "prob" presumably is the drop
   probability — TODO confirm against Dropout.cpp */
void _Dropout(const XTensor * x, XTensor * y, unsigned int seed, DTYPE prob = 0.5);
/* dropout function (operator form, returns the result tensor) */
XTensor Dropout(const XTensor &x, DTYPE prob = 0.5);
/* de/dx: backward computation of the dropout function;
   must be called with the same seed/prob as the forward pass so
   that the same mask is regenerated */
void _DropoutBackward(const XTensor * y, const XTensor * x,
const XTensor * dedy, XTensor * dedx,
unsigned int seed, DTYPE prob = 0.5);
} // namespace nts(NiuTrans.Tensor)
#endif // __DROPOUT_H__
\ No newline at end of file
...@@ -51,8 +51,7 @@ DTYPE _LossCompute(XTensor * gold, XTensor * output, LOSS_FUNCTION_NAME LFName, ...@@ -51,8 +51,7 @@ DTYPE _LossCompute(XTensor * gold, XTensor * output, LOSS_FUNCTION_NAME LFName,
CheckNTErrors((XTensor::IsSameShaped(gold, output)), "The input tensors must be of the same size!"); CheckNTErrors((XTensor::IsSameShaped(gold, output)), "The input tensors must be of the same size!");
CheckNTErrors((gold->dimSizeRDI[0] == 1 && output->dimSizeRDI[0] == 1), "TODO!"); CheckNTErrors((gold->dimSizeRDI[0] == 1 && output->dimSizeRDI[0] == 1), "TODO!");
CheckNTErrors((gold->order > leadDim && leadDim >= 0), "Illegal leading dimension!"); CheckNTErrors((gold->order > leadDim && leadDim >= 0), "Illegal leading dimension!");
CheckNTErrors((gold->dataType == DEFAULT_DTYPE && output->dataType == DEFAULT_DTYPE), CheckNTErrors((gold->dataType == DEFAULT_DTYPE && output->dataType == DEFAULT_DTYPE), "TODO!");
"TODO!");
int leadDimRDI = output->order - leadDim - 1; int leadDimRDI = output->order - leadDim - 1;
int dimensionSize = output->dimSizeRDI[leadDimRDI]; int dimensionSize = output->dimSizeRDI[leadDimRDI];
......
...@@ -58,8 +58,7 @@ DTYPE _CudaLossCompute(XTensor * gold, XTensor * y, LOSS_FUNCTION_NAME LFName, ...@@ -58,8 +58,7 @@ DTYPE _CudaLossCompute(XTensor * gold, XTensor * y, LOSS_FUNCTION_NAME LFName,
CheckNTErrors((XTensor::IsSameShaped(gold, y)), "The input tensors must be of the same size!"); CheckNTErrors((XTensor::IsSameShaped(gold, y)), "The input tensors must be of the same size!");
CheckNTErrors((gold->dimSizeRDI[0] == 1 && y->dimSizeRDI[0] == 1), "TODO!"); CheckNTErrors((gold->dimSizeRDI[0] == 1 && y->dimSizeRDI[0] == 1), "TODO!");
CheckNTErrors((gold->order > leadDim && leadDim >= 0), "Illegal leading dimension!"); CheckNTErrors((gold->order > leadDim && leadDim >= 0), "Illegal leading dimension!");
CheckNTErrors((gold->dataType == DEFAULT_DTYPE && y->dataType == DEFAULT_DTYPE), CheckNTErrors((gold->dataType == DEFAULT_DTYPE && y->dataType == DEFAULT_DTYPE), "TODO!");
"TODO!");
CheckNTErrors((gold->devID == y->devID), "Tensors must be on the same device!"); CheckNTErrors((gold->devID == y->devID), "Tensors must be on the same device!");
CheckNTErrors((gold->devID >= 0), "Tensors must be on GPU device!"); CheckNTErrors((gold->devID >= 0), "Tensors must be on GPU device!");
CheckNTErrors((gLen == gold->dimSize[leadDim] && gBeg == 0 && yBeg == 0), "TODO!"); CheckNTErrors((gLen == gold->dimSize[leadDim] && gBeg == 0 && yBeg == 0), "TODO!");
......
...@@ -48,19 +48,19 @@ loss function to measure the "number" of errors ...@@ -48,19 +48,19 @@ loss function to measure the "number" of errors
/* compute the loss */ /* compute the loss */
DTYPE _LossCompute(XTensor * gold, XTensor * output, LOSS_FUNCTION_NAME LFName, DTYPE _LossCompute(XTensor * gold, XTensor * output, LOSS_FUNCTION_NAME LFName,
bool isLogOutput, int leadDim, int gBeg, int gLen, int oBeg); bool isLogOutput, int leadDim, int gBeg, int gLen, int oBeg);
/* compute the loss (log version) */ /* compute the loss (log version) */
DTYPE _LossComputeForLogScale(XTensor * gold, XTensor * output, LOSS_FUNCTION_NAME LFName, DTYPE _LossComputeForLogScale(XTensor * gold, XTensor * output, LOSS_FUNCTION_NAME LFName,
int leadDim, int gBeg, int gLen, int oBeg); int leadDim, int gBeg, int gLen, int oBeg);
/* backward compuation for a single element */ /* backward compuation for a single element */
DTYPE _LossBackwardPoint(DTYPE t, DTYPE y, LOSS_FUNCTION_NAME LFName); DTYPE _LossBackwardPoint(DTYPE t, DTYPE y, LOSS_FUNCTION_NAME LFName);
/* backward compuation for (dense) vectors */ /* backward compuation for (dense) vectors */
void _LossBackward(XTensor * dEdY, XTensor * t, XTensor * y, void _LossBackward(XTensor * dEdY, XTensor * t, XTensor * y,
LOSS_FUNCTION_NAME LFName, LOSS_FUNCTION_NAME LFName,
int leadDim = -1, int tBeg = 0, int tLen = -1, int yBeg = 0); int leadDim = -1, int tBeg = 0, int tLen = -1, int yBeg = 0);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -16,8 +16,8 @@ ...@@ -16,8 +16,8 @@
*/ */
/* /*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-25 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-25
*/ */
#include "../XName.h" #include "../XName.h"
#include <math.h> #include <math.h>
......
...@@ -16,8 +16,8 @@ ...@@ -16,8 +16,8 @@
*/ */
/* /*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-25 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-25
*/ */
#include "Sigmoid.h" #include "Sigmoid.h"
#include "Sigmoid.cuh" #include "Sigmoid.cuh"
......
...@@ -29,7 +29,7 @@ namespace nts{ // namespace nts(NiuTrans.Tensor) ...@@ -29,7 +29,7 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA #ifdef USE_CUDA
/* rectify function y = max(0, x) (Cuda version) */ /* sigmoid function y = 1/(1+exp(-x)) (Cuda version) */
void _CudaSigmoid(const XTensor * input, XTensor * output); void _CudaSigmoid(const XTensor * input, XTensor * output);
/* de/dx (Cuda version) */ /* de/dx (Cuda version) */
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-09-12
*/
#include "../XUtility.h"
#include "TDropout.h"
#include "../core/getandset/SetData.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
case 1: test Dropout function.
Run dropout on a random tensor and count how many units were zeroed,
both for the explicit-probability call and the default-probability one.
*/
bool TestDropout1()
{
    /* an input tensor of size (40, 50) — fixed: the old comment said (4, 5) */
    int order = 2;
    int * dimSize = new int[order];
    dimSize[0] = 40;
    dimSize[1] = 50;

    int unitNum = 1;
    for (int i = 0; i < order; i++)
        unitNum *= dimSize[i];

    /* CPU test */
    bool cpuTest = true;

    /* create tensors */
    XTensor * x = NewTensor(order, dimSize);
    XTensor * y = NewTensor(order, dimSize);
    XTensor yUser;

    /* initialize variables */
    x->SetDataRand(0, 1);
    y->SetZeroAll();

    /* call Dropout function */
    float prob = 0.2F;
    int seed = 20;
    _Dropout(x, y, seed, prob);
    yUser = Dropout(*x);

    /* check result: count the zero units produced by dropout */
    int zeroNum1 = 0;
    int zeroNum2 = 0;
    float * data1 = (float*)y->data;
    float * data2 = (float*)yUser.data;
    for (int i = 0; i < unitNum; i++){
        DTYPE tmp1 = data1[i];
        DTYPE tmp2 = data2[i];
        if(tmp1 == 0.0F)
            zeroNum1 += 1;
        if(tmp2 == 0.0F)
            zeroNum2 += 1;
    }
    printf("CPU Test:\n");
    printf("In tensor y, there are %d units.\n", unitNum);
    printf("There are %d zero units by Dropout layer with probability %.2f.\n", zeroNum1, prob);
    printf("In tensor yUser, there are %d units.\n", unitNum);
    printf("There are %d zero units by Dropout layer with default probability %.2f.\n", zeroNum2, 0.5F);

#ifdef USE_CUDA
    /* GPU test */
    bool gpuTest = true;

    /* create tensors */
    XTensor * xGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
    XTensor * yGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
    XTensor yUserGPU;

    /* initialize variables */
    xGPU->SetDataRand(0, 1);
    yGPU->SetZeroAll();

    /* call Dropout function */
    _Dropout(xGPU, yGPU, seed, prob);
    yUserGPU = Dropout(*xGPU);

    /* check result
       NOTE(review): yGPU/yUserGPU live in device memory and cannot be
       dereferenced on the host, so the counts below still scan the CPU
       results (same as the original code). TODO: copy the GPU tensors
       back to host memory and verify them directly. */
    zeroNum1 = 0;
    zeroNum2 = 0;
    data1 = (float*)y->data;
    data2 = (float*)yUser.data;
    for (int i = 0; i < unitNum; i++){
        DTYPE tmp1 = data1[i];
        DTYPE tmp2 = data2[i];
        if(tmp1 == 0.0F)
            zeroNum1 += 1;
        if(tmp2 == 0.0F)
            zeroNum2 += 1;
    }
    /* fixed: this section previously printed "CPU Test:" */
    printf("GPU Test:\n");
    printf("In tensor y, there are %d units.\n", unitNum);
    printf("There are %d zero units by Dropout layer with probability %.2f.\n", zeroNum1, prob);
    printf("In tensor yUser, there are %d units.\n", unitNum);
    printf("There are %d zero units by Dropout layer with default probability %.2f.\n", zeroNum2, 0.5F);

    /* destroy variables */
    delete x;
    delete y;
    delete xGPU;
    delete yGPU;
    delete[] dimSize;

    return cpuTest && gpuTest;
#else
    /* destroy variables */
    delete x;
    delete y;
    delete[] dimSize;

    return cpuTest;
#endif // USE_CUDA
}
/*
case 2: test Dropout function and backward computation.
Forward and backward must be driven by the same seed so that they
regenerate the same dropout mask.
*/
bool TestDropout2()
{
    /* an input tensor of size (4, 5) */
    int order = 2;
    int * dimSize = new int[order];
    dimSize[0] = 4;
    dimSize[1] = 5;

    int unitNum = 1;
    for (int i = 0; i < order; i++)
        unitNum *= dimSize[i];

    /* CPU test */
    bool cpuTest = true;

    /* create tensors */
    XTensor * x = NewTensor(order, dimSize);
    XTensor * y = NewTensor(order, dimSize);
    XTensor * dedx = NewTensor(order, dimSize);
    XTensor * dedy = NewTensor(order, dimSize);

    /* initialize variables */
    _SetDataFixedFloat(x, 1.0F);
    y->SetZeroAll();
    dedx->SetZeroAll();
    _SetDataFixedFloat(dedy, 1.0F);

    /* call Dropout function */
    float prob = 0.5F;
    int seed = 1;
    _Dropout(x, y, seed, prob);
    /* fixed: pass the seed variable instead of the literal 1 so forward
       and backward stay in sync if the seed is ever changed */
    _DropoutBackward(y, x, dedy, dedx, seed, prob);

    /* check result (fixed: dedx was dumped under the label "dedy") */
    y->Dump(stderr, "y");
    dedx->Dump(stderr, "dedx");

#ifdef USE_CUDA
    /* GPU test */
    bool gpuTest = true;

    /* create tensors */
    XTensor * xGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
    XTensor * yGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
    XTensor * dedxGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
    XTensor * dedyGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);

    /* initialize variables */
    _SetDataFixedFloat(xGPU, 1.0F);
    yGPU->SetZeroAll();
    dedxGPU->SetZeroAll();
    _SetDataFixedFloat(dedyGPU, 1.0F);

    /* call Dropout function */
    _Dropout(xGPU, yGPU, seed, prob);
    _DropoutBackward(yGPU, xGPU, dedyGPU, dedxGPU, seed, prob);

    /* check result (fixed: dedxGPU was dumped under the label "dedyGPU") */
    yGPU->Dump(stderr, "yGPU");
    dedxGPU->Dump(stderr, "dedxGPU");

    /* destroy variables */
    delete x;
    delete y;
    delete dedx;
    delete dedy;
    delete xGPU;
    delete yGPU;
    delete dedxGPU;
    delete dedyGPU;
    delete[] dimSize;

    return cpuTest && gpuTest;
#else
    /* destroy variables
       fixed: dedx and dedy were leaked on this path */
    delete x;
    delete y;
    delete dedx;
    delete dedy;
    delete[] dimSize;

    return cpuTest;
#endif // USE_CUDA
}
/* other cases */
/*
TODO!!
*/
/* test for Dropout Function: runs every case and reports a summary */
bool TestDropout()
{
    XPRINT(0, stdout, "[TEST DROPOUT] dropout function and its backward computation \n");
    bool returnFlag = true, caseFlag = true;

    /* case 1 test */
    caseFlag = TestDropout1();
    if (caseFlag)
        XPRINT(0, stdout, ">> case 1 passed!\n");
    else {
        returnFlag = false;
        XPRINT(0, stdout, ">> case 1 failed!\n");
    }

    /* case 2 test */
    caseFlag = TestDropout2();
    if (caseFlag)
        XPRINT(0, stdout, ">> case 2 passed!\n");
    else {
        returnFlag = false;
        XPRINT(0, stdout, ">> case 2 failed!\n");
    }

    /* other cases test */
    /*
    TODO!!
    */

    if (returnFlag)
        XPRINT(0, stdout, ">> All Passed!\n");
    else
        XPRINT(0, stdout, ">> Failed!\n");

    XPRINT(0, stdout, "\n");

    return returnFlag;
}
} // namespace nts(NiuTrans.Tensor)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-09-12
*/
#ifndef __TEST_DROPOUT_H__
#define __TEST_DROPOUT_H__
#include "../function/Dropout.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* test for Dropout Function */
bool TestDropout();
} // namespace nts(NiuTrans.Tensor)
#endif // __TEST_DROPOUT_H__
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
* $Created by: LI Yinqiao (email: li.yin.qiao.2012@hotmail.com) 2018-04-30 * $Created by: LI Yinqiao (email: li.yin.qiao.2012@hotmail.com) 2018-04-30
*/ */
#include<math.h>
#include "../core/math/ScaleAndShift.h" #include "../core/math/ScaleAndShift.h"
#include "TLoss.h" #include "TLoss.h"
...@@ -62,7 +63,7 @@ bool TestLoss1() ...@@ -62,7 +63,7 @@ bool TestLoss1()
error = _LossCompute(gold, output, SQUAREDERROR, false, 0, 0, dimSize[0], 0); error = _LossCompute(gold, output, SQUAREDERROR, false, 0, 0, dimSize[0], 0);
/* check results */ /* check results */
cpuTest = (error == answer); cpuTest = (fabs(error - answer) < 1e-4);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
...@@ -82,7 +83,7 @@ bool TestLoss1() ...@@ -82,7 +83,7 @@ bool TestLoss1()
error = _LossCompute(goldGPU, outputGPU, SQUAREDERROR, false, 0, 0, dimSize[0], 0); error = _LossCompute(goldGPU, outputGPU, SQUAREDERROR, false, 0, 0, dimSize[0], 0);
/* check results */ /* check results */
gpuTest = (error == answer); gpuTest = (fabs(error - answer) < 1e-4);
/* destroy variables */ /* destroy variables */
delete output; delete output;
...@@ -140,7 +141,7 @@ bool TestLoss2() ...@@ -140,7 +141,7 @@ bool TestLoss2()
error = _LossCompute(gold, output, CROSSENTROPY, false, 0, 0, dimSize[0], 0); error = _LossCompute(gold, output, CROSSENTROPY, false, 0, 0, dimSize[0], 0);
/* check results */ /* check results */
cpuTest = (error == answer); cpuTest = (fabs(error - answer) < 1e-4);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
...@@ -160,7 +161,7 @@ bool TestLoss2() ...@@ -160,7 +161,7 @@ bool TestLoss2()
error = _LossCompute(goldGPU, outputGPU, CROSSENTROPY, false, 0, 0, dimSize[0], 0); error = _LossCompute(goldGPU, outputGPU, CROSSENTROPY, false, 0, 0, dimSize[0], 0);
/* check results */ /* check results */
gpuTest = (error == answer); gpuTest = (fabs(error - answer) < 1e-4);
/* destroy variables */ /* destroy variables */
delete output; delete output;
...@@ -226,7 +227,7 @@ bool TestLoss3() ...@@ -226,7 +227,7 @@ bool TestLoss3()
error = _LossCompute(gold, output, ONEHOTERROR, false, 0, 0, dimSize[0], 0); error = _LossCompute(gold, output, ONEHOTERROR, false, 0, 0, dimSize[0], 0);
/* check results */ /* check results */
cpuTest = (error == answer); cpuTest = (fabs(error - answer) < 1e-4);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
...@@ -244,7 +245,7 @@ bool TestLoss3() ...@@ -244,7 +245,7 @@ bool TestLoss3()
error = _LossCompute(goldGPU, outputGPU, ONEHOTERROR, false, 0, 0, dimSize[0], 0); error = _LossCompute(goldGPU, outputGPU, ONEHOTERROR, false, 0, 0, dimSize[0], 0);
/* check results */ /* check results */
gpuTest = (error == answer); gpuTest = (fabs(error - answer) < 1e-4);
/* destroy variables */ /* destroy variables */
delete output; delete output;
......
...@@ -76,6 +76,7 @@ bool Test() ...@@ -76,6 +76,7 @@ bool Test()
wrong = !TestUnsqueeze() || wrong; wrong = !TestUnsqueeze() || wrong;
wrong = !TestXMem() || wrong; wrong = !TestXMem() || wrong;
wrong = !TestDropout() || wrong;
wrong = !TestHardTanH() || wrong; wrong = !TestHardTanH() || wrong;
wrong = !TestIdentity() || wrong; wrong = !TestIdentity() || wrong;
wrong = !TestLogSoftmax() || wrong; wrong = !TestLogSoftmax() || wrong;
......
...@@ -69,6 +69,7 @@ ...@@ -69,6 +69,7 @@
#include "TUnsqueeze.h" #include "TUnsqueeze.h"
#include "TXMem.h" #include "TXMem.h"
#include "TDropout.h"
#include "THardTanH.h" #include "THardTanH.h"
#include "TIdentity.h" #include "TIdentity.h"
#include "TLogSoftmax.h" #include "TLogSoftmax.h"
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论