Commit 1d5cd56f by liyinqiao

Merge branch 'xuchen' into liyinqiao

# Conflicts:
#	source/tensor/XTensor.cpp
#	source/tensor/core/getandset/OnehotAndIndex.cpp
#	source/tensor/core/getandset/OnehotAndIndex.cu
parents f4838e86 36903fdb
@@ -67,17 +67,49 @@ int BinaryMod(int x, int num)
 }
 
 /* define three marco separately, specify the respective function names */
+#ifdef USE_CUDA
 #define _SIMPLE_BINARY_FUNCTION(_funcName, _cudaFuncName, origFunc) \
 template<class T> \
 void _funcName(const XTensor * a, XTensor * b, T num) \
 { \
     /* run it on GPUs */ \
     if (a->devID >= 0) { \
-        if (useCUDA) { \
         _cudaFuncName(a, b, num); \
         return; \
     } \
+    CheckNTErrors((XTensor::IsSameShaped(a, b)), \
+                  "Input tensors should have the same data type!"); \
+    if (a->dataType == X_INT) { \
+        int * d = (int*)a->data; \
+        int * db = (int*)b->data; \
+        for (int i = 0; i < a->unitNum; i++) \
+            db[i] = (int)origFunc((int)d[i], (T)num); \
+    } \
+    else if (a->dataType == X_FLOAT) { \
+        float * d = (float*)a->data; \
+        float * db = (float*)b->data; \
+        for (int i = 0; i < a->unitNum; i++) \
+            db[i] = (float)origFunc((float)d[i], (T)num); \
+    } \
+    else if (a->dataType == X_DOUBLE) { \
+        double * d = (double*)a->data; \
+        double * db = (double*)b->data; \
+        for (int i = 0; i < a->unitNum; i++) \
+            db[i] = (double)origFunc((double)d[i], (T)num); \
+    } \
     else \
+        ShowNTErrors("TO DO!"); \
+} \
+template void _funcName<int>(const XTensor*, XTensor*, int); \
+template void _funcName<float>(const XTensor*, XTensor*, float); \
+template void _funcName<double>(const XTensor*, XTensor*, double);
+#else
+#define _SIMPLE_BINARY_FUNCTION(_funcName, origFunc) \
+template<class T> \
+void _funcName(const XTensor * a, XTensor * b, T num) \
+{ \
+    /* run it on GPUs */ \
+    if (a->devID >= 0) { \
         ShowNTErrors("No GPU devices support!") \
     } \
     CheckNTErrors((XTensor::IsSameShaped(a, b)), \
@@ -106,6 +138,7 @@ void _funcName(const XTensor * a, XTensor * b, T num)
 template void _funcName<int>(const XTensor*, XTensor*, int); \
 template void _funcName<float>(const XTensor*, XTensor*, float); \
 template void _funcName<double>(const XTensor*, XTensor*, double);
+#endif
 
 #define _SIMPLE_BINARY_FUNCTION_ME(_funcNameMe, _funcName) \
 template<class T> \
@@ -159,31 +192,40 @@ template void funcName<int>(const XTensor&, XTensor&, int);
 template void funcName<float>(const XTensor&, XTensor&, float); \
 template void funcName<double>(const XTensor&, XTensor&, double);
 
+#ifdef USE_CUDA
 _SIMPLE_BINARY_FUNCTION(_Descale, _CudaDescale, BinaryDescale)
+_SIMPLE_BINARY_FUNCTION(_Mod, _CudaMod, BinaryMod)
+_SIMPLE_BINARY_FUNCTION(_Power, _CudaPower, BinaryPower)
+_SIMPLE_BINARY_FUNCTION(_Scale, _CudaScale, BinaryScale)
+_SIMPLE_BINARY_FUNCTION(_Shift, _CudaShift, BinaryShift)
+#else
+_SIMPLE_BINARY_FUNCTION(_Descale, BinaryDescale)
+_SIMPLE_BINARY_FUNCTION(_Mod, BinaryMod)
+_SIMPLE_BINARY_FUNCTION(_Power, BinaryPower)
+_SIMPLE_BINARY_FUNCTION(_Scale, BinaryScale)
+_SIMPLE_BINARY_FUNCTION(_Shift, BinaryShift)
+#endif
+
 _SIMPLE_BINARY_FUNCTION_ME(_DescaleMe, _Descale)
 SIMPLE_BINARY_FUNCTION_ME(DescaleMe, _Descale)
 SIMPLE_BINARY_FUNCTION(Descale, _Descale, MATH_DESCALE)
 SIMPLE_BINARY_FUNCTION_VOID(Descale, _Descale, MATH_DESCALE)
 
-_SIMPLE_BINARY_FUNCTION(_Mod, _CudaMod, BinaryMod)
 _SIMPLE_BINARY_FUNCTION_ME(_ModMe, _Mod)
 SIMPLE_BINARY_FUNCTION_ME(ModMe, _Mod)
 SIMPLE_BINARY_FUNCTION(Mod, _Mod, MATH_MOD)
 SIMPLE_BINARY_FUNCTION_VOID(Mod, _Mod, MATH_MOD)
 
-_SIMPLE_BINARY_FUNCTION(_Power, _CudaPower, BinaryPower)
 _SIMPLE_BINARY_FUNCTION_ME(_PowerMe, _Power)
 SIMPLE_BINARY_FUNCTION_ME(PowerMe, _Power)
 SIMPLE_BINARY_FUNCTION(Power, _Power, MATH_POWER)
 SIMPLE_BINARY_FUNCTION_VOID(Power, _Power, MATH_POWER)
 
-_SIMPLE_BINARY_FUNCTION(_Scale, _CudaScale, BinaryScale)
 _SIMPLE_BINARY_FUNCTION_ME(_ScaleMe, _Scale)
 SIMPLE_BINARY_FUNCTION_ME(ScaleMe, _Scale)
 SIMPLE_BINARY_FUNCTION(Scale, _Scale, MATH_SCALE)
 SIMPLE_BINARY_FUNCTION_VOID(Scale, _Scale, MATH_SCALE)
 
-_SIMPLE_BINARY_FUNCTION(_Shift, _CudaShift, BinaryShift)
 _SIMPLE_BINARY_FUNCTION_ME(_ShiftMe, _Shift)
 SIMPLE_BINARY_FUNCTION_ME(ShiftMe, _Shift)
 SIMPLE_BINARY_FUNCTION(Shift, _Shift, MATH_SHIFT)
...
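For reference, here is a hand-expanded sketch of what the new USE_CUDA branch of _SIMPLE_BINARY_FUNCTION generates for one instantiation, _SIMPLE_BINARY_FUNCTION(_Scale, _CudaScale, BinaryScale). This expansion is an illustration derived from the macro above, not code from the commit; the X_INT and X_DOUBLE branches are elided for brevity.

/* illustration only: rough expansion of
   _SIMPLE_BINARY_FUNCTION(_Scale, _CudaScale, BinaryScale) under USE_CUDA */
template<class T>
void _Scale(const XTensor * a, XTensor * b, T num)
{
    /* GPU tensors now dispatch to the CUDA kernel unconditionally;
       the old runtime if (useCUDA) check is gone */
    if (a->devID >= 0) {
        _CudaScale(a, b, num);
        return;
    }
    /* CPU fallback: per-type loop applying BinaryScale elementwise */
    CheckNTErrors((XTensor::IsSameShaped(a, b)),
                  "Input tensors should have the same data type!");
    if (a->dataType == X_FLOAT) {
        float * d = (float*)a->data;
        float * db = (float*)b->data;
        for (int i = 0; i < a->unitNum; i++)
            db[i] = (float)BinaryScale((float)d[i], (T)num);
    }
    else
        ShowNTErrors("TO DO!"); /* X_INT / X_DOUBLE branches elided here */
}
template void _Scale<int>(const XTensor*, XTensor*, int);
template void _Scale<float>(const XTensor*, XTensor*, float);
template void _Scale<double>(const XTensor*, XTensor*, double);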
@@ -36,8 +36,8 @@ DTYPE myIsNotEqual(DTYPE a, DTYPE b)
     return (a != b ? 1.0F : 0.0F);
 }
 
-#ifdef USE_CUDA
 /* define three marco separately, specify the respective function names */
+#ifdef USE_CUDA
 #define _SIMPLE_COMPARE_FUNCTION(_funcName, _cudaFuncName, origFunc) \
 void _funcName(const XTensor * a, XTensor * b, DTYPE number) \
 { \
@@ -46,11 +46,23 @@ void _funcName(const XTensor * a, XTensor * b, DTYPE number)
     CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!"); \
     /* run it on GPUs */ \
     if (a->devID >= 0) { \
-        if (useCUDA) { \
         _cudaFuncName(a, b, number); \
         return; \
     } \
-        else \
+    DTYPE * d = (DTYPE*)a->data; \
+    DTYPE * db = (DTYPE*)b->data; \
+    for (int i = 0; i < a->unitNum; i++) \
+        db[i] = (DTYPE)origFunc(d[i], number); \
+}
+#else
+#define _SIMPLE_COMPARE_FUNCTION(_funcName, origFunc) \
+void _funcName(const XTensor * a, XTensor * b, DTYPE number) \
+{ \
+    CheckNTErrors((XTensor::IsSameShaped(a, b)), \
+                  "Input tensors should have the same type!"); \
+    CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!"); \
+    /* run it on GPUs */ \
+    if (a->devID >= 0) { \
         ShowNTErrors("No GPU devices support!") \
     } \
     DTYPE * d = (DTYPE*)a->data; \
@@ -58,6 +70,7 @@ void _funcName(const XTensor * a, XTensor * b, DTYPE number)
     for (int i = 0; i < a->unitNum; i++) \
         db[i] = (DTYPE)origFunc(d[i], number); \
 }
+#endif
 
 #define _SIMPLE_COMPARE_FUNCTION_ME(_funcNameMe, _funcName) \
 void _funcNameMe(XTensor * a, DTYPE number) \
@@ -92,18 +105,22 @@ void funcName(const XTensor &a, XTensor &b, DTYPE number)
     // I think we needn't to make link.
     // XLink::MakeLink(&a, NULL, &b, operationId);
 
+#ifdef USE_CUDA
 _SIMPLE_COMPARE_FUNCTION(_Equal, _CudaEqual, myIsEqual)
+_SIMPLE_COMPARE_FUNCTION(_NotEqual, _CudaNotEqual, myIsNotEqual)
+#else
+_SIMPLE_COMPARE_FUNCTION(_Equal, myIsEqual)
+_SIMPLE_COMPARE_FUNCTION(_NotEqual, myIsNotEqual)
+#endif
+
 _SIMPLE_COMPARE_FUNCTION_ME(_EqualMe, _Equal)
 SIMPLE_COMPARE_FUNCTION_ME(EqualMe, _Equal)
 SIMPLE_COMPARE_FUNCTION(Equal, _Equal, MATH_EQUAL)
 SIMPLE_COMPARE_FUNCTION_VOID(Equal, _Equal, MATH_EQUAL)
 
-_SIMPLE_COMPARE_FUNCTION(_NotEqual, _CudaNotEqual, myIsNotEqual)
 _SIMPLE_COMPARE_FUNCTION_ME(_NotEqualMe, _NotEqual)
 SIMPLE_COMPARE_FUNCTION_ME(NotEqualMe, _NotEqual)
 SIMPLE_COMPARE_FUNCTION(NotEqual, _NotEqual, MATH_NOTEQUAL)
 SIMPLE_COMPARE_FUNCTION_VOID(NotEqual, _NotEqual, MATH_NOTEQUAL)
-#endif
 
 } // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
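Similarly, a hand-expanded sketch of the new CPU-only (#else) variant, _SIMPLE_COMPARE_FUNCTION(_Equal, myIsEqual): without USE_CUDA, a tensor placed on a GPU device now fails hard at the devID check instead of consulting a runtime useCUDA flag. Again an illustration derived from the macro above, not commit code.

/* illustration only: rough expansion of
   _SIMPLE_COMPARE_FUNCTION(_Equal, myIsEqual) without USE_CUDA */
void _Equal(const XTensor * a, XTensor * b, DTYPE number)
{
    CheckNTErrors((XTensor::IsSameShaped(a, b)),
                  "Input tensors should have the same type!");
    CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!");
    /* a GPU-resident tensor is a hard error in a CPU-only build */
    if (a->devID >= 0) {
        ShowNTErrors("No GPU devices support!")
    }
    /* elementwise comparison on the CPU */
    DTYPE * d = (DTYPE*)a->data;
    DTYPE * db = (DTYPE*)b->data;
    for (int i = 0; i < a->unitNum; i++)
        db[i] = (DTYPE)myIsEqual(d[i], number);
}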
@@ -68,16 +68,44 @@ T UnaryIsZero(T r)
 }
 
 /* define three marco separately, specify the respective function names */
+#ifdef USE_CUDA
 #define _SIMPLE_UNARY_FUNCTION(_funcName, _cudaFuncName, origFunc) \
 void _funcName(const XTensor * a, XTensor * b) \
 { \
     /* run it on GPUs */ \
     if (a->devID >= 0) { \
-        if (useCUDA) { \
         _cudaFuncName(a, b); \
         return; \
     } \
+    CheckNTErrors((XTensor::IsSameShaped(a, b)), \
+                  "Input tensors should have the same type!"); \
+    if (a->dataType == X_INT) { \
+        int * d = (int*)a->data; \
+        int * db = (int*)b->data; \
+        for (int i = 0; i < a->unitNum; i++) \
+            db[i] = (int)origFunc(d[i]); \
+    } \
+    else if (a->dataType == X_FLOAT) { \
+        float * d = (float*)a->data; \
+        float * db = (float*)b->data; \
+        for (int i = 0; i < a->unitNum; i++) \
+            db[i] = (float)origFunc(d[i]); \
+    } \
+    else if (a->dataType == X_DOUBLE) { \
+        double * d = (double*)a->data; \
+        double * db = (double*)b->data; \
+        for (int i = 0; i < a->unitNum; i++) \
+            db[i] = (double)origFunc(d[i]); \
+    } \
     else \
+        ShowNTErrors("TO DO!"); \
+}
+#else
+#define _SIMPLE_UNARY_FUNCTION(_funcName, origFunc) \
+void _funcName(const XTensor * a, XTensor * b) \
+{ \
+    /* run it on GPUs */ \
+    if (a->devID >= 0) { \
         ShowNTErrors("No GPU devices support!") \
     } \
     CheckNTErrors((XTensor::IsSameShaped(a, b)), \
@@ -103,6 +131,7 @@ void _funcName(const XTensor * a, XTensor * b)
     else \
         ShowNTErrors("TO DO!"); \
 }
+#endif
 
 #define _SIMPLE_UNARY_FUNCTION_ME(_funcNameMe, _funcName) \
 void _funcNameMe(XTensor * a) \
@@ -138,6 +167,7 @@ void funcName(const XTensor & a, XTensor & b)
     } \
 }
 
+#ifdef USE_CUDA
 _SIMPLE_UNARY_FUNCTION(_Absolute, _CudaAbsolute, fabs)
 _SIMPLE_UNARY_FUNCTION(_Ceil, _CudaCeil, ceil)
 _SIMPLE_UNARY_FUNCTION(_Exp, _CudaExp, exp)
@@ -153,6 +183,23 @@ _SIMPLE_UNARY_FUNCTION(_Square, _CudaSquare, UnarySquare)
 _SIMPLE_UNARY_FUNCTION(_Sin, _CudaSin, sin)
 _SIMPLE_UNARY_FUNCTION(_Cos, _CudaCos, cos)
 _SIMPLE_UNARY_FUNCTION(_Tan, _CudaTan, tan)
+#else
+_SIMPLE_UNARY_FUNCTION(_Absolute, fabs)
+_SIMPLE_UNARY_FUNCTION(_Ceil, ceil)
+_SIMPLE_UNARY_FUNCTION(_Exp, exp)
+_SIMPLE_UNARY_FUNCTION(_Floor, floor)
+_SIMPLE_UNARY_FUNCTION(_IsNonZero, UnaryIsNonZero)
+_SIMPLE_UNARY_FUNCTION(_IsZero, UnaryIsZero)
+_SIMPLE_UNARY_FUNCTION(_Log, log)
+_SIMPLE_UNARY_FUNCTION(_Negate, UnaryNegate)
+_SIMPLE_UNARY_FUNCTION(_Round, round)
+_SIMPLE_UNARY_FUNCTION(_Sign, UnarySign)
+_SIMPLE_UNARY_FUNCTION(_Sqrt, sqrt)
+_SIMPLE_UNARY_FUNCTION(_Square, UnarySquare)
+_SIMPLE_UNARY_FUNCTION(_Sin, sin)
+_SIMPLE_UNARY_FUNCTION(_Cos, cos)
+_SIMPLE_UNARY_FUNCTION(_Tan, tan)
+#endif
 
 _SIMPLE_UNARY_FUNCTION_ME(_AbsoluteMe, _Absolute)
 SIMPLE_UNARY_FUNCTION_ME(AbsoluteMe, _Absolute)
...
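The unary macros follow the same compile-time dispatch pattern. As a final illustration (derived from the macro above, not commit code), _SIMPLE_UNARY_FUNCTION(_Absolute, fabs) in a CPU-only build expands roughly to:

/* illustration only: rough expansion of
   _SIMPLE_UNARY_FUNCTION(_Absolute, fabs) without USE_CUDA */
void _Absolute(const XTensor * a, XTensor * b)
{
    /* a GPU-resident tensor is a hard error in a CPU-only build */
    if (a->devID >= 0) {
        ShowNTErrors("No GPU devices support!")
    }
    CheckNTErrors((XTensor::IsSameShaped(a, b)),
                  "Input tensors should have the same type!");
    if (a->dataType == X_FLOAT) {
        float * d = (float*)a->data;
        float * db = (float*)b->data;
        for (int i = 0; i < a->unitNum; i++)
            db[i] = (float)fabs(d[i]);
    }
    else
        ShowNTErrors("TO DO!"); /* X_INT / X_DOUBLE branches elided here */
}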