Commit 97338baf by xiaotong

add binary math functions

parent 96bdb988
......@@ -1301,7 +1301,7 @@ bool XTensor::SetInt(int value, int offset)
int * d = (int*)data + offset;
return SetToDevice(devID, d, value);
return SetToDeviceInt(devID, d, value);
}
......
......@@ -51,6 +51,7 @@
#include "getandset/Select.h"
#include "getandset/SetData.h"
#include "math/Binary.h"
#include "math/Clip.h"
#include "math/Compare.h"
#include "math/Normalize.h"
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: JIANG Yufan (email: jiangyufan2018@outlook.com) 2019-04-05
*/
#include <math.h>
#include "../../XName.h"
#include "Binary.h"
#include "Binary.cuh"
namespace nts {
/* multiply a value by a constant factor: returns x * factor */
int scale(int x, int factor)
{
    int product = x * factor;
    return product;
}
/* divide a value by a constant divisor (integer division, truncates toward zero): returns x / divisor */
int descale(int x, int divisor)
{
    int quotient = x / divisor;
    return quotient;
}
/* add a constant offset to a value: returns x + offset */
int shift(int x, int offset)
{
    int sum = x + offset;
    return sum;
}
/* take the remainder of a value with respect to a constant base: returns x % base
   (C++ semantics: result has the sign of x) */
int mod(int x, int base)
{
    int remainder = x % base;
    return remainder;
}
#ifdef USE_CUDA
/* define two macros, specifying the respective function names (GPU mode) */

/*
in-place binary operation on an int tensor: b[i] = origFunc(a[i], num)
for every entry; dispatches to the CUDA implementation when the
tensor lives on a GPU device.
(removed the leftover debug call b->Dump(stderr, "zxc"))
*/
#define _SIMPLE_BINARY_FUNCTION(_funcName, _cudaFuncName, origFunc)        \
void _funcName(const XTensor * a, XTensor * b, int num)                    \
{                                                                          \
    /* run it on GPUs */                                                   \
    if (a->devID >= 0) {                                                   \
        _cudaFuncName(a, b, num);                                          \
        return;                                                            \
    }                                                                      \
    CheckNTErrors((XTensor::IsSameShaped(a, b)),                           \
                  "Input tensors should have the same type!");             \
    CheckNTErrors((a->dataType == X_INT), "TODO!");                        \
    int * d = (int*)a->data;                                               \
    int * db = (int*)b->data;                                              \
    for (int i = 0; i < a->unitNum; i++)                                   \
        db[i] = (int)origFunc(d[i], num);                                  \
}

/*
tensor-returning wrapper: makes a new (temporary-flagged) tensor b,
computes b = funcName(a, num), and returns b.
(removed the leftover debug call b.Dump(stderr, "asd"))
*/
#define SIMPLE_BINARY_FUNCTION(funcName, _funcName)                        \
XTensor funcName(const XTensor &a, int num)                                \
{                                                                          \
    XTensor b(&a);                                                         \
    b.SetTMPFlag();                                                        \
    _funcName(&a, &b, num);                                                \
    return b;                                                              \
}

_SIMPLE_BINARY_FUNCTION(_Scale, _CudaScale, scale)
SIMPLE_BINARY_FUNCTION(Scale, _Scale)
_SIMPLE_BINARY_FUNCTION(_DeScale, _CudaDeScale, descale)
SIMPLE_BINARY_FUNCTION(DeScale, _DeScale)
_SIMPLE_BINARY_FUNCTION(_Shift, _CudaShift, shift)
SIMPLE_BINARY_FUNCTION(Shift, _Shift)
_SIMPLE_BINARY_FUNCTION(_Mod, _CudaMod, mod)
SIMPLE_BINARY_FUNCTION(Mod, _Mod)
#else
/* define two macros, specifying the respective function names (CPU-only mode).
   unlike the GPU-mode versions above, these must not reference the _Cuda*
   functions, which are not declared when USE_CUDA is off */

#define _SIMPLE_BINARY_FUNCTION(_funcName, _cudaFuncName, origFunc)        \
void _funcName(const XTensor * a, XTensor * b, int num)                    \
{                                                                          \
    /* GPU tensors cannot be processed in a CPU-only build */              \
    CheckNTErrors((a->devID < 0), "No CUDA support in this build!");       \
    CheckNTErrors((XTensor::IsSameShaped(a, b)),                           \
                  "Input tensors should have the same type!");             \
    /* the loop below reads/writes int entries, so require X_INT           \
       (the previous DEFAULT_DTYPE check contradicted the int* casts) */   \
    CheckNTErrors((a->dataType == X_INT), "TODO!");                        \
    int * d = (int*)a->data;                                               \
    int * db = (int*)b->data;                                              \
    for (int i = 0; i < a->unitNum; i++)                                   \
        db[i] = (int)origFunc(d[i], num);                                  \
}

/* same tensor-returning signature as the GPU-mode build, so both builds
   match the declarations in Binary.h */
#define SIMPLE_BINARY_FUNCTION(funcName, _funcName)                        \
XTensor funcName(const XTensor &a, int num)                                \
{                                                                          \
    XTensor b(&a);                                                         \
    b.SetTMPFlag();                                                        \
    _funcName(&a, &b, num);                                                \
    return b;                                                              \
}

_SIMPLE_BINARY_FUNCTION(_Scale, _CudaScale, scale)
SIMPLE_BINARY_FUNCTION(Scale, _Scale)
_SIMPLE_BINARY_FUNCTION(_DeScale, _CudaDeScale, descale)
SIMPLE_BINARY_FUNCTION(DeScale, _DeScale)
_SIMPLE_BINARY_FUNCTION(_Shift, _CudaShift, shift)
SIMPLE_BINARY_FUNCTION(Shift, _Shift)
_SIMPLE_BINARY_FUNCTION(_Mod, _CudaMod, mod)
SIMPLE_BINARY_FUNCTION(Mod, _Mod)
#endif
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: JIANG Yufan (email: jiangyufan2018@outlook.com) 2019-04-05
*/
#include <math.h>
#include "../../XDevice.h"
#include "../../XName.h"
#include "Binary.h"
#include "Binary.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/* multiply a value by a constant factor on the device: returns x * factor */
__device__
int cudascale(int x, int factor)
{
    int product = x * factor;
    return product;
}
/* divide a value by a constant divisor on the device (integer division): returns x / divisor */
__device__
int cudadescale(int x, int divisor)
{
    int quotient = x / divisor;
    return quotient;
}
/* add a constant offset to a value on the device: returns x + offset */
__device__
int cudashift(int x, int offset)
{
    int sum = x + offset;
    return sum;
}
/* take the remainder of a value on the device: returns x % base */
__device__
int cudamod(int x, int base)
{
    int remainder = x % base;
    return remainder;
}
/* define one macro that emits, per binary function, a CUDA kernel
   (one thread per int entry, tail-guarded) and its host-side launcher.
   note: there must be NO trailing backslash after the final closing brace —
   the previous version spliced the first instantiation line into the macro
   body, so Scale was never instantiated and later expansions failed */
#define SIMPLE_BINARY_FUNCTION_GPU(funcName, origFunc)                     \
__global__                                                                 \
void Kernel##funcName(int * a, int * b, int size, int num)                 \
{                                                                          \
    /* flat 1D global index; guard handles the ragged last block */        \
    int i = blockDim.x * blockIdx.x + threadIdx.x;                         \
                                                                           \
    if (i < size)                                                          \
        b[i] = (int)origFunc(a[i], num);                                   \
}                                                                          \
                                                                           \
void _Cuda##funcName(const XTensor * a, XTensor * b, int num)              \
{                                                                          \
    CheckNTErrors((XTensor::IsSameShaped(a, b)),                           \
                  "Input tensors should have the same type!");             \
    CheckNTErrors((a->isSparse == false), "TODO!");                        \
                                                                           \
    int gridSize[3];                                                       \
    int blockSize[3];                                                      \
                                                                           \
    /* let the device manager pick a launch configuration for unitNum */   \
    GDevs.GetCudaThread(a->devID, a->unitNum, gridSize, blockSize);        \
                                                                           \
    dim3 blocks(gridSize[0]);                                              \
    dim3 threads(blockSize[0]);                                            \
                                                                           \
    int devIDBackup;                                                       \
    ProtectCudaDev(a->devID, devIDBackup);                                 \
                                                                           \
    if (a->dataType == X_INT) {                                            \
        Kernel##funcName<<<blocks, threads>>>                              \
                        ((int*)a->data, (int*)b->data, a->unitNum, num);   \
    }                                                                      \
    else {                                                                 \
        ShowNTErrors("TODO!");                                             \
    }                                                                      \
                                                                           \
    BacktoCudaDev(a->devID, devIDBackup);                                  \
}

SIMPLE_BINARY_FUNCTION_GPU(Scale, cudascale)
SIMPLE_BINARY_FUNCTION_GPU(DeScale, cudadescale)
SIMPLE_BINARY_FUNCTION_GPU(Shift, cudashift)
SIMPLE_BINARY_FUNCTION_GPU(Mod, cudamod)
#endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: JIANG Yufan (email: jiangyufan2018@outlook.com) 2019-04-05
*/
#ifndef __BINARY_CUH__
#define __BINARY_CUH__
#include "../../XTensor.h"
#include "Binary.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/* scale each entry: b[i] = a[i] * num (CUDA kernel; size = number of entries) */
__global__
void KernelScale(int * a, int * b, int size, int num);
/* scale each int entry of tensor a into b (host-side launcher) */
void _CudaScale(const XTensor * a, XTensor * b, int num);
/* descale each entry: b[i] = a[i] / num (CUDA kernel; size = number of entries) */
__global__
void KernelDeScale(int * a, int * b, int size, int num);
/* descale each int entry of tensor a into b (host-side launcher) */
void _CudaDeScale(const XTensor * a, XTensor * b, int num);
/* shift each entry: b[i] = a[i] + num (CUDA kernel; size = number of entries) */
__global__
void KernelShift(int * a, int * b, int size, int num);
/* shift each int entry of tensor a into b (host-side launcher) */
void _CudaShift(const XTensor * a, XTensor * b, int num);
/* mod each entry: b[i] = a[i] % num (CUDA kernel; size = number of entries) */
__global__
void KernelMod(int * a, int * b, int size, int num);
/* mod each int entry of tensor a into b (host-side launcher) */
void _CudaMod(const XTensor * a, XTensor * b, int num);
#endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor)
#endif // __BINARY_CUH__
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: JIANG Yufan (email: jiangyufan2018@outlook.com) 2019-04-05
*/
#ifndef __BINARY_H__
#define __BINARY_H__
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
scale all tensor entries
b = a * scale
*/
void _Scale(const XTensor * a, XTensor * b, int num);
/*
scale tensor entries
make a new tensor to keep the result and return it
b = a * scale
*/
XTensor Scale(const XTensor & a, int num);
//void Scale(const XTensor & a, XTensor & b, int num);
/*
descale tensor entries
b = a / scale
*/
void _DeScale(const XTensor * a, XTensor * b, int num);
/*
descale tensor entries
make a new tensor to keep the result and return it
b = a / scale
*/
XTensor DeScale(const XTensor & a, int num);
/*
shift tensor entries
b = a + shift
*/
void _Shift(const XTensor * a, XTensor * b, int num);
/*
shift tensor entries
make a new tensor to keep the result and return it
b = a + shift
*/
XTensor Shift(const XTensor & a, int num);
/*
mod tensor entries
b = a % mod
*/
void _Mod(const XTensor * a, XTensor * b, int num);
/*
mod tensor entries
make a new tensor to keep the result and return it
b = a % mod
*/
XTensor Mod(const XTensor & a, int num);
} // namespace nts(NiuTrans.Tensor)
#endif // end __BINARY_H__
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论