Commit e434b79c by linye

email updated

parent e605710c
@@ -46,6 +46,7 @@ void HardTanHFP16Test();
void ReduceMaxFP16Test();
void ReduceSumFP16Test();
void LogSoftmaxFP16Test();
+void ClipFP16Test();
using namespace nts;
using namespace fnnlm;
@@ -82,6 +83,9 @@ int main(int argc, const char ** argv )
    //LogSoftmaxFP16Test();
    //return 0;
+    ClipFP16Test();
+    return 0;
    if (argc > 1 && !strcmp(argv[1], "-test"))
        Test();
    else if(argc > 1 && !strcmp(argv[1], "-fnnlm"))
@@ -100,6 +104,26 @@ int main(int argc, const char ** argv )
    return 0;
}
+void ClipFP16Test() {
+    XTensor a;
+    XTensor intA;
+    XTensor b;
+    XTensor intB;
+    InitTensor2D(&a, 1, 10, X_FLOAT, 0);
+    a.SetDataRand(-10.0F, 10.0F);
+    a.Dump(stderr, "a:");
+    intA = ConvertDataType(a, X_INT);
+    intB = Clip(intA, -1, 1);
+    b = ConvertDataType(intB, X_FLOAT);
+    b.Dump(stderr, "b:");
+}
void LogSoftmaxFP16Test() {
XTensor a;
......
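An aside, not part of this commit: despite its name, ClipFP16Test above exercises the int path (ConvertDataType to X_INT). A float16 round trip can be sketched the same way with the same XTensor calls; the function name below is hypothetical.

void ClipFP16RoundTripTest() {
    XTensor a;
    XTensor halfA;
    XTensor halfB;
    XTensor b;

    /* 1 x 10 float tensor on device 0, filled with values in [-10.0, 10.0] */
    InitTensor2D(&a, 1, 10, X_FLOAT, 0);
    a.SetDataRand(-10.0F, 10.0F);
    a.Dump(stderr, "a:");

    /* convert to float16, clip to [-1, 1], convert back for dumping */
    halfA = ConvertDataType(a, X_FLOAT16);
    halfB = Clip(halfA, -1.0F, 1.0F);
    b = ConvertDataType(halfB, X_FLOAT);
    b.Dump(stderr, "b:");
}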
@@ -17,7 +17,7 @@
/*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
-* $Update by: Lin Ye (linye2015@outlook.com) 2019-07-05 float16 added
+* $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-05 float16 added
*/
#include "../../XDevice.h"
......
@@ -17,7 +17,7 @@
/*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
-* $Update by: Lin Ye (linye2015@outlook.com) 2019-07-02 float16/int/int8 added
+* $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-02 float16/int/int8 added
*/
#include "../../XDevice.h"
......
@@ -17,7 +17,7 @@
/*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
-* $Update by: Lin Ye (linye2015@outlook.com) 2019-07-06 float16 added
+* $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-06 float16 added
*/
#include "../../XUtility.h"
......
@@ -17,6 +17,7 @@
/*
* $Created by: Lin Ye (email: linye2015@outlook.com) 2018-08-03
+* $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-06 float16/int/int8 added
*/
#include "../../XDevice.h"
@@ -35,34 +36,20 @@ set each entry to its clip value (CUDA Kernel)
>> upper - the upper border
>> size - size of the data array
*/
+template <class T>
__global__
-void KernelClip(DTYPE * a, DTYPE * b, DTYPE lower, DTYPE upper, int size)
+void KernelClip(T * a, T * b, T lower, T upper, int size)
{
-    int i = blockDim.x * blockIdx.x + threadIdx.x;
-    if (i < size) {
-        if (a[i] > upper)
-            b[i] = upper;
-        else if (a[i] < lower)
-            b[i] = lower;
-        else
-            b[i] = a[i];
-    }
-}
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
-/*
-set each entry to its clip value with float16 data type value (CUDA Kernel)
-This is for float16 computation
->> a - pointer to input data array
->> b - pointer to output data array
->> lower - the lower border
->> upper - the upper border
->> size - size of the data array
-*/
-__global__
-void KernelClip(__half * a, __half * b, DTYPE lower, DTYPE upper, int size)
-{
-    return;
+    if (i < size) {
+        if (a[i] > upper)
+            b[i] = upper;
+        else if (a[i] < lower)
+            b[i] = lower;
+        else
+            b[i] = a[i];
+    }
}
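A note on the templated kernel: the type parameter is deduced from the call site, so each element type compiles to its own kernel instance. A minimal, hypothetical host-side launch for the float case (grid and block sizes here are illustrative only, not from this commit):

/* hypothetical launcher; devA and devB are assumed to be device pointers */
void LaunchClipFloat(float * devA, float * devB, int size)
{
    int threads = 128;
    int blocks = (size + threads - 1) / threads;

    /* deduces T = float; __half and int calls instantiate separate kernels */
    KernelClip<<<blocks, threads>>>(devA, devB, -1.0f, 1.0f, size);
}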
/*
@@ -88,12 +75,30 @@ void _CudaClip(const XTensor * a, XTensor * b, DTYPE lower, DTYPE upper)
    int devIDBackup;
    ProtectCudaDev(a->devID, devIDBackup);
-    if (a->dataType == DEFAULT_DTYPE) {
-        KernelClip << <blocks, threads >> >((DTYPE*)a->data, (DTYPE*)b->data, lower, upper, a->unitNum);
-    }
-    else if (a->dataType == X_FLOAT16) {
-        KernelClip << <blocks, threads >> >((__half*)a->data, (__half*)b->data, lower, upper, a->unitNum);
-    }
+    if (a->dataType == DEFAULT_DTYPE) {
+        KernelClip << <blocks, threads >> >((DTYPE*)a->data, (DTYPE*)b->data, lower, upper, a->unitNum);
+    }
+    else if (a->dataType == X_FLOAT16) {
+        unsigned short temp1 = FloatToFloat16(lower);
+        unsigned short temp2 = FloatToFloat16(upper);
+        half lower1 = *((half *)&temp1);
+        half upper1 = *((half *)&temp2);
+        KernelClip << <blocks, threads >> >((__half*)a->data, (__half*)b->data, lower1, upper1, a->unitNum);
+    }
+    else if (a->dataType == X_INT) {
+        int lower1 = (int)lower;
+        int upper1 = (int)upper;
+        KernelClip << <blocks, threads >> >((int *)a->data, (int *)b->data, lower1, upper1, a->unitNum);
+    }
+    else if (a->dataType == X_INT8) {
+        __int8 lower1 = (__int8)lower;
+        __int8 upper1 = (__int8)upper;
+        KernelClip << <blocks, threads >> >((__int8 *)a->data, (__int8 *)b->data, lower1, upper1, a->unitNum);
+    }
    else {
        ShowNTErrors("TODO!");
    }
......
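The float16 branch above obtains half-precision bounds by reinterpreting the bit pattern returned by the project's FloatToFloat16 helper. For comparison, a sketch of the same conversion using the __float2half intrinsic from cuda_fp16.h; this is an alternative, not what the commit does, and it assumes a CUDA toolkit recent enough to call __float2half on the host. Note also that the > and < the kernel applies to __half values need compute capability 5.3 or newer.

#include <cuda_fp16.h>

/* hypothetical alternative to FloatToFloat16 + pointer cast */
void ClipHalf(__half * devA, __half * devB, float lower, float upper,
              int size, int blocks, int threads)
{
    __half lowerH = __float2half(lower);   /* narrow the float bound to 16 bits */
    __half upperH = __float2half(upper);

    KernelClip<<<blocks, threads>>>(devA, devB, lowerH, upperH, size);
}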
@@ -29,8 +29,8 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/* set each entry to its clip value (CUDA Kernel) */
-__global__
-void KernelClip(DTYPE * a, DTYPE * b, DTYPE lower, DTYPE upper, int size);
+template <class T> __global__
+void KernelClip(T * a, T * b, T lower, T upper, int size);
/* set each entry to its clip value (CUDA Kernel) with float16 data type*/
__global__
......
@@ -17,7 +17,7 @@
/*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
-* $Update by: Lin Ye (linye2015@outlook.com) 2019-07-03 float16 added
+* $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-03 float16 added
*/
#include "../../XDevice.h"
......
@@ -17,7 +17,7 @@
/*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
-* $Update by: Lin Ye (linye2015@outlook.com) 2019-07-03 float16 added
+* $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-03 float16 added
*/
#include "../../XDevice.h"
......
@@ -17,7 +17,7 @@
/*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-25
-* $Update by: Lin Ye (linye2015@outlook.com) 2019-07-04 float16 added
+* $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-04 float16 added
*/
#include "HardTanH.h"
......
@@ -17,7 +17,7 @@
/*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-26
-* $Update by: Lin Ye (linye2015@outlook.com) 2019-07-01 float16 added
+* $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-01 float16 added
*/
#include "LogSoftmax.h"
......
@@ -17,7 +17,7 @@
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-12
-* $Update by: Lin Ye (linye2015@outlook.com) 2019-07-06 int8 added
+* $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-06 int8 added
*/
#include "TConvertDataType.h"
......
@@ -17,7 +17,7 @@
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-08-01
-* $Update by: Lin Ye (linye2015@outlook.com) 2019-07-06 float16 added
+* $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-06 float16 added
*/
#include "TDiv.h"
......
@@ -17,7 +17,7 @@
/*
* $Created by: Lin Ye (email: linye2015@outlook.com) 2018-06-20
-* $Update by: Lin Ye (linye2015@outlook.com) 2019-07-06 float16 added
+* $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-06 float16 added
*/
#include "../XTensor.h"
......
@@ -17,7 +17,7 @@
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-02
-* $Update by: Lin Ye (linye2015@outlook.com) 2019-07-06 float16 added
+* $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-06 float16 added
*/
#include "../XUtility.h"
......
@@ -17,7 +17,7 @@
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-14
-* $Update by: Lin Ye (linye2015@outlook.com) 2019-07-06 float16 added
+* $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-06 float16 added
*/
#include "TMatrixMul.h"
......
@@ -17,7 +17,7 @@
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-30
-* $Update by: Lin Ye (linye2015@outlook.com) 2019-07-06 float16 added
+* $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-06 float16 added
*/
#include "TReduceMax.h"
......
@@ -17,7 +17,7 @@
/*
* $Created by: LI Yinqiao (email: li.yin.qiao.2012@hotmail.com) 2018-04-30
-* $Update by: Lin Ye (linye2015@outlook.com) 2019-07-06 float16 added
+* $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-06 float16 added
*/
#include "TReduceSum.h"
......
@@ -17,7 +17,7 @@
/*
* $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-04-30
-* $Update by: Lin Ye (linye2015@outlook.com) 2019-07-06 float16/int/int8 added
+* $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-06 float16/int/int8 added
*/
#include "TSum.h"
......