no message

6a3d713a · linye · 30217de4 · 6a3d713a · 6a3d713a · 6a3d713a
Commit 6a3d713a authored Jul 08, 2019 by linye
--- a/source/tensor/core/arithmetic/MultiplyDim.cu
+++ b/source/tensor/core/arithmetic/MultiplyDim.cu
@@ -169,6 +169,35 @@ void _CudaMultiplyDim(const XTensor * a, const XTensor * b, XTensor * c, int n, 
            ShowNTErrors("Something is wrong!");
        }
    }
+    else if (a->dataType == X_FLOAT16) {  // chained with "else if" so the preceding branch does not fall through to the final "TODO!" else
+        unsigned short temp = FloatToFloat16(alpha);  // alpha converted with FloatToFloat16 and held as 16 raw bits
+        half alpha1 = *((half *)&temp);  // reinterpret those 16 bits as a half to pass to the kernels
+        if (stride > 1) {
+            GDevs.GetCudaThread2D(a->devID, stride * blockNum, blockSize, MAX_INT, cudaGrids, cudaBlocks);
+            if (alpha == 0.0F)  // alpha == 0 selects the <__half, false> instantiation, otherwise <__half, true>
+                KernelMultiplyWithCol<__half, false> <<<dim3(cudaGrids[0], cudaGrids[1]), dim3(cudaBlocks[0], cudaBlocks[1])>>>
+                                                      ((__half*)a->data, (__half*)b->data, (__half*)c->data,
+                                                        blockSize, stride, blockSize * stride, blockNum, alpha1);
+            else
+                KernelMultiplyWithCol<__half, true> <<<dim3(cudaGrids[0], cudaGrids[1]), dim3(cudaBlocks[0], cudaBlocks[1])>>>
+                                                     ((__half*)a->data, (__half*)b->data, (__half*)c->data,
+                                                       blockSize, stride, blockSize * stride, blockNum, alpha1);
+        }
+        else if (stride == 1) {
+            GDevs.GetCudaThread2D(a->devID, blockSize, blockNum, MAX_INT, cudaGrids, cudaBlocks);
+            if (alpha == 0.0F)  // same alpha-based selection as the stride > 1 case
+                KernelMultiplyWithRow<__half, false> <<<dim3(cudaGrids[0], cudaGrids[1]), dim3(cudaBlocks[0], cudaBlocks[1])>>>
+                                                      ((__half*)a->data, (__half*)b->data, (__half*)c->data,
+                                                        blockNum, blockSize, alpha1);
+            else
+                KernelMultiplyWithRow<__half, true> <<<dim3(cudaGrids[0], cudaGrids[1]), dim3(cudaBlocks[0], cudaBlocks[1])>>>
+                                                     ((__half*)a->data, (__half*)b->data, (__half*)c->data,
+                                                       blockNum, blockSize, alpha1);
+        }
+        else {
+            ShowNTErrors("Something is wrong!");  // stride is neither > 1 nor == 1
+        }
+    }
    else {
        ShowNTErrors("TODO!");
    }

--- a/source/tensor/core/arithmetic/Negate.cu
+++ b/source/tensor/core/arithmetic/Negate.cu
@@ -33,8 +33,9 @@ set each entry to its negtive value (CUDA Kernel)
 >> b - pointer to the output data array
 >> size - size of the data array
 */
+template <class T>
 __global__
-void KernelNegate(DTYPE * a, DTYPE * b, int size)
+void KernelNegate(T * a, T * b, int size)
 {
    int i = blockDim.x * blockIdx.x + threadIdx.x;

@@ -42,26 +43,26 @@ void KernelNegate(DTYPE * a, DTYPE * b, int size)
        b[i] = -a[i];
 }

-/*
-set each entry to its negtive value (CUDA Kernel)
-This is for float16 computation
->> a - pointer to the input data array
->> b - pointer to the output data array
->> size - size of the data array
-*/
-__global__
-void KernelNegate(__half * a, __half * b, int size)
-{
-    int i = blockDim.x * blockIdx.x + threadIdx.x;
-
-#if __CUDA_ARCH__ >= 530 || !defined(__CUDA_ARCH__)
-        if (i < size)
-            b[i] = __hsub(__float2half(0), a[i]);
-#else
-        if (i < size)
-            b[i] = __float2half(-__half2float(a[i]));
-#endif
-}
+///*
+//set each entry to its negative value (CUDA Kernel)
+//This is for float16 computation
+//>> a - pointer to the input data array
+//>> b - pointer to the output data array
+//>> size - size of the data array
+//*/
+//__global__
+//void KernelNegate(__half * a, __half * b, int size)
+//{
+//    int i = blockDim.x * blockIdx.x + threadIdx.x;
+//
+//#if __CUDA_ARCH__ >= 530 || !defined(__CUDA_ARCH__)
+//        if (i < size)
+//            b[i] = __hsub(__float2half(0), a[i]);
+//#else
+//        if (i < size)
+//            b[i] = __float2half(-__half2float(a[i]));
+//#endif
+//}

 /*
 set each entry to its negtive value

--- a/source/tensor/function/LogSoftmax.cu
+++ b/source/tensor/function/LogSoftmax.cu
--- a/source/tensor/test/Test.cpp
+++ b/source/tensor/test/Test.cpp
@@ -84,7 +84,7 @@ bool Test()
 	//wrong = !TestDropout() || wrong;
    //wrong = !TestHardTanH() || wrong;
    //wrong = !TestIdentity() || wrong;
-    //wrong = !TestLogSoftmax() || wrong;
+    wrong = !TestLogSoftmax() || wrong;
    //wrong = !TestLoss() || wrong;
    //wrong = !TestRectify() || wrong;
    //wrong = !TestSigmoid() || wrong;