对于不同情况执行不同SoftMax的计算函数

002692e7 · 张裕浩 · acc044b2 · 002692e7
Commit 002692e7 authored Aug 02, 2018 by 张裕浩
--- a/source/tensor/function/Softmax.cu
+++ b/source/tensor/function/Softmax.cu
@@ -223,8 +223,13 @@ void _CudaSoftmaxSumMax(const XTensor * x, XTensor * y, int leadDim, XTensor * s
    int cudaGridSize[3];
    int cudaBlockSize[3];
+    if (leadDim != 0 || dimensionSize <= 10)
+    {
        //allocate thread num for old function
-    //GDevs.GetCudaThread2D(x->devID, stride * blockNum, dimensionSize, MAX_INT, cudaGridSize, cudaBlockSize);
+        GDevs.GetCudaThread2D(x->devID, stride * blockNum, dimensionSize, MAX_INT, cudaGridSize, cudaBlockSize);
+    }
+    else
+    {
        //allocate thread num for new function
        GDevs.GetCudaThread2D(x->devID, dimensionSize, stride * blockNum, MAX_INT, cudaGridSize, cudaBlockSize);
        if (cudaBlockSize[0] < 32)
@@ -236,19 +241,23 @@ void _CudaSoftmaxSumMax(const XTensor * x, XTensor * y, int leadDim, XTensor * s
                cudaBlockSize[1] = 32;
            }
        }
+    }
    int devIDBackup;
    ProtectCudaDev(x->devID, devIDBackup);
    if(x->dataType == DEFAULT_DTYPE && y->dataType == DEFAULT_DTYPE){
-        /*KernelSoftmaxComputeTensor<<<dim3(cudaGridSize[0], cudaGridSize[1]), dim3(cudaBlockSize[0], cudaBlockSize[1])>>>
+        if (leadDim != 0 || dimensionSize <= 10)
+        {
+            KernelSoftmaxComputeTensor << <dim3(cudaGridSize[0], cudaGridSize[1]), dim3(cudaBlockSize[0], cudaBlockSize[1]) >> >
                                        ((DTYPE*)x->data, (DTYPE*)max->data, (DTYPE*)sum->data, (DTYPE*)y->data,
                                        stride, dimensionSize, stride * dimensionSize, blockNum, stride * blockNum);
-                                     */
+        }
+        else
+        {
            KernelSoftmaxComputeTensorUseBroadcast << <dim3(cudaGridSize[0], cudaGridSize[1]), dim3(cudaBlockSize[0], cudaBlockSize[1]) >> >
                                       ((DTYPE*)x->data, (DTYPE*)max->data, (DTYPE*)sum->data, (DTYPE*)y->data,
                                         stride, dimensionSize, blockNum);
-        printf("%d %d %d %d\n", cudaGridSize[0], cudaGridSize[1], cudaBlockSize[0], cudaBlockSize[1]);
+        }
    }
    else if(x->dataType == X_FLOAT16 && y->dataType == X_FLOAT16){
        KernelSoftmaxComputeTensor<<<dim3(cudaGridSize[0], cudaGridSize[1]), dim3(cudaBlockSize[0], cudaBlockSize[1])>>>