Bug fix: clamp the KernelTopK3 thread limit to strideNum; add the missing newline at the end of TopK.cu

9f91f159 · xiaotong · 7c68f1e7 · 9f91f159
Commit 9f91f159 authored Jul 23, 2019 by xiaotong
--- a/source/tensor/core/sort/TopK.cu
+++ b/source/tensor/core/sort/TopK.cu
@@ -440,7 +440,7 @@ void KernelTopK3(T * input, int stride, int strideNum, int blockNum, int k, T mi
    eachHeapMaxValue[threadIdx.y * blockDim.x + threadIdx.x] = minData;
    //need more optimation
    if (i == 0) {
-        int threadLimit = (threadIdx.y + 1) * blockDim.x;
+        int threadLimit = threadIdx.y * blockDim.x + min(blockDim.x, strideNum);
        CudaXHeap<MIN_HEAP, T> chooseHeap(k, heapData + k * ((blockDim.x * blockDim.y) + threadIdx.y));
        int counter = threadIdx.y * blockDim.x;
        for (; counter < threadIdx.y * blockDim.x + k; ++counter) {
@@ -888,4 +888,4 @@ void _CudaTopK(const XTensor * a, XTensor * b, XTensor * index, int dim, int k)

 #endif // USE_CUDA

-} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
+} // namespace nts(NiuTrans.Tensor)