Commit 75124987 by xiaotong

bug fixes

parent a26caf40
......@@ -34,7 +34,7 @@ void TransposeTest();
void SumDimTest();
using namespace nts;
-using namespace samplefnnlm;
+using namespace fnnlm;
int main( int argc, const char ** argv )
{
......
......@@ -143,7 +143,7 @@ void XMathGrad::GradSumDim(XTensor * node)
Then reduce along with z and x to obtain dE/db. */
node->grad->Reshape(3, reshapedSize);
-XTensor * interGrad = NewTensorBuf(2, reshapedSize, b->devID, b->mem, b->dataType, b->denseRatio);
+XTensor * interGrad = NewTensorBuf(2, reshapedSize, b->dataType, b->denseRatio, b->devID, b->mem);
_ReduceSum(node->grad, interGrad, 2);
......
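For context on the hunk above: per the surrounding comment, GradSumDim obtains dE/db by viewing node->grad as a 3-way tensor [x, y, z] with y as b's own axis, then summing out z (the _ReduceSum call) and x. A minimal plain-C++ sketch of that reduction, with made-up sizes and no NiuTensor dependency:

    #include <cstdio>

    int main()
    {
        const int X = 2, Y = 3, Z = 4;       // stand-ins for reshapedSize[0..2]
        float dc[X][Y][Z], db[Y] = {0};

        for (int x = 0; x < X; x++)          // fill dE/dc with dummy values
            for (int y = 0; y < Y; y++)
                for (int z = 0; z < Z; z++)
                    dc[x][y][z] = 1.0F;

        // sum out z (the _ReduceSum(..., 2) above) and then x,
        // leaving dE/db along b's own axis y
        for (int y = 0; y < Y; y++)
            for (int x = 0; x < X; x++)
                for (int z = 0; z < Z; z++)
                    db[y] += dc[x][y][z];

        for (int y = 0; y < Y; y++)
            printf("dE/db[%d] = %.1f\n", y, db[y]);   // X * Z = 8.0 each
        return 0;
    }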
......@@ -33,7 +33,7 @@
#include "../../tensor/function/FHeader.h"
#include "../../network/XNet.h"
-namespace samplefnnlm
+namespace fnnlm
{
#define MAX_NAME_LENGTH 1024
......
......@@ -36,7 +36,7 @@
using namespace nts;
-namespace samplefnnlm
+namespace fnnlm
{
#define _EXIT_(x)// exit(x)
......@@ -126,7 +126,7 @@ struct FNNNet
XTensor output;
};
-/* entry of the program */
+/* entrance of the program */
int FNNLMMain(int argc, const char ** argv);
};
......
......@@ -1891,17 +1891,16 @@ generate a XTensor which allocates data on the buffer
>> myDenseRatio - how often an element has non-zero value
*/
-XTensor * NewTensorBuf(const int myOrder, const int * myDimSize, int devID, XMem * myMem,
-const TENSOR_DATA_TYPE myDataType, const float myDenseRatio)
+XTensor * NewTensorBuf(const int myOrder, const int * myDimSize,
+const TENSOR_DATA_TYPE myDataType, const float myDenseRatio,
+const int devID, XMem * myMem)
{
-CheckNTErrors(myMem != NULL, "No memory pool specified!");
int dims[MAX_TENSOR_DIM_NUM];
memcpy(dims, myDimSize, sizeof(int) * myOrder);
dims[0] = -abs(dims[0]);
-XTensor * tensor = NewTensor(myOrder, dims, myDataType, myDenseRatio, -1, myMem);
+XTensor * tensor = NewTensor(myOrder, dims, myDataType, myDenseRatio, devID, myMem);
if(myMem != NULL)
tensor->data = myMem->AllocBuf(myMem->devID, tensor->unitNum * tensor->unitSize);
......@@ -1922,8 +1921,8 @@ generate a XTensor which allocates data on the buffer
XTensor * NewTensorBuf(const XTensor * reference, int devID, XMem * myMem)
{
return NewTensorBuf(reference->order, reference->dimSize,
-devID, myMem,
-reference->dataType, reference->denseRatio);
+reference->dataType, reference->denseRatio,
+devID, myMem);
}
/*
......
......@@ -391,8 +391,9 @@ XTensor * NewTensor(const int myOrder, const int * myDimSize, const TENSOR_DATA_
const float myDenseRatio = 1.0F, const int myDevID = -1, XMem * myMem = NULL);
/* generate a XTensor which allocates data on the buffer */
-XTensor * NewTensorBuf(const int myOrder, const int * myDimSize, int devID, XMem * myMem,
-const TENSOR_DATA_TYPE myDataType = X_FLOAT, const float myDenseRatio = 1.0F);
+XTensor * NewTensorBuf(const int myOrder, const int * myDimSize,
+const TENSOR_DATA_TYPE myDataType = X_FLOAT, const float myDenseRatio = 1.0F,
+const int myDevID = -1, XMem * myMem = NULL);
/* generate a XTensor which allocates data on the buffer */
XTensor * NewTensorBuf(const XTensor * reference, int devID, XMem * myMem);
......
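Taken together, the hunks above move the device and memory-pool parameters of NewTensorBuf to the end of the argument list (with defaults myDevID = -1, myMem = NULL), matching NewTensor's own parameter order, and fix a bug where the underlying tensor was always created with devID -1 instead of the requested device. The GradSumDim hunk at the top of this commit is the matching call-site update. A minimal before/after sketch, assuming the NiuTensor headers; the include path and Demo wrapper are illustrative only:

    #include "XTensor.h"              // path is an assumption; adjust to your tree
    using namespace nts;

    void Demo(int devID, XMem * mem)
    {
        int dims[2] = {8, 16};

        // old order: NewTensorBuf(2, dims, devID, mem, X_FLOAT, 1.0F);
        // new order: device/memory come last, so they can be defaulted away
        XTensor * t = NewTensorBuf(2, dims, X_FLOAT, 1.0F, devID, mem);

        DelTensorBuf(t);              // paired release, used in the LogSoftmax hunk below
    }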
......@@ -38,6 +38,14 @@ log scale softmax y = log(e^x / \sum_{i} e^{x_i})
*/
void _LogSoftmax(const XTensor * x, XTensor * y, int leadDim)
{
+CheckNTErrors(!x->isSparse && !y->isSparse, "TODO!");
+CheckNTErrors(x && y, "Empty input tensors!");
+if(y->dimSize[leadDim] == 1){
+y->SetZeroAll();
+return;
+}
int leadDimRDI = x->order - leadDim - 1;
if (!x->isSparse && !y->isSparse &&
x->dataType == DEFAULT_DTYPE && y->dataType == DEFAULT_DTYPE)
......@@ -63,30 +71,31 @@ void _LogSoftmax(const XTensor * x, XTensor * y, int leadDim)
int blockSize = 1;
int blockNum = 1;
-for (int i = 0; i < leadDimRDI; i++)
-stride *= y->dimSizeRDI[i];
-blockSize = stride * dimensionSize;
-blockNum = y->unitNum / blockSize;
-max = NewTensor(x->order - 1, dimSize, x->dataType, x->denseRatio, x->devID, mem);
-sum = NewTensor(x->order - 1, dimSize, x->dataType, x->denseRatio, x->devID, mem);
-max->data = mem != NULL ? (char*)mem->AllocBuf(mem->devID, max->unitNum * max->unitSize) : XMemAlloc(max->devID, max->unitNum * max->unitSize);
-sum->data = mem != NULL ? (char*)mem->AllocBuf(mem->devID, sum->unitNum * sum->unitSize) : XMemAlloc(sum->devID, sum->unitNum * sum->unitSize);
+max = NewTensorBuf(x->order - 1, dimSize, x->dataType, x->denseRatio, x->devID, mem);
+sum = NewTensorBuf(x->order - 1, dimSize, x->dataType, x->denseRatio, x->devID, mem);
_ReduceMax(x, max, leadDim);
_ReduceSum(x, sum, leadDim, max, 1.0F, true);
if (x->devID >= 0) {
-int dims[2];
-dims[0] = -stride;
-dims[1] = dimensionSize;
-blockx = NewTensor(2, dims, x->dataType, x->denseRatio, x->devID, mem);
-blocky = NewTensor(2, dims, x->dataType, x->denseRatio, x->devID, mem);
-dims[0] = -stride;
-dims[1] = 1;
-blockMax = NewTensor(2, dims, x->dataType, x->denseRatio, x->devID, mem);
-blockSum = NewTensor(2, dims, x->dataType, x->denseRatio, x->devID, mem);
+if(leadDimRDI == 0){
+blockSize = y->unitNum;
+blockNum = 1;
+blockx = NewTensor2D(blockSize/dimensionSize, -dimensionSize, x->dataType, x->devID, mem);
+blocky = NewTensor2D(blockSize/dimensionSize, -dimensionSize, x->dataType, x->devID, mem);
+blockMax = NewTensor2D(blockSize/dimensionSize, -1, x->dataType, x->devID, mem);
+blockSum = NewTensor2D(blockSize/dimensionSize, -1, x->dataType, x->devID, mem);
+}
+else{
+for (int i = 0; i < leadDimRDI; i++)
+stride *= y->dimSizeRDI[i];
+blockSize = stride * dimensionSize;
+blockNum = y->unitNum / blockSize;
+blockx = NewTensor2D(-stride, dimensionSize, x->dataType, x->devID, mem);
+blocky = NewTensor2D(-stride, dimensionSize, x->dataType, x->devID, mem);
+blockMax = NewTensor2D(-stride, 1, x->dataType, x->devID, mem);
+blockSum = NewTensor2D(-stride, 1, x->dataType, x->devID, mem);
+}
}
for (int k = 0; k < blockNum; k++) {
......@@ -123,6 +132,9 @@ void _LogSoftmax(const XTensor * x, XTensor * y, int leadDim)
blockMax->data = mp;
blockSum->data = sp;
#ifdef USE_CUDA
+if(leadDimRDI == 0)
+_CudaLogSoftmaxSumMax(blockx, blocky, 1, blockSum, blockMax);
+else
_CudaLogSoftmaxSumMax(blockx, blocky, leadDim, blockSum, blockMax);
#else
ShowNTErrors("Please specify USE_CUDA and recompile the code!");
......@@ -135,18 +147,8 @@ void _LogSoftmax(const XTensor * x, XTensor * y, int leadDim)
}
if (x->devID < 0) {
-if (mem != NULL) {
-mem->ReleaseBuf(mem->devID, max->unitNum * max->unitSize);
-mem->ReleaseBuf(mem->devID, sum->unitNum * sum->unitSize);
-}
-else {
-XMemFree(max->devID, max->data);
-XMemFree(sum->devID, sum->data);
-max->data = NULL;
-sum->data = NULL;
-}
-delete max;
-delete sum;
+DelTensorBuf(max);
+DelTensorBuf(sum);
}
else {
delete blockx;
......
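Three things change in _LogSoftmax: an early return for a singleton lead dimension, buffer-tensor allocation via NewTensorBuf/DelTensorBuf instead of hand-rolled AllocBuf/XMemFree bookkeeping, and a leadDimRDI == 0 fast path that treats the whole tensor as one block (the negative sizes passed to NewTensor2D appear to create shells whose data pointers are attached per block, as in blockMax->data = mp above). The early return is exact: with one element per softmax, y = log(e^x / e^x) = log 1 = 0. A standalone reference sketch of the max-shifted computation that the max/sum tensors support (the standard numerically stable formulation, not the library's blocked kernel; names are illustrative):

    #include <cmath>
    #include <cstdio>

    // log softmax over one row: y_i = (x_i - m) - log(sum_j e^{x_j - m}), m = max_j x_j
    void LogSoftmaxRow(const float * x, float * y, int n)
    {
        float m = x[0];
        for (int i = 1; i < n; i++)          // the _ReduceMax step
            if (x[i] > m) m = x[i];

        float sum = 0.0F;                    // shifted sum of exponents (the _ReduceSum step)
        for (int i = 0; i < n; i++)
            sum += std::exp(x[i] - m);

        for (int i = 0; i < n; i++)
            y[i] = (x[i] - m) - std::log(sum);
    }

    int main()
    {
        float x1[1] = {42.0F}, y1[1];
        LogSoftmaxRow(x1, y1, 1);
        printf("n = 1: %.1f\n", y1[0]);      // prints 0.0 -- the early-return case
        return 0;
    }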