code cleaning and CPU code update

a52ba88e · xiaotong · 314f4370 · a52ba88e · a52ba88e · a52ba88e
Commit a52ba88e authored Jul 16, 2019 by xiaotong
--- a/source/network/XNet.cpp
+++ b/source/network/XNet.cpp
@@ -187,7 +187,7 @@ void XNet::Backward(TensorList &roots, TensorList &golds, TensorList &paddings, 
        node->visitMark = NODE_UNFINISHED;
    }

-    XLossGrad lossGrad;
+    //XLossGrad lossGrad;

    /* we start with the gradient with respect to the loss for output layers */
    /*for(int i = 0; i < roots.count; i++){

--- a/source/sample/transformer/T2TSearch.cpp
+++ b/source/sample/transformer/T2TSearch.cpp
@@ -298,7 +298,7 @@ void T2TSearch::Generate(T2TStateBundle * beam)
       row means a previous state. The column number is size-of-beam \times vocab-size. We,
       therefore, divide entries of the top-k index by vocab-size to compute the id of the
       previous state for each hypothesis in the top-k list. */
-    Descale(preID, sizeVocab);
+    _DescaleMe(preID, sizeVocab);
    
    /* Then, we do something similar to "preID". For the top-k predictions, we need 
       to know their indices in the vocabulary. We compute the offset of each prediction

--- a/source/tensor/XMem.cpp
+++ b/source/tensor/XMem.cpp
@@ -1508,16 +1508,27 @@ XMemManager::~XMemManager()
 MTYPE XMemManager::GetAvailableMemory()
 {
    unsigned long freeMem = 0;
-#ifndef WIN32
-    long pages = sysconf(_SC_AVPHYS_PAGES);
-    long page_size = sysconf(_SC_PAGE_SIZE);
-    freeMem = pages * page_size;
-#else
+#if __APPLE__
+    int mib[2] = {CTL_HW, HW_MEMSIZE};
+    unsigned int namelen = sizeof(mib) / sizeof(mib[0]);
+    unsigned long long size;
+    size_t len = sizeof(size);
+    if (sysctl(mib, namelen, &size, &len, NULL, 0) < 0){
+        ShowNTErrors("Cannot get memory size on Mac!");
+    }
+    else{
+        return size;
+    }
+#elif _WIN32
    MEMORYSTATUSEX memoryStatus;
    memoryStatus.dwLength = sizeof(memoryStatus);
    if (GlobalMemoryStatusEx(&memoryStatus)){
        freeMem = memoryStatus.ullAvailPhys;
    }
+#else
+    long pages = sysconf(_SC_AVPHYS_PAGES);
+    long page_size = sysconf(_SC_PAGE_SIZE);
+    freeMem = pages * page_size;
 #endif
    return (MTYPE)freeMem;
 }
@@ -1526,8 +1537,9 @@ MTYPE XMemManager::GetAvailableMemory()
 MTYPE XMemManager::GetAvailableGPUMemory(int devID)
 {
    size_t freeMem = 0;
-    size_t totalMem = 0;
+    
 #ifdef USE_CUDA
+    size_t totalMem = 0;
    cudaSetDevice(devID);
    if (cudaMemGetInfo(&freeMem, &totalMem) != cudaSuccess){
        XPRINT(0, stderr, "cannot get GPU memory information.");
@@ -1638,12 +1650,12 @@ void XMemManager::ShowMemInfo()
    int myBlockNum;
    for(int i = 0; i < nCPUMem; i++){
        GetMemSize(-1, &myBlockSize, &myBlockNum, &myBufSize);
-        XPRINT3(1, stderr, " - id:-1 CPU, blockSize:%d, blockNum:%d, bufSize:%d\n", myBlockSize, myBlockNum, myBufSize);
+        XPRINT3(1, stderr, " - id:-1 CPU, blockSize:%lld, blockNum:%d, bufSize:%lld\n", myBlockSize, myBlockNum, myBufSize);
    }

    for(int i = 0; i < nGPUMem; i++){
        GetMemSize(i, &myBlockSize, &myBlockNum, &myBufSize);
-        XPRINT4(1, stderr, " - id:%2d GPU, blockSize:%d, blockNum:%d, bufSize:%d\n", i, myBlockSize, myBlockNum, myBufSize);
+        XPRINT4(1, stderr, " - id:%2d GPU, blockSize:%lld, blockNum:%d, bufSize:%lld\n", i, myBlockSize, myBlockNum, myBufSize);
    }
 }


--- a/source/tensor/XMem.h
+++ b/source/tensor/XMem.h
@@ -39,10 +39,13 @@
 #include <curand.h>
 #endif

-#ifndef WIN32
-#include <unistd.h>
-#else
+#ifdef __APPLE__
+#include <sys/types.h>
+#include <sys/sysctl.h>
+#elif WIN32
 #include <windows.h>
+#else
+#include <unistd.h>
 #endif

 /* the nts (NiuTrans.Tensor) namespace */

--- a/source/tensor/core/getandset/OnehotAndIndex.cpp
+++ b/source/tensor/core/getandset/OnehotAndIndex.cpp
@@ -111,10 +111,9 @@ void _IndexToOnehot(XTensor * index, XTensor * onehot, int size, float labelSmoo

    onehot->SetZeroAll();

+#ifdef USE_CUDA
    float confidence = 1 - labelSmoothingP;
    float lowconfidence = labelSmoothingP / size;
-
-#ifdef USE_CUDA
    if(onehot->devID >= 0 && index->devID >= 0) {
        _CudaIndexToOnehot(index, onehot, size, confidence, lowconfidence);
        return;

--- a/source/tensor/core/math/Binary.cpp
+++ b/source/tensor/core/math/Binary.cpp
@@ -170,14 +170,10 @@ SIMPLE_BINARY_FUNCTION_INT(Mod, _Mod)

 #else
 /* define three marco separately, specify the respective function names (CPU mode) */
-#define _SIMPLE_BINARY_FUNCTION_INT(_funcName, _cudaFuncName, origFunc)     \
+#define _SIMPLE_BINARY_FUNCTION_INT(_funcName, origFunc)                    \
 void _funcName(const XTensor * a, XTensor * b, int num)                     \
 {                                                                           \
-    /* run it on GPUs */                                                    \
-    if (a->devID >= 0) {                                                    \
-        _cudaFuncName(a, b, num);                                           \
-        return;                                                             \
-    }                                                                       \
+    CheckNTErrors(a->devID < 0, "No GPU code is supported");                \
    CheckNTErrors((XTensor::IsSameShaped(a, b)),                            \
                "Input tensors should have the same data type!");           \
    CheckNTErrors((a->dataType == X_INT&&b->dataType == X_INT), "TODO!");   \
@@ -187,14 +183,10 @@ void _funcName(const XTensor * a, XTensor * b, int num)                     \
        db[i] = (int)origFunc(d[i], num);                                   \
 }                                                                           \

-#define _SIMPLE_BINARY_FUNCTION(_funcName, _cudaFuncName, origFunc)         \
+#define _SIMPLE_BINARY_FUNCTION(_funcName, origFunc)         \
 void _funcName(const XTensor * a, XTensor * b, float num)                   \
 {                                                                           \
-    /* run it on GPUs */                                                    \
-    if (a->devID >= 0) {                                                    \
-        _cudaFuncName(a, b, num);                                           \
-        return;                                                             \
-    }                                                                       \
+    CheckNTErrors(a->devID < 0, "No GPU code is supported");                \
    CheckNTErrors((XTensor::IsSameShaped(a, b)),                            \
                "Input tensors should have the same data type!");           \
    CheckNTErrors((a->dataType == X_FLOAT&&b->dataType == X_FLOAT), "TODO!");\
@@ -228,34 +220,36 @@ void funcName(const XTensor &a, XTensor &b, float num)                      \
    _funcName(&a, &b, num);                                                 \
 }                                                                           \

-_SIMPLE_BINARY_FUNCTION_INT(_Scale, _CudaScale, scale)
-SIMPLE_BINARY_FUNCTION_ME_INT(Scale, _Scale)
+    
+_SIMPLE_BINARY_FUNCTION_INT(_Scale, scale)
+SIMPLE_BINARY_FUNCTION_ME_INT(_ScaleMe, _Scale)
 SIMPLE_BINARY_FUNCTION_INT(Scale, _Scale)

-_SIMPLE_BINARY_FUNCTION(_Scale, _CudaScaleFloat, scale)
-SIMPLE_BINARY_FUNCTION_ME(Scale, _Scale)
+_SIMPLE_BINARY_FUNCTION(_Scale, scale)
+SIMPLE_BINARY_FUNCTION_ME(_ScaleMe, _Scale)
 SIMPLE_BINARY_FUNCTION(Scale, _Scale)
    
-_SIMPLE_BINARY_FUNCTION_INT(_Descale, _CudaDescale, descale)
-SIMPLE_BINARY_FUNCTION_ME_INT(Descale, _Descale)
+_SIMPLE_BINARY_FUNCTION_INT(_Descale, descale)
+SIMPLE_BINARY_FUNCTION_ME_INT(_DescaleMe, _Descale)
 SIMPLE_BINARY_FUNCTION_INT(Descale, _Descale)

-_SIMPLE_BINARY_FUNCTION(_Descale, _CudaDescaleFloat, descale)
-SIMPLE_BINARY_FUNCTION_ME(Descale, _Descale)
+_SIMPLE_BINARY_FUNCTION(_Descale, descale)
+SIMPLE_BINARY_FUNCTION_ME(_DescaleMe, _Descale)
 SIMPLE_BINARY_FUNCTION(Descale, _Descale)
    
-_SIMPLE_BINARY_FUNCTION_INT(_Shift, _CudaShift, shift)
-SIMPLE_BINARY_FUNCTION_ME_INT(Shift, _Shift)
+_SIMPLE_BINARY_FUNCTION_INT(_Shift, shift)
+SIMPLE_BINARY_FUNCTION_ME_INT(_ShiftMe, _Shift) /* int variant; named _ShiftMe like the other *Me instantiations */
 SIMPLE_BINARY_FUNCTION_INT(Shift, _Shift)

-_SIMPLE_BINARY_FUNCTION(_Shift, _CudaShiftFloat, shift)
-SIMPLE_BINARY_FUNCTION_ME(Shift, _Shift)
+_SIMPLE_BINARY_FUNCTION(_Shift, shift)
+SIMPLE_BINARY_FUNCTION_ME(_ShiftMe, _Shift)
 SIMPLE_BINARY_FUNCTION(Shift, _Shift)
    
-_SIMPLE_BINARY_FUNCTION_INT(_Mod, _CudaMod, mod)
-SIMPLE_BINARY_FUNCTION_ME_INT(Mod, _Mod)
+_SIMPLE_BINARY_FUNCTION_INT(_Mod, mod)
+SIMPLE_BINARY_FUNCTION_ME_INT(_ModMe, _Mod)
 SIMPLE_BINARY_FUNCTION_INT(Mod, _Mod)

+    
 #endif

 } // namespace nts(NiuTrans.Tensor)
--- a/source/tensor/function/Dropout.cpp
+++ b/source/tensor/function/Dropout.cpp
@@ -149,7 +149,6 @@ XTensor Dropout(const XTensor &x, DTYPE dropProb, int leadingDim, int leadingDim
    CheckNTErrors(dropProb >= 0.0 && dropProb <= 1.0, "The probability must be 0-1!");

    XTensor mask;
-    int * maskArrayInt = NULL;
    DTYPE * maskArray = NULL;
    DTYPE scaleFactor = (DTYPE)1.0 / ((DTYPE)1.0 - dropProb);