Commit 137df5d9 by liyinqiao

Merge with HU Chi branch.

1. Offer a global setting for enableGrad.
2. Update XList.
3. Minor errors fixed.
parent b3dfbdcd
@@ -26,6 +26,9 @@
 namespace nts { // namespace nts(NiuTrans.Tensor)

+/* default settings */
+#define X_ENABLE_GRAD false
+
 /*
 * we define the "new and delete" functions below
 */
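The new macro turns the hard-coded `isEnableGrad = true` defaults below into one compile-time switch: gradient tracking is now off by default and enabled per call. A minimal sketch of the pattern, with a simplified stand-in for XTensor (not the real API):

// Sketch: one compile-time default for gradient tracking. Flipping
// X_ENABLE_GRAD changes every call site that omits the flag.
#include <cstdio>

#define X_ENABLE_GRAD false          /* global default, as in this commit */

struct TensorSketch { bool enableGrad; };

TensorSketch* NewTensorSketch(bool isEnableGrad = X_ENABLE_GRAD)
{
    return new TensorSketch{ isEnableGrad };
}

int main()
{
    TensorSketch* a = NewTensorSketch();       // uses the global default (false)
    TensorSketch* b = NewTensorSketch(true);   // explicit per-tensor opt-in
    std::printf("a: %d, b: %d\n", a->enableGrad, b->enableGrad);
    delete a;
    delete b;
    return 0;
}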
@@ -38,13 +41,13 @@ void InitTensorV2(XTensor * tensor,

 /* initialize a dense XTensor */
 void InitTensor(XTensor * tensor,
                 const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType = X_FLOAT,
-                const int myDevID = -1, const bool isEnableGrad = true);
+                const int myDevID = -1, const bool isEnableGrad = X_ENABLE_GRAD);

 /* initialize a scalar V2 */
 void InitTensor0DV2(XTensor * tensor, const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, XMem * myMem = NULL);

 /* initialize a scalar */
-void InitTensor0D(XTensor * tensor, const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = true);
+void InitTensor0D(XTensor * tensor, const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = X_ENABLE_GRAD);

 /* initialize a dense vector V2 */
 void InitTensor1DV2(XTensor * tensor, const int num,
@@ -52,7 +55,7 @@ void InitTensor1DV2(XTensor * tensor, const int num,

 /* initialize a dense vector */
 void InitTensor1D(XTensor * tensor, const int num,
-                  const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = true);
+                  const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = X_ENABLE_GRAD);

 /* initialize a dense matrix V2 */
 void InitTensor2DV2(XTensor * tensor, const int rowNum, const int colNum,
@@ -60,7 +63,7 @@ void InitTensor2DV2(XTensor * tensor, const int rowNum, const int colNum,

 /* initialize a dense matrix */
 void InitTensor2D(XTensor * tensor, const int rowNum, const int colNum,
-                  const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = true);
+                  const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = X_ENABLE_GRAD);

 /* initialize a dense 3d tensor V2 */
 void InitTensor3DV2(XTensor * tensor, const int d0, const int d1, const int d2,
@@ -68,7 +71,7 @@ void InitTensor3DV2(XTensor * tensor, const int d0, const int d1, const int d2,

 /* initialize a dense 3d tensor */
 void InitTensor3D(XTensor * tensor, const int d0, const int d1, const int d2,
-                  const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = true);
+                  const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = X_ENABLE_GRAD);

 /* initialize a dense 4d tensor V2 */
 void InitTensor4DV2(XTensor * tensor, const int d0, const int d1, const int d2, const int d3,
@@ -76,7 +79,7 @@ void InitTensor4DV2(XTensor * tensor, const int d0, const int d1, const int d2,

 /* initialize a dense 4d tensor */
 void InitTensor4D(XTensor * tensor, const int d0, const int d1, const int d2, const int d3,
-                  const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = true);
+                  const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = X_ENABLE_GRAD);

 /* initialize a dense 5d tensor V2 */
 void InitTensor5DV2(XTensor * tensor, const int d0, const int d1, const int d2, const int d3, const int d4,
@@ -84,7 +87,7 @@ void InitTensor5DV2(XTensor * tensor, const int d0, const int d1, const int d2,

 /* initialize a dense 5d tensor */
 void InitTensor5D(XTensor * tensor, const int d0, const int d1, const int d2, const int d3, const int d4,
-                  const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = true);
+                  const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = X_ENABLE_GRAD);

 /* initialize a tensor with a reference tensor V2 */
 void InitTensorV2(XTensor * tensor, const XTensor * reference);
@@ -104,7 +107,7 @@ XTensor * NewTensorV2(const int myOrder, const int * myDimSize, const TENSOR_DAT

 /* generate a dense XTensor */
 XTensor * NewTensor(const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType = X_FLOAT,
-                    const int myDevID = -1, const bool isEnableGrad = true);
+                    const int myDevID = -1, const bool isEnableGrad = X_ENABLE_GRAD);

 /* generate a XTensor which allocates data on the buffer V2 */
 XTensor * NewTensorBufV2(const int myOrder, const int * myDimSize,
@@ -113,26 +116,26 @@ XTensor * NewTensorBufV2(const int myOrder, const int * myDimSize,

 /* generate a dense XTensor which allocates data on the buffer */
 XTensor * NewTensorBuf(const int myOrder, const int * myDimSize,
-                       const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = true);
+                       const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = X_ENABLE_GRAD);

 /* generate a XTensor which allocates data on the buffer V2 */
 XTensor * NewTensorBufV2(const XTensor * reference, int devID, XMem * myMem);

 /* generate a XTensor which allocates data on the buffer */
-XTensor * NewTensorBuf(const XTensor * reference, int devID, const bool isEnableGrad = true);
+XTensor * NewTensorBuf(const XTensor * reference, int devID, const bool isEnableGrad = X_ENABLE_GRAD);

 /* generate a scalar V2 */
 XTensor * NewTensor0DV2(const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, XMem * myMem = NULL);

 /* generate a scalar */
-XTensor * NewTensor0D(const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = true);
+XTensor * NewTensor0D(const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = X_ENABLE_GRAD);

 /* generate a dense vector V2 */
 XTensor * NewTensor1DV2(const int num, const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1,
                         XMem * myMem = NULL);

 /* generate a dense vector */
-XTensor * NewTensor1D(const int num, const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = true);
+XTensor * NewTensor1D(const int num, const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = X_ENABLE_GRAD);

 /* generate a dense matrix V2 */
 XTensor * NewTensor2DV2(const int rowNum, const int colNum,
@@ -142,7 +145,7 @@ XTensor * NewTensor2DV2(const int rowNum, const int colNum,

 /* generate a dense matrix */
 XTensor * NewTensor2D(const int rowNum, const int colNum,
                       const TENSOR_DATA_TYPE myDataType = X_FLOAT,
-                      const int myDevID = -1, const bool isEnableGrad = true);
+                      const int myDevID = -1, const bool isEnableGrad = X_ENABLE_GRAD);

 /* generate a dense 3d tensor V2 */
 XTensor * NewTensor3DV2(const int d0, const int d1, const int d2,
@@ -152,7 +155,7 @@ XTensor * NewTensor3DV2(const int d0, const int d1, const int d2,

 /* generate a dense 3d tensor */
 XTensor * NewTensor3D(const int d0, const int d1, const int d2,
                       const TENSOR_DATA_TYPE myDataType = X_FLOAT,
-                      const int myDevID = -1, const bool isEnableGrad = true);
+                      const int myDevID = -1, const bool isEnableGrad = X_ENABLE_GRAD);

 /* generate a dense 4d tensor V2 */
 XTensor * NewTensor4DV2(const int d0, const int d1, const int d2, const int d3,
@@ -162,7 +165,7 @@ XTensor * NewTensor4DV2(const int d0, const int d1, const int d2, const int d3,

 /* generate a dense 4d tensor */
 XTensor * NewTensor4D(const int d0, const int d1, const int d2, const int d3,
                       const TENSOR_DATA_TYPE myDataType = X_FLOAT,
-                      const int myDevID = -1, const bool isEnableGrad = true);
+                      const int myDevID = -1, const bool isEnableGrad = X_ENABLE_GRAD);

 /* generate a dense 5d tensor V2 */
 XTensor * NewTensor5DV2(const int d0, const int d1, const int d2, const int d3, const int d4,
@@ -172,10 +175,10 @@ XTensor * NewTensor5DV2(const int d0, const int d1, const int d2, const int d3,

 /* generate a dense 5d tensor */
 XTensor * NewTensor5D(const int d0, const int d1, const int d2, const int d3, const int d4,
                       const TENSOR_DATA_TYPE myDataType = X_FLOAT,
-                      const int myDevID = -1, const bool isEnableGrad = true);
+                      const int myDevID = -1, const bool isEnableGrad = X_ENABLE_GRAD);

 /* generate a dense vector by range */
-XTensor * NewTensorRange(int lower, int upper, int step, const TENSOR_DATA_TYPE myDataType = X_INT, const int myDevID = -1, const bool isEnableGrad = true);
+XTensor * NewTensorRange(int lower, int upper, int step, const TENSOR_DATA_TYPE myDataType = X_INT, const int myDevID = -1, const bool isEnableGrad = X_ENABLE_GRAD);

 /* generate a copy of XTensor (with a reference to a given tensor) */
 XTensor * NewTensor(const XTensor * a, bool isFilledData = true);
...
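Callers see no signature change, only a new default: every `InitTensor*`/`NewTensor*` overload above now falls back to `X_ENABLE_GRAD` when `isEnableGrad` is omitted. A hedged usage sketch against the declarations above (cleanup shown with plain `delete`; the library's own delete helpers would also work):

// Usage sketch: with X_ENABLE_GRAD == false, gradient tracking is opt-in.
using namespace nts;

void Demo()
{
    XTensor* x = NewTensor2D(2, 3);                     /* grad off by default */
    XTensor* w = NewTensor2D(3, 4, X_FLOAT, -1, true);  /* opt in for a parameter */
    delete x;
    delete w;
}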
@@ -249,26 +249,6 @@ inline int TensorListBase<T>::FindFirst(const T& item)
     return -1;
 }

-template <>
-inline int TensorListBase<Example>::FindFirst(const Example& item)
-{
-    for (int i = 0; i < count; i++) {
-        if (item.id == items[i].id)
-            return i;
-    }
-    return -1;
-}
-
-template <>
-inline int TensorListBase<Result>::FindFirst(const Result& item)
-{
-    for (int i = 0; i < count; i++) {
-        if (item.id == items[i].id)
-            return i;
-    }
-    return -1;
-}
-
 /* clear the data array */
 template <typename T>
 void TensorListBase<T>::Clear()
@@ -383,8 +363,7 @@ template struct TensorListBase<long>;
 template struct TensorListBase<float>;
 template struct TensorListBase<short>;
 template struct TensorListBase<XTensor*>;
-template struct TensorListBase<Result>;
-template struct TensorListBase<Example>;
+template struct TensorListBase<uint64_t>;
 template struct TensorListBase<void*>;

 } /* end of the nts (NiuTrans.Tensor) namespace */
\ No newline at end of file
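With the `Example`/`Result` specializations gone, every element type goes through the generic `FindFirst`, which compares whole items rather than `id` fields; that works for the remaining instantiations (`int`, `long`, `float`, `short`, `uint64_t`, pointer types), all of which support built-in `==`. A simplified sketch of the generic pattern, with plain members standing in for the real `TensorListBase` internals:

// Sketch: generic linear search as in TensorListBase<T>::FindFirst.
// T must be equality-comparable, which is why struct elements with
// id-based comparison needed the (now removed) specializations.
template <typename T>
struct FindSketch {
    T* items;
    int count;

    int FindFirst(const T& item) const
    {
        for (int i = 0; i < count; i++) {
            if (item == items[i])   /* built-in == for uint64_t, pointers, ... */
                return i;
        }
        return -1;                  /* not found */
    }
};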
@@ -26,6 +26,8 @@
 #include "XMem.h"
 #include "XGlobal.h"

+#include <cstdint>
+
 #ifndef __TensorList_H__
 #define __TensorList_H__
@@ -118,7 +120,14 @@ public:
     void Shuffle(int nround = 10, int beg = -1, int len = 0);

     /* short */
-    T& operator[] (int i) { return GetItem(i); };
+    T& operator[] (int i) {
+        CheckNTErrors(i >= -count && i < count, "Index of a list item is out of scope!");
+        CheckNTErrors(count > 0, "Cannot index the item in an empty list!");
+        if (i < 0)
+            return items[count + i];
+        else
+            return items[i];
+    };
     T& Get(int i) { return GetItem(i); };
     void Set(int i, T item) { SetItem(i, item); };
 };
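The rewritten `operator[]` adds Python-style negative indexing with bounds checks: any `i` in `[-count, count)` is valid and `list[-1]` is the last item. A small, self-contained sketch of the same semantics (plain `assert` standing in for `CheckNTErrors`):

// Semantics sketch for the new operator[]: negative i counts from the end.
#include <cassert>

template <typename T>
struct IndexSketch {
    T* items;
    int count;

    T& operator[] (int i)
    {
        assert(count > 0);                  /* an empty list cannot be indexed */
        assert(i >= -count && i < count);   /* i must lie in [-count, count) */
        return (i < 0) ? items[count + i] : items[i];
    }
};

int main()
{
    int data[4] = {10, 20, 30, 40};
    IndexSketch<int> list{data, 4};
    assert(list[0] == 10);
    assert(list[-1] == 40);   /* last item */
    assert(list[-4] == 10);   /* first item, counted from the back */
    return 0;
}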
@@ -132,19 +141,7 @@ typedef TensorListBase<char*> StrList;
 typedef TensorListBase<long> LongList;
 typedef TensorListBase<float> FloatList;
 typedef TensorListBase<short> ShortList;
-
-struct Example {
-    int id;
-    IntList data;
-};
-
-struct Result {
-    int id;
-    IntList data;
-};
-
-typedef TensorListBase<Result> ResultList;
-typedef TensorListBase<Example> ExampleList;
+typedef TensorListBase<uint64_t> UInt64List;
 typedef TensorListBase<XTensor*> TensorList;

 } /* end of the nts (NiuTrans.Tensor) namespace */
...
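`UInt64List` replaces the ad-hoc `ResultList`/`ExampleList`: instead of `{id, data}` records, callers presumably store plain 64-bit keys or offsets. Hypothetical usage (the `Add` method is assumed from the other `TensorListBase` aliases):

// Hypothetical: UInt64List behaves like the other primitive-typed lists.
nts::UInt64List offsets;
offsets.Add(0);
offsets.Add(4096);
int pos = offsets.FindFirst(4096);   /* 1, via the generic FindFirst */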
@@ -64,7 +64,7 @@
 #endif

 /* the nts (NiuTrans.Tensor) namespace */
-namespace nts{
+namespace nts {

 int tensorIDGlobal = 0;
 MUTEX_HANDLE tensorMutex;
@@ -73,11 +73,11 @@ XTensor NULLTensor;
 /* generate a tensor id */
 int MakeTensorID()
 {
-    if(tensorIDGlobal == 0)
+    if (tensorIDGlobal == 0)
         MUTEX_INIT(tensorMutex);

     MUTEX_LOCK(tensorMutex);
     int id = tensorIDGlobal++;
     MUTEX_UNLOCK(tensorMutex);

     return id;
@@ -91,13 +91,13 @@ XTensor::XTensor()
     id = MakeTensorID();
     isDefaultDType = true;
     isInGlobalMem = false;
     isInit = false;
     isTmp = false;
 }

 /* constructor */
-XTensor::XTensor(const XTensor * reference)
+XTensor::XTensor(const XTensor* reference)
 {
     Init();
     SetDataPointer();
@@ -112,7 +112,7 @@ constructor
 >> myDevID - device id
 >> myMem - memory pool used to allocating the data array
 */
-XTensor::XTensor(const int myOrder, int myDevID, XMem * myMem)
+XTensor::XTensor(const int myOrder, int myDevID, XMem* myMem)
 {
     CheckNTErrors((myOrder >= 0), "Illegal tensor order!");
@@ -134,8 +134,8 @@ constructor
 >> myDevID - device id
 >> myMem - memory pool used to allocating the data array
 */
-XTensor::XTensor(const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType,
-                 const float myDenseRatio, int myDevID, XMem * myMem)
+XTensor::XTensor(const int myOrder, const int* myDimSize, const TENSOR_DATA_TYPE myDataType,
+                 const float myDenseRatio, int myDevID, XMem* myMem)
 {
     Init();
     SetDataPointer();
@@ -145,12 +145,12 @@ XTensor::XTensor(const int myOrder, const int * myDimSize, const TENSOR_DATA_TYP
     mem = myMem;
     devID = myMem != NULL ? myMem->devID : myDevID;

-    if(order >= 0)
+    if (order >= 0)
         Resize(myOrder, myDimSize, myDataType, myDenseRatio);
 }

 /* copy constructor */
-XTensor::XTensor(const XTensor &reference)
+XTensor::XTensor(const XTensor& reference)
 {
     Init();
     SetDataPointer();
@@ -158,8 +158,8 @@ XTensor::XTensor(const XTensor &reference)
     ShallowCopy(reference);
     data = NULL;
     dataHost = NULL;

-    if(reference.isTmp){
+    if (reference.isTmp) {
         devID = reference.devID;
         mem = reference.mem;
         data = reference.data;
@@ -172,26 +172,26 @@ XTensor::XTensor(const XTensor &reference)
        This is VERY tricky and there might be better solutions :) */
        *reference.dataP = NULL;
     }
-    else{
+    else {
         devID = reference.devID;
         mem = reference.mem;
         InitTensorV2(this, &reference);
         _CopyValues(&reference, this);
     }

-    if(reference.isTmp)
+    if (reference.isTmp)
         XLink::Replace(&reference, this);
-    else{
+    else {
         CheckNTErrors(outgo.tailNum == 0, "The node has outgoing edge to other nodes!");
         XLink::CopyIncoming(&reference, this);
     }

     isInit = true;
     isTmp = reference.isTmp;
 }

 /* copy constructor (with right value reference) */
-XTensor::XTensor(const XTensor &&reference)
+XTensor::XTensor(const XTensor&& reference)
 {
     Init();
     SetDataPointer();
@@ -215,7 +215,7 @@ XTensor::XTensor(const XTensor &&reference)
     XLink::Replace(&reference, this);

     isInit = true;
     isTmp = reference.isTmp;
 }

 /* de-constructor */
@@ -225,12 +225,12 @@ XTensor::~XTensor()
        the connectivity of the graph. To kill memory
        leak, we release the data of the new tensor
        when its parent is deleted (see ClearIncoming). */
-    if(outgo.tailNum > 0){
+    if (outgo.tailNum > 0) {
         int dims[MAX_TENSOR_DIM_NUM];
         memcpy(dims, dimSize, order * sizeof(int));
         dims[0] = -dims[0];

-        XTensor * newTensor = new XTensor(order, dims, dataType, denseRatio, devID, mem);
+        XTensor* newTensor = new XTensor(order, dims, dataType, denseRatio, devID, mem);
         newTensor->SetTMPFlag();
         newTensor->data = data;
         data = NULL;
@@ -243,12 +243,12 @@ XTensor::~XTensor()
     DestroyData();

-    if(grad != NULL)
+    if (grad != NULL)
         delete grad;
 }

 /* set the name of the tensor */
-void XTensor::SetName(const char * myName)
+void XTensor::SetName(const char* myName)
 {
     strcpy(name, myName);
 }
@@ -277,10 +277,10 @@ void XTensor::Init()
     isInGlobalMem = false;
     memset(isAllValued, 0, sizeof(bool) * MAX_TENSOR_DIM_NUM);
     isInit = false;
     isTmp = false;
     isGrad = false;
     isVar = false;
-    enableGrad = true;
+    enableGrad = X_ENABLE_GRAD;
     visitMark = 0;
     grad = NULL;
 }
@@ -288,16 +288,16 @@ void XTensor::Init()
 /* delete data arrays */
 void XTensor::DestroyData()
 {
-    if(data != NULL && mem == NULL && !isShared)
+    if (data != NULL && mem == NULL && !isShared)
         XMemFree(devID, data);
-    else if(data != NULL && isInGlobalMem)
+    else if (data != NULL && isInGlobalMem)
         FreeData(this, mem);
-    else if(data != NULL)
+    else if (data != NULL)
         mem->Release(data, GetDataSizeInChar(), signature);

     data = NULL;

-    if(dataHost != NULL)
+    if (dataHost != NULL)
         delete[] (char*)dataHost;
     dataHost = NULL;
 }
@@ -307,7 +307,7 @@ shallow copy of the tensor
 Note that we do not copy data array here
 >> tensor - the source tensor
 */
-void XTensor::ShallowCopy(const XTensor &tensor)
+void XTensor::ShallowCopy(const XTensor& tensor)
 {
     strcpy(name, tensor.name);
     order = tensor.order;
@@ -318,7 +318,7 @@ void XTensor::ShallowCopy(const XTensor &tensor)
     unitNum = tensor.unitNum;
     isSparse = tensor.isSparse;
     unitNumNonZero = tensor.unitNumNonZero;
     denseRatio = tensor.denseRatio;
     isShared = tensor.isShared;
     isDefaultDType = tensor.isDefaultDType;
     isInGlobalMem = tensor.isInGlobalMem;
@@ -330,12 +330,12 @@ XTensor& XTensor::operator= (const XTensor& tensor)
 {
     /* we must make a hard copy of the tensor if it is the input
        of another node. */
-    if(outgo.tailNum > 0){
+    if (outgo.tailNum > 0) {
         int dims[MAX_TENSOR_DIM_NUM];
         memcpy(dims, dimSize, order * sizeof(int));
         dims[0] = -dims[0];

-        XTensor * newTensor = new XTensor(order, dims, dataType, denseRatio, devID, mem);
+        XTensor* newTensor = new XTensor(order, dims, dataType, denseRatio, devID, mem);
         newTensor->SetTMPFlag();
         newTensor->data = data;
         newTensor->dataHost = dataHost;
@@ -350,35 +350,35 @@ XTensor& XTensor::operator= (const XTensor& tensor)
         dataHost = NULL;
     }

-    if(false && !tensor.isTmp){
+    if (false && !tensor.isTmp) {
         /* NOTE: this might lead to additional data copy by Mac LLVM compilers */
         /* we make an identity transformation here */

-        if(outgo.tailNum > 0)
+        if (outgo.tailNum > 0)
             XLink::ClearOutgoing(this);
         XLink::ClearIncoming(this);

-        if(!_IsSameShaped(this, &tensor))
+        if (!_IsSameShaped(this, &tensor))
             Resize(tensor.order, tensor.dimSize, tensor.dataType, tensor.denseRatio);

         _Identity(&tensor, this);
         XLink::MakeLink(&tensor, NULL, this, FUNC_IDENTITY);
     }
-    else{
+    else {
         /* hard copy of the data array */
         int size = unitNum * unitSize;
-        if( isInit && !isSparse && !tensor.isSparse &&
+        if (isInit && !isSparse && !tensor.isSparse &&
             size == tensor.unitNum * tensor.unitSize &&
             ((devID < 0 && tensor.devID < 0) && devID == tensor.devID) &&
             data != NULL)
         {
             XMemCopy(data, devID, tensor.data, tensor.devID, size);
-            if(dataHost != NULL && tensor.dataHost != NULL)
+            if (dataHost != NULL && tensor.dataHost != NULL)
                 XMemCopy(dataHost, -1, tensor.dataHost, tensor.devID, size);
         }
-        else{
+        else {
             DestroyData();
-            if(!isInit){
+            if (!isInit) {
                 devID = tensor.devID;
                 mem = tensor.mem;
             }
@@ -391,7 +391,7 @@ XTensor& XTensor::operator= (const XTensor& tensor)
     ShallowCopy(tensor);

     isInit = true;
     isTmp = false;

     CheckNTErrors(outgo.tailNum == 0, "The node has outgoing edge to other nodes!");
@@ -407,12 +407,12 @@ XTensor& XTensor::operator= (const XTensor&& tensor)
 {
     /* we must make a hard copy of the tensor if it is the input
        of another node. */
-    if(outgo.tailNum > 0){
+    if (outgo.tailNum > 0) {
         int dims[MAX_TENSOR_DIM_NUM];
         memcpy(dims, dimSize, order * sizeof(int));
         dims[0] = -dims[0];

-        XTensor * newTensor = new XTensor(order, dims, dataType, denseRatio, devID, mem);
+        XTensor* newTensor = new XTensor(order, dims, dataType, denseRatio, devID, mem);
         newTensor->SetTMPFlag();
         newTensor->data = data;
         newTensor->dataHost = dataHost;
@@ -433,7 +433,7 @@ XTensor& XTensor::operator= (const XTensor&& tensor)
     isInit = true;

     devID = tensor.devID;
     mem = tensor.mem;
     data = tensor.data;
     signature = tensor.signature;
@@ -456,7 +456,7 @@ XTensor XTensor::operator+ (const XTensor& tensor) const
 }

 /* overloading of the plus-sign */
 XTensor XTensor::operator+ (const DTYPE shift) const
 {
     return ScaleAndShift(*this, 1, shift);
 }
@@ -500,7 +500,7 @@ XTensor XTensor::operator/ (const XTensor& tensor) const
 /* overloading of the division-sign */
 XTensor XTensor::operator/ (const DTYPE scale) const
 {
-    return ScaleAndShift(*this, (DTYPE)1/scale, 0);
+    return ScaleAndShift(*this, (DTYPE)1.0F / scale, 0);
 }
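Scalar division is sugar over `ScaleAndShift`, which (per the operators above) computes `y = x * scale + shift` elementwise, so dividing by `s` is scaling by `1/s` with zero shift; the rewrite to `(DTYPE)1.0F / scale` only makes the literal's type explicit. Illustrative equivalences (not library code):

// Assuming ScaleAndShift(t, scale, shift) returns t * scale + shift elementwise:
//   a / 4.0F  ==  ScaleAndShift(a, 1.0F / 4.0F, 0)
//   a + 0.5F  ==  ScaleAndShift(a, 1, 0.5F)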
 /*
@@ -518,7 +518,7 @@ relocate the data on the target device
 >> myDevId - target device id
 >> myMem - memory pool on the target device
 */
-void XTensor::SetDevice(int myDevId, XMem * myMem)
+void XTensor::SetDevice(int myDevId, XMem* myMem)
 {
     if (myMem == NULL) {
         myMem = GMems.GetMem(myDevId);
@@ -527,9 +527,9 @@ void XTensor::SetDevice(int myDevId, XMem * myMem)
     isInGlobalMem = false;
 }

-bool XTensor::IsReduceShaped(const XTensor * a, const XTensor * b, int dim)
+bool XTensor::IsReduceShaped(const XTensor* a, const XTensor* b, int dim)
 {
-    if(a == NULL || b == NULL)
+    if (a == NULL || b == NULL)
         return false;

     if ((a->order - 1) != b->order)
@@ -541,18 +541,18 @@ bool XTensor::IsReduceShaped(const XTensor * a, const XTensor * b, int dim)
             return false;
         }
         else if (i >= dim) {
-            if (a->dimSize[i+1] != b->dimSize[i])
+            if (a->dimSize[i + 1] != b->dimSize[i])
                 return false;
         }
     }

-    if(a->dataType != b->dataType)
+    if (a->dataType != b->dataType)
         return false;
-    if(a->denseRatio != b->denseRatio)
+    if (a->denseRatio != b->denseRatio)
         return false;
-    if(a->isSparse != b->isSparse)
+    if (a->isSparse != b->isSparse)
         return false;

     return true;
@@ -562,7 +562,7 @@ bool XTensor::IsReduceShaped(const XTensor * a, const XTensor * b, int dim)
 set the size of each dimension
 >> myDimSize - size of each dimension
 */
-void XTensor::SetDim(int * myDimSize)
+void XTensor::SetDim(int* myDimSize)
 {
     for (int i = 0; i < order; i++) {
         dimSize[i] = myDimSize[i];
@@ -579,7 +579,7 @@ int XTensor::GetDim(const int dim) const
     CheckNTErrors(dim >= -order, "dimenision is out of range!");

     int d = dim;
-    if(dim < 0)
+    if (dim < 0)
         d = order + dim;

     return dimSize[d];
@@ -590,12 +590,12 @@ reshape the tensor
 >> myOrder - order of the tensor
 >> myDimSize - size of each dimension
 */
-void XTensor::Reshape(const int myOrder, const int * myDimSize)
+void XTensor::Reshape(const int myOrder, const int* myDimSize)
 {
     int dims[MAX_TENSOR_DIM_NUM];
     int num = 1;

-    for(int i = 0; i < myOrder; i++){
+    for (int i = 0; i < myOrder; i++) {
         num *= myDimSize[i];
         dims[i] = abs(myDimSize[i]);
     }
@@ -663,7 +663,7 @@ XTensor XTensor::TypeAs(const XTensor input)
 /* get the number of items in the data array */
 int XTensor::GetSize() const
 {
-    if(isSparse)
+    if (isSparse)
         return unitNumNonZero;
     else
         return unitNum;
@@ -672,13 +672,13 @@ int XTensor::GetSize() const
 /* get the size of the memory space used */
 int XTensor::GetDataSizeInChar() const
 {
-    if(isSparse){
+    if (isSparse) {
         int num = int(unitNum * denseRatio + 1);
-        int tupleSize = sizeof(int)+sizeof(DTYPE);
-        int size = sizeof(int) + tupleSize*(num);
+        int tupleSize = sizeof(int) + sizeof(DTYPE);
+        int size = sizeof(int) + tupleSize * (num);
         return size;
     }
-    else{
+    else {
         return unitNum * unitSize;
     }
 }
@@ -690,15 +690,15 @@ get unit size in terms of "dataType"
 */
 int XTensor::GetUnitSize(TENSOR_DATA_TYPE myDataType) const
 {
-    if(myDataType == X_INT)
+    if (myDataType == X_INT)
         return sizeof(int);
-    else if(myDataType == X_FLOAT)
+    else if (myDataType == X_FLOAT)
         return sizeof(float);
-    else if(myDataType == X_DOUBLE)
+    else if (myDataType == X_DOUBLE)
         return sizeof(double);
-    else if(myDataType == X_INT8)
+    else if (myDataType == X_INT8)
         return 1;
-    else if(myDataType == X_FLOAT16)
+    else if (myDataType == X_FLOAT16)
         return 2;
     return sizeof(float);
 }
@@ -737,21 +737,21 @@ MTYPE XTensor::GetOffset3D(int d0, int d1, int d2) const
 a vector with all entries of 0
 >> stream - stream for the job pipeline
 */
-void XTensor::SetZeroAll(XStream * stream)
+void XTensor::SetZeroAll(XStream* stream)
 {
-    if(data == NULL)
+    if (data == NULL)
         return;

-    if(isSparse){
-        if(devID >= 0){
+    if (isSparse) {
+        if (devID >= 0) {
 #ifdef USE_CUDA
-            int size = sizeof(int) + (sizeof(int)+sizeof(DTYPE)) * unitNumNonZero;
+            int size = sizeof(int) + (sizeof(int) + sizeof(DTYPE)) * unitNumNonZero;

             int devIDBackup = 0;
             cudaGetDevice(&devIDBackup);
             cudaSetDevice(devID);

-            if(stream == NULL)
+            if (stream == NULL)
                 cudaMemset(data, 0, size);
             else
                 cudaMemsetAsync(data, 0, size, stream->stream);
@@ -764,14 +764,14 @@ void XTensor::SetZeroAll(XStream * stream)
         unitNumNonZero = 0;
     }
-    else{
-        if(devID >= 0){
+    else {
+        if (devID >= 0) {
 #ifdef USE_CUDA
             int devIDBackup = 0;
             cudaGetDevice(&devIDBackup);
             cudaSetDevice(devID);

-            if(stream == NULL)
+            if (stream == NULL)
                 cudaMemset(data, 0, unitNum * unitSize);
             else
                 cudaMemsetAsync(data, 0, unitNum * unitSize, stream->stream);
@@ -789,9 +789,9 @@ void XTensor::SetZeroAll(XStream * stream)
 >> num - number of data items
 >> beg - where we start the data copy in the data array of the tensor
 */
-void XTensor::SetData(const void * d, int num, int beg)
+void XTensor::SetData(const void* d, int num, int beg)
 {
-    if (data == NULL || d ==NULL)
+    if (data == NULL || d == NULL)
         return;

     CheckNTErrors(!isSparse, "TODO");
@@ -830,7 +830,7 @@ void XTensor::SetDataRand(DTYPE lower, DTYPE upper)
     // srand((unsigned)time(0));
     DTYPE variance = upper - lower;

-    void * d = NULL;
+    void* d = NULL;
     if (dataType == X_FLOAT) {
         d = new float[unitNum];
         for (int i = 0; i < unitNum; i++) {
@@ -868,12 +868,12 @@ double GaussRand(DTYPE mean, DTYPE standardDeviation)
     double z;
     double pi = 3.141592654;

-    if (phase == 0){
+    if (phase == 0) {
         u = (rand() + 1.0) / (RAND_MAX + 1.0);
         v = (rand() + 1.0) / (RAND_MAX + 1.0);
-        z = sqrt(-2.0 * log(u))* sin(2.0 * pi * v);
+        z = sqrt(-2.0 * log(u)) * sin(2.0 * pi * v);
     }
-    else{
+    else {
         z = sqrt(-2.0 * log(u)) * cos(2.0 * pi * v);
     }
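`GaussRand` is the Box-Muller transform: from uniforms u, v in (0, 1], both sqrt(-2 ln u) * sin(2*pi*v) and sqrt(-2 ln u) * cos(2*pi*v) are independent standard normals, so the `phase` flag lets one (u, v) pair serve two consecutive calls. A self-contained single-shot sketch of the same idea:

// Minimal Box-Muller sketch (illustrative; the library version caches
// the cos() twin in static state via `phase` and reuses u and v).
#include <cmath>
#include <cstdlib>

double BoxMullerOnce(double mean, double stddev)
{
    const double pi = 3.141592654;
    double u = (std::rand() + 1.0) / (RAND_MAX + 1.0);   /* shift by 1 to avoid log(0) */
    double v = (std::rand() + 1.0) / (RAND_MAX + 1.0);
    double z = std::sqrt(-2.0 * std::log(u)) * std::sin(2.0 * pi * v);
    return mean + stddev * z;                            /* scale the standard normal */
}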
@@ -894,7 +894,7 @@ void XTensor::SetDataRandn(DTYPE mean, DTYPE standardDeviation)
         return;

     // srand((unsigned)time(0));
-    void * d = NULL;
+    void* d = NULL;
     if (dataType == X_FLOAT) {
         d = new float[unitNum];
         for (int i = 0; i < unitNum; i++) {
@@ -927,7 +927,7 @@ set tensor items with an array of offsets
 >> value - value for the data items
 >> num - number of the data items
 */
-void XTensor::SetDataBatched(MTYPE * offsets, DTYPE value, int num)
+void XTensor::SetDataBatched(MTYPE* offsets, DTYPE value, int num)
 {
     _SetDataWithOffset(this, offsets, value, num);
 }
@@ -938,7 +938,7 @@ set tensor items with an array of values
 >> values - value for each data item
 >> num - number of the data items
 */
-void XTensor::SetDataBatchedWithValues(MTYPE * offsets, void * values, int num)
+void XTensor::SetDataBatchedWithValues(MTYPE* offsets, void* values, int num)
 {
     _SetDataWithOffsetAndValue(this, offsets, values, num);
 }
@@ -973,9 +973,9 @@ DTYPE XTensor::Get(int offset) const
     CheckNTErrors(offset >= 0 && offset < unitNum, "Invalid index!");
     CheckNTErrors(data != NULL, "Cannot use an uninitialized tensor!");
     CheckNTErrors(denseRatio == 1.0F, "Only dense tensors are supported in Get(offset).");

-    DTYPE * address = (DTYPE*)data + offset;
+    DTYPE* address = (DTYPE*)data + offset;

     return ToCPU(devID, address);
 }
@@ -985,25 +985,25 @@ get the pointer to a cell
 >> size - size of index
 << return - pointer to the cell
 */
-void * XTensor::GetCell(int index[], int size) const
+void* XTensor::GetCell(int index[], int size) const
 {
     CheckNTErrors((size == order), "Illegal index!");

     int offset = index[0];
-    for(int i = 1; i < size; ++i){
+    for (int i = 1; i < size; ++i) {
         CheckNTErrors((index[i] < dimSize[i]), "Index is out of range!");
         offset = offset * dimSize[i] + index[i];
     }

-    if(isSparse){
+    if (isSparse) {
         DTYPE value;
-        void * p;
-        if(BinarySearch(offset, value, p))
+        void* p;
+        if (BinarySearch(offset, value, p))
             return (char*)p + sizeof(int);
         else
             return NULL;
     }
-    else{
+    else {
         return ((char*)data) + offset * unitSize;
     }
 }
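`GetCell` linearizes a multi-dimensional index in row-major order, `offset = (...(i0 * D1 + i1) * D2 + i2)...`, then either binary-searches the sparse tuple list or jumps straight into the dense array. A worked check of the offset loop:

// Row-major offset as computed by GetCell's loop, checked on a
// 2 x 3 x 4 tensor: index (1, 2, 3) -> (1 * 3 + 2) * 4 + 3 = 23.
#include <cassert>

int RowMajorOffset(const int* index, const int* dimSize, int order)
{
    int offset = index[0];
    for (int i = 1; i < order; ++i)
        offset = offset * dimSize[i] + index[i];
    return offset;
}

int main()
{
    int dims[3]  = {2, 3, 4};
    int index[3] = {1, 2, 3};
    assert(RowMajorOffset(index, dims, 3) == 23);
    return 0;
}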
@@ -1018,7 +1018,7 @@ DTYPE XTensor::Get0D() const
     CheckNTErrors((dataType == DEFAULT_DTYPE), "The tensor is not in default type.");

     int dims[1] = {0};
-    void * value = GetCell(dims, 0);
+    void* value = GetCell(dims, 0);

     return ToCPU(devID, value);
 }
@@ -1035,7 +1035,7 @@ DTYPE XTensor::Get1D(int i) const
     CheckNTErrors((dataType == DEFAULT_DTYPE), "The tensor is not in default type.");

     int dims[1] = {i};
-    void * value = GetCell(dims, 1);
+    void* value = GetCell(dims, 1);

     return ToCPU(devID, value);
 }
@@ -1054,7 +1054,7 @@ DTYPE XTensor::Get2D(int ni, int mi) const
     CheckNTErrors((dataType == DEFAULT_DTYPE), "The tensor is not in default type.");

     int dims[2] = {ni, mi};
-    void * value = GetCell(dims, 2);
+    void* value = GetCell(dims, 2);

     return ToCPU(devID, value);
 }
@@ -1067,14 +1067,14 @@ get the value of a cell in a 3d tensor
 */
 DTYPE XTensor::Get3D(int d0, int d1, int d2) const
 {
-    CheckNTErrors((order == 3), "Cannot get a 2d cell for a tensor whose order is not 3!");
+    CheckNTErrors((order == 3), "Cannot get a 3d cell for a tensor whose order is not 3!");
     CheckNTErrors((d0 >= 0 && d0 < dimSize[0]), "dimension 0 is out of range!");
     CheckNTErrors((d1 >= 0 && d1 < dimSize[1]), "dimension 1 is out of range!");
     CheckNTErrors((d2 >= 0 && d2 < dimSize[2]), "dimension 2 is out of range!");
     CheckNTErrors((dataType == DEFAULT_DTYPE), "The tensor is not in default type.");

     int dims[3] = {d0, d1, d2};
-    void * value = GetCell(dims, 3);
+    void* value = GetCell(dims, 3);

     return ToCPU(devID, value);
 }
@@ -1089,9 +1089,9 @@ int XTensor::GetInt(int offset) const
     CheckNTErrors(offset >= 0 && offset < unitNum, "Invalid index!");
     CheckNTErrors(data != NULL, "Cannot use an uninitialized tensor!");
     CheckNTErrors(denseRatio == 1.0F, "Only dense tensors are supported in Get(offset).");

-    int * address = (int*)data + offset;
+    int* address = (int*)data + offset;

     return ToCPUInt(devID, address);
 }
@@ -1105,7 +1105,7 @@ int XTensor::Get0DInt() const
     CheckNTErrors(dataType == X_INT, "The tensor is not in int type.");

     int dims[1] = {0};
-    void * value = GetCell(dims, 0);
+    void* value = GetCell(dims, 0);

     return ToCPUInt(devID, value);
 }
@@ -1122,7 +1122,7 @@ int XTensor::Get1DInt(int i) const
     CheckNTErrors(dataType == X_INT, "The tensor is not in int type.");

     int dims[1] = {i};
-    void * value = GetCell(dims, 1);
+    void* value = GetCell(dims, 1);

     return ToCPUInt(devID, value);
 }
@@ -1133,7 +1133,7 @@ get the value of a cell in a 2d tensor in int type
 >> mi - column index
 << return - value of cell(ni, mi) in int
 */
 int XTensor::Get2DInt(int ni, int mi) const
 {
     CheckNTErrors(order == 2, "Cannot get a 2d cell for a tensor whose order is not 2!");
     CheckNTErrors(ni >= 0 && ni < dimSize[0], "dimension 0 is out of range!");
@@ -1141,7 +1141,7 @@ get the value of a cell in a 2d tensor in int type
     CheckNTErrors(dataType == X_INT, "The tensor is not in default type.");

     int dims[2] = {ni, mi};
-    void * value = GetCell(dims, 2);
+    void* value = GetCell(dims, 2);

     return ToCPUInt(devID, value);
 }
@@ -1155,14 +1155,14 @@ get the value of a cell in a 3d tensor in int type
 */
 int XTensor::Get3DInt(int d0, int d1, int d2) const
 {
-    CheckNTErrors(order == 3, "Cannot get a 2d cell for a tensor whose order is not 3!");
+    CheckNTErrors(order == 3, "Cannot get a 3d cell for a tensor whose order is not 3!");
     CheckNTErrors(d0 >= 0 && d0 < dimSize[0], "dimension 0 is out of range!");
     CheckNTErrors(d1 >= 0 && d1 < dimSize[1], "dimension 1 is out of range!");
     CheckNTErrors(d2 >= 0 && d2 < dimSize[2], "dimension 2 is out of range!");
     CheckNTErrors(dataType == X_INT, "The tensor is not in default type.");

     int dims[3] = {d0, d1, d2};
-    void * value = GetCell(dims, 3);
+    void* value = GetCell(dims, 3);

     return ToCPUInt(devID, value);
 }
@@ -1177,8 +1177,8 @@ DTYPE XTensor::GetInSparse(int i) const
     CheckNTErrors(i >= 0 && i < unitNum, "Index is out of range!");
     CheckNTErrors(dataType == DEFAULT_DTYPE, "The tensor is not in default type.");

-    char * d = (char*)data + sizeof(int);
-    DTYPE * value = (DTYPE*)(d + (sizeof(int) + sizeof(DTYPE)) * i + sizeof(int));
+    char* d = (char*)data + sizeof(int);
+    DTYPE* value = (DTYPE*)(d + (sizeof(int) + sizeof(DTYPE)) * i + sizeof(int));

     return ToCPU(devID, value);
 }
@@ -1193,9 +1193,9 @@ int XTensor::GetKeyInSparse(int i) const
     CheckNTErrors(i >= 0 && i < unitNum, "Index is out of range!");
     CheckNTErrors(dataType == DEFAULT_DTYPE, "The tensor is not in default type.");

-    char * d = (char*)data + sizeof(int);
-    int * key = (int*)(d + (sizeof(int) + sizeof(DTYPE)) * i);
+    char* d = (char*)data + sizeof(int);
+    int* key = (int*)(d + (sizeof(int) + sizeof(DTYPE)) * i);

     return ToCPUInt(devID, key);
 }
@@ -1222,7 +1222,7 @@ bool XTensor::Set(DTYPE value, int offset)
     CheckNTErrors(offset >= 0 && offset < unitNum, "Invalid index!");
     CheckNTErrors(data != NULL, "Cannot use an uninitialized tensor!");

-    DTYPE * d = (DTYPE*)data + offset;
+    DTYPE* d = (DTYPE*)data + offset;

     return SetToDevice(devID, d, value);
 }
@@ -1288,7 +1288,7 @@ set the value of a cell in a 3d tensor in default type
 */
 bool XTensor::Set3D(DTYPE value, int d0, int d1, int d2)
 {
-    CheckNTErrors(order == 3, "Cannot get a 2d cell for a tensor whose order is not 3!");
+    CheckNTErrors(order == 3, "Cannot get a 3d cell for a tensor whose order is not 3!");
     CheckNTErrors(d0 >= 0 && d0 < dimSize[0], "dimension 0 is out of range!");
     CheckNTErrors(d1 >= 0 && d1 < dimSize[1], "dimension 1 is out of range!");
     CheckNTErrors(d2 >= 0 && d2 < dimSize[2], "dimension 2 is out of range!");
@@ -1308,9 +1308,9 @@ bool XTensor::SetInt(int value, int offset)
 {
     CheckNTErrors(offset >= 0 && offset < unitNum, "Invalid index!");
     CheckNTErrors(data != NULL, "Cannot use an uninitialized tensor!");

-    int * d = (int*)data + offset;
+    int* d = (int*)data + offset;

     return SetToDeviceInt(devID, d, value);
 }
@@ -1390,7 +1390,7 @@ set the integer value of a cell in a 3d tensor in default type
 */
 bool XTensor::Set3DInt(int value, int d0, int d1, int d2)
 {
-    CheckNTErrors(order == 3, "Cannot get a 2d cell for a tensor whose order is not 3!");
+    CheckNTErrors(order == 3, "Cannot get a 3d cell for a tensor whose order is not 3!");
     CheckNTErrors(d0 >= 0 && d0 < dimSize[0], "dimension 0 is out of range!");
     CheckNTErrors(d1 >= 0 && d1 < dimSize[1], "dimension 1 is out of range!");
     CheckNTErrors(d2 >= 0 && d2 < dimSize[2], "dimension 2 is out of range!");
@@ -1408,23 +1408,23 @@ increase the value of a cell in a 2d tensor
 >> mi - column index
 << return - succeeded or not
 */
 bool XTensor::Add2D(DTYPE value, int ni, int mi)
 {
     CheckNTErrors(ni >= 0 && ni < dimSize[0], "the row index is out of range!");
     CheckNTErrors(mi >= 0 && mi < dimSize[1], "the column index is out of range!");
     CheckNTErrors(dataType == DEFAULT_DTYPE, "The tensor is not in default type.");
     CheckNTErrors(isSparse == false, "TODO!");

-    if(devID < 0){
-        DTYPE * p = (DTYPE*)data + ni * dimSize[1] + mi;
+    if (devID < 0) {
+        DTYPE* p = (DTYPE*)data + ni * dimSize[1] + mi;

         CheckNTErrors((p != NULL), "No data array is found!");

         *p = *p + value;

         return true;
     }
-    else{
+    else {
         int dims[2] = {ni, mi};
         return SetToDevice(devID, GetCell(dims, 2), Get2D(ni, mi) + value);
     }
@@ -1433,24 +1433,24 @@ increase the value of a cell in a 2d tensor
 /* get the number of non-zero elements (in a sparse tensor) */
 int XTensor::GetNonzeroSize() const
 {
-    if(!isSparse){
+    if (!isSparse) {
         XPRINT(1, stderr, "WARNING! Counting non-zero elements in a dense tensor might be slow!\n");
         CheckNTErrors(devID < 0, "TODO");
-        if(dataType == DEFAULT_DTYPE){
+        if (dataType == DEFAULT_DTYPE) {
             int count = 0;
-            for(int i = 0; i < unitNum; i++){
+            for (int i = 0; i < unitNum; i++) {
                 DTYPE value = *(DTYPE*)((char*)data + i * sizeof(DTYPE));
-                if(value == 0)
+                if (value == 0)
                     count++;
             }
             return count;
         }
-        else{
+        else {
             ShowNTErrors("TODO!");
             return -1;
         }
     }
-    else{
+    else {
         /* return the head of the tuple list */
         return unitNumNonZero;
     }
@@ -1481,7 +1481,7 @@ set the tensor as "variable"
 void XTensor::SetVarFlag(bool myIsVar)
 {
     isVar = myIsVar;
-    if(isVar)
+    if (isVar)
         SetGradFlag(true);
 }
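Note the interaction with the new global default: a tensor created with gradients disabled can still become a trainable variable, since `SetVarFlag(true)` also raises the grad flag. Sketch (library calls as declared above; exact flag semantics assumed):

// With X_ENABLE_GRAD == false, a fresh tensor does not track gradients,
// but marking it as a variable calls SetGradFlag(true) on it.
XTensor* w = NewTensor2D(3, 4);   /* isEnableGrad defaults to X_ENABLE_GRAD */
w->SetVarFlag(true);              /* isVar = true -> SetGradFlag(true) */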
...@@ -1493,11 +1493,11 @@ resize a tensor with a specified tensor size ...@@ -1493,11 +1493,11 @@ resize a tensor with a specified tensor size
>> myDenseRatio - how often an element has non-zero value >> myDenseRatio - how often an element has non-zero value
<< return - succeeded or not << return - succeeded or not
*/ */
bool XTensor::Resize(const int myOrder, const int * myDimSize, bool XTensor::Resize(const int myOrder, const int* myDimSize,
const TENSOR_DATA_TYPE myDataType, const float myDenseRatio) const TENSOR_DATA_TYPE myDataType, const float myDenseRatio)
{ {
/* free old mem */ /* free old mem */
if(data != NULL){ if (data != NULL) {
if (mem == NULL) if (mem == NULL)
XMemFree(devID, data); XMemFree(devID, data);
else else
...@@ -1513,11 +1513,11 @@ bool XTensor::Resize(const int myOrder, const int * myDimSize, ...@@ -1513,11 +1513,11 @@ bool XTensor::Resize(const int myOrder, const int * myDimSize,
bool filledData = true; bool filledData = true;
bool zeroData = false; bool zeroData = false;
for(int i = 0; i < order; i++){ for (int i = 0; i < order; i++) {
dimSize[i] = abs(myDimSize[i]); dimSize[i] = abs(myDimSize[i]);
if(myDimSize[i] < 0) if (myDimSize[i] < 0)
filledData = false; filledData = false;
if(myDimSize[i] == 0) if (myDimSize[i] == 0)
zeroData = true; zeroData = true;
unitNum *= dimSize[i]; unitNum *= dimSize[i];
} }
...@@ -1528,17 +1528,17 @@ bool XTensor::Resize(const int myOrder, const int * myDimSize, ...@@ -1528,17 +1528,17 @@ bool XTensor::Resize(const int myOrder, const int * myDimSize,
dataType = myDataType; dataType = myDataType;
unitSize = GetUnitSize(dataType); unitSize = GetUnitSize(dataType);
if(myDataType != DEFAULT_DTYPE) if (myDataType != DEFAULT_DTYPE)
isDefaultDType = false; isDefaultDType = false;
else else
isDefaultDType = true; isDefaultDType = true;
if(zeroData){ if (zeroData) {
unitNum = 0; unitNum = 0;
return false; return false;
} }
if(isSparse){ if (isSparse) {
/* /*
for sparse matrices, we use a list of tuples (key, value), for sparse matrices, we use a list of tuples (key, value),
ordered by key. Take a (2-dimensional) matrix as an example, ordered by key. Take a (2-dimensional) matrix as an example,
...@@ -1557,21 +1557,21 @@ bool XTensor::Resize(const int myOrder, const int * myDimSize, ...@@ -1557,21 +1557,21 @@ bool XTensor::Resize(const int myOrder, const int * myDimSize,
*/ */
int num = int(unitNum * denseRatio + 1); int num = int(unitNum * denseRatio + 1);
int tupleSize = sizeof(int)+sizeof(DTYPE); int tupleSize = sizeof(int) + sizeof(DTYPE);
int size = sizeof(int) + tupleSize*(num); int size = sizeof(int) + tupleSize * (num);
if(filledData){ if (filledData) {
int * d = NULL; int* d = NULL;
if(mem == NULL){ if (mem == NULL) {
d = new int[size]; d = new int[size];
memset(d, 0, size); memset(d, 0, size);
} }
else{ else {
d = (int*)mem->Alloc(mem->devID, size); d = (int*)mem->Alloc(mem->devID, size);
} }
if(d == NULL) if (d == NULL)
return false; return false;
#if !defined(UNSAFE_BUT_FAST_MEM) #if !defined(UNSAFE_BUT_FAST_MEM)
...@@ -1581,11 +1581,11 @@ bool XTensor::Resize(const int myOrder, const int * myDimSize, ...@@ -1581,11 +1581,11 @@ bool XTensor::Resize(const int myOrder, const int * myDimSize,
} }
return true; return true;
} }
else{ else {
if(filledData){ if (filledData) {
/* allocate the new one */ /* allocate the new one */
if(mem == NULL){ if (mem == NULL) {
data = XMemAlloc(devID, unitNum * unitSize); data = XMemAlloc(devID, unitNum * unitSize);
#if defined(UNSAFE_BUT_FAST_MEM) #if defined(UNSAFE_BUT_FAST_MEM)
XMemSet(devID, data, 0, unitNum * unitSize); XMemSet(devID, data, 0, unitNum * unitSize);
#endif #endif
...@@ -1593,12 +1593,12 @@ bool XTensor::Resize(const int myOrder, const int * myDimSize, ...@@ -1593,12 +1593,12 @@ bool XTensor::Resize(const int myOrder, const int * myDimSize,
else else
data = (void*)mem->Alloc(mem->devID, unitNum * unitSize); data = (void*)mem->Alloc(mem->devID, unitNum * unitSize);
if(data == NULL) if (data == NULL)
return false; return false;
} }
#if !defined(UNSAFE_BUT_FAST_MEM) #if !defined(UNSAFE_BUT_FAST_MEM)
if(data != NULL) if (data != NULL)
XMem::SetZero(data, unitNum * unitSize, mem); XMem::SetZero(data, unitNum * unitSize, mem);
#endif #endif
return true; return true;
...@@ -1609,12 +1609,12 @@ bool XTensor::Resize(const int myOrder, const int * myDimSize, ...@@ -1609,12 +1609,12 @@ bool XTensor::Resize(const int myOrder, const int * myDimSize,
resize a tensor by another resize a tensor by another
>> myTensor - tensor for reference >> myTensor - tensor for reference
*/ */
bool XTensor::Resize(const XTensor * myTensor) bool XTensor::Resize(const XTensor* myTensor)
{ {
denseRatio = myTensor->denseRatio; denseRatio = myTensor->denseRatio;
TENSOR_DATA_TYPE myDataType = myTensor->dataType; TENSOR_DATA_TYPE myDataType = myTensor->dataType;
if(myDataType != DEFAULT_DTYPE) if (myDataType != DEFAULT_DTYPE)
isDefaultDType = false; isDefaultDType = false;
else else
isDefaultDType = true; isDefaultDType = true;
...@@ -1630,54 +1630,54 @@ binary search to find an element in a sparse tensor ...@@ -1630,54 +1630,54 @@ binary search to find an element in a sparse tensor
it is the previous one if there is no hit it is the previous one if there is no hit
<< return - found it or not? << return - found it or not?
*/ */
bool XTensor::BinarySearch(int key, DTYPE &value, void * &position) const bool XTensor::BinarySearch(int key, DTYPE& value, void*& position) const
{ {
CheckNTErrors((isSparse), "A sparse tensor is required!"); CheckNTErrors((isSparse), "A sparse tensor is required!");
CheckNTErrors((dataType == DEFAULT_DTYPE), "The tensor is not in the default type."); CheckNTErrors((dataType == DEFAULT_DTYPE), "The tensor is not in the default type.");
int * d = (int*)data; int* d = (int*)data;
if(key < 0 || *d == 0){ if (key < 0 || *d == 0) {
value = 0; value = 0;
position = NULL; position = NULL;
return false; return false;
} }
int low = 0; int low = 0;
int high = *d - 1; int high = *d - 1;
int last = -1; int last = -1;
bool ok = false; bool ok = false;
int * k = NULL; int* k = NULL;
int headSize = sizeof(int); int headSize = sizeof(int);
int tupleSize = sizeof(int)+sizeof(DTYPE); int tupleSize = sizeof(int) + sizeof(DTYPE);
char * p = (char*)data + headSize; char* p = (char*)data + headSize;
while (low <= high){ while (low <= high) {
int mid = low + (high-low)/2; int mid = low + (high - low) / 2;
k = (int*)(p + tupleSize * mid); k = (int*)(p + tupleSize * mid);
if (*k == key){ if (*k == key) {
ok = true; ok = true;
high = mid -1; high = mid - 1;
break; break;
} }
else if(*k > key){ else if (*k > key) {
high = mid -1; high = mid - 1;
} }
else{ else {
low = mid +1; low = mid + 1;
last = mid; last = mid;
} }
} }
if(ok){ if (ok) {
DTYPE * p = (DTYPE*)((char*)k + sizeof(int)); DTYPE* p = (DTYPE*)((char*)k + sizeof(int));
value = *p; value = *p;
position = k; position = k;
return true; return true;
} }
else{ else {
value = 0; value = 0;
if(last == -1) if (last == -1)
position = NULL; position = NULL;
else else
position = (char*)data + headSize + tupleSize * last; position = (char*)data + headSize + tupleSize * last;
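The layout that BinarySearch walks is the tuple list built by Resize above: one leading int holding the number of stored tuples, followed by (int key, DTYPE value) pairs ordered by key. A lookup sketch against a sparse tensor s (assumed initialized and filled elsewhere):

    DTYPE value = 0;
    void* pos = NULL;
    if (s.BinarySearch(42, value, pos)) {
        // hit: value is the stored element, pos points at its (key, value) tuple
    }
    else {
        // miss: pos is the tuple with the largest key smaller than 42, or NULL
    }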
...@@ -1693,12 +1693,12 @@ dump data to a file ...@@ -1693,12 +1693,12 @@ dump data to a file
>> beg - the first item id >> beg - the first item id
>> verbose - verbose level >> verbose - verbose level
*/ */
void XTensor::Dump(FILE * file, const char * label, const int n, const int beg, const int verbose) void XTensor::Dump(FILE* file, const char* label, const int n, const int beg, const int verbose)
{ {
if (verbose > verboseLevel) if (verbose > verboseLevel)
return; return;
void * d = data; void* d = data;
bool isNewData = false; bool isNewData = false;
#ifdef USE_CUDA #ifdef USE_CUDA
...@@ -1716,7 +1716,7 @@ void XTensor::Dump(FILE * file, const char * label, const int n, const int beg, ...@@ -1716,7 +1716,7 @@ void XTensor::Dump(FILE * file, const char * label, const int n, const int beg,
num *= dimSize[i]; num *= dimSize[i];
num = int(num * denseRatio + 1); num = int(num * denseRatio + 1);
int tupleSize = sizeof(int) + sizeof(DTYPE); int tupleSize = sizeof(int) + sizeof(DTYPE);
int size = sizeof(int) + tupleSize*(num); int size = sizeof(int) + tupleSize * (num);
d = new char[size]; d = new char[size];
memset(d, 0, size); memset(d, 0, size);
...@@ -1730,8 +1730,8 @@ void XTensor::Dump(FILE * file, const char * label, const int n, const int beg, ...@@ -1730,8 +1730,8 @@ void XTensor::Dump(FILE * file, const char * label, const int n, const int beg,
if (label != NULL) if (label != NULL)
fprintf(file, "%s ", label); fprintf(file, "%s ", label);
if(isInit){ if (isInit) {
fprintf(file, "order=%d dimsize=", order); fprintf(file, "order=%d dimsize=", order);
if (order == 0) { if (order == 0) {
fprintf(file, "%d,", dimSize[0]); fprintf(file, "%d,", dimSize[0]);
...@@ -1742,21 +1742,21 @@ void XTensor::Dump(FILE * file, const char * label, const int n, const int beg, ...@@ -1742,21 +1742,21 @@ void XTensor::Dump(FILE * file, const char * label, const int n, const int beg,
fprintf(file, ","); fprintf(file, ",");
} }
} }
else{ else {
fprintf(file, "order=-1 dimsize=-1"); fprintf(file, "order=-1 dimsize=-1");
} }
fprintf(file, " dtype=%s dense=%f\n", GetDataTypeName(dataType), denseRatio); fprintf(file, " dtype=%s dense=%f\n", GetDataTypeName(dataType), denseRatio);
if(!isInit){ if (!isInit) {
fprintf(file, "NULL"); fprintf(file, "NULL");
} }
if (!isSparse) { if (!isSparse) {
if (dataType == DEFAULT_DTYPE) { if (dataType == DEFAULT_DTYPE) {
int end = MIN(n > 0 ? beg + n : beg + unitNum, unitNum); int end = MIN(n > 0 ? beg + n : beg + unitNum, unitNum);
for(int i = beg; i < end; i++){ for (int i = beg; i < end; i++) {
DTYPE f = ((DTYPE*)d)[i]; DTYPE f = ((DTYPE*)d)[i];
if(i == beg) if (i == beg)
fprintf(file, "%e", f); fprintf(file, "%e", f);
else else
fprintf(file, " %e", f); fprintf(file, " %e", f);
...@@ -1765,9 +1765,9 @@ void XTensor::Dump(FILE * file, const char * label, const int n, const int beg, ...@@ -1765,9 +1765,9 @@ void XTensor::Dump(FILE * file, const char * label, const int n, const int beg,
} }
else if (dataType == X_INT) { else if (dataType == X_INT) {
int end = MIN(n > 0 ? beg + n : beg + unitNum, unitNum); int end = MIN(n > 0 ? beg + n : beg + unitNum, unitNum);
for(int i = beg; i < end; i++){ for (int i = beg; i < end; i++) {
int f = ((int*)d)[i]; int f = ((int*)d)[i];
if(i == beg) if (i == beg)
fprintf(file, "%d", f); fprintf(file, "%d", f);
else else
fprintf(file, " %d", f); fprintf(file, " %d", f);
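For orientation, a dense 2 x 3 float tensor dumped this way produces a header line followed by the values; the exact dimsize separator comes from a loop elided above, so the lines below are illustrative:

    order=2 dimsize=2,3 dtype=X_FLOAT dense=1.000000
    0.000000e+00 1.000000e+00 2.000000e+00 3.000000e+00 4.000000e+00 5.000000e+00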
...@@ -1807,7 +1807,7 @@ dump data to a file ...@@ -1807,7 +1807,7 @@ dump data to a file
>> beg - the first item id >> beg - the first item id
>> verbose - verbose level >> verbose - verbose level
*/ */
void XTensor::Dump(const XTensor * tensor, FILE * file, const char * label, const int n, const int beg, const int verbose) void XTensor::Dump(const XTensor* tensor, FILE* file, const char* label, const int n, const int beg, const int verbose)
{ {
XTensor a(tensor->order, tensor->dimSize, tensor->dataType, tensor->denseRatio, tensor->devID, tensor->mem); XTensor a(tensor->order, tensor->dimSize, tensor->dataType, tensor->denseRatio, tensor->devID, tensor->mem);
_CopyValues(tensor, &a); _CopyValues(tensor, &a);
...@@ -1839,7 +1839,7 @@ read data from a file ...@@ -1839,7 +1839,7 @@ read data from a file
>> file - where to load the data >> file - where to load the data
>> label - label of the tensor >> label - label of the tensor
*/ */
void XTensor::Read(FILE * file, const char * label) void XTensor::Read(FILE* file, const char* label)
{ {
char typeName[32] = ""; char typeName[32] = "";
char dimSizeName[128] = ""; char dimSizeName[128] = "";
...@@ -1858,7 +1858,7 @@ void XTensor::Read(FILE * file, const char * label) ...@@ -1858,7 +1858,7 @@ void XTensor::Read(FILE * file, const char * label)
fgetc(file); fgetc(file);
if (fscanf(file, "order=%d dimsize=%s dtype=%s dense=%f", if (fscanf(file, "order=%d dimsize=%s dtype=%s dense=%f",
&dimNum, dimSizeName, typeName, &dRatio) < 4) { &dimNum, dimSizeName, typeName, &dRatio) < 4) {
ShowNTErrors("Incorrect format when reading the tensor!"); ShowNTErrors("Incorrect format when reading the tensor!");
} }
...@@ -1872,7 +1872,7 @@ void XTensor::Read(FILE * file, const char * label) ...@@ -1872,7 +1872,7 @@ void XTensor::Read(FILE * file, const char * label)
int o = 0; int o = 0;
bool sameSize = true; bool sameSize = true;
char * p = dimSizeName; char* p = dimSizeName;
while (*p != 0) { while (*p != 0) {
while (*p == ' ' || *p == '\t') while (*p == ' ' || *p == '\t')
p++; p++;
...@@ -1896,14 +1896,14 @@ void XTensor::Read(FILE * file, const char * label) ...@@ -1896,14 +1896,14 @@ void XTensor::Read(FILE * file, const char * label)
if (!sameSize || dRatio > denseRatio || GetDataType(typeName) != dataType) if (!sameSize || dRatio > denseRatio || GetDataType(typeName) != dataType)
Resize(dimNum, dims, GetDataType(typeName), dRatio); Resize(dimNum, dims, GetDataType(typeName), dRatio);
void * dataBuf = XMemAlloc(-1, GetDataSizeInChar()); void* dataBuf = XMemAlloc(-1, GetDataSizeInChar());
void * dataBackup = data; void* dataBackup = data;
data = dataBuf; data = dataBuf;
if (!isSparse) { if (!isSparse) {
if (dataType == DEFAULT_DTYPE) { if (dataType == DEFAULT_DTYPE) {
for (int i = 0; i < unitNum; i++) { for (int i = 0; i < unitNum; i++) {
DTYPE * f = ((DTYPE*)data) + i; DTYPE* f = ((DTYPE*)data) + i;
if (fscanf(file, "%e", f) < 1) { if (fscanf(file, "%e", f) < 1) {
ShowNTErrors("Incorrect tensor format!"); ShowNTErrors("Incorrect tensor format!");
} }
...@@ -1956,13 +1956,13 @@ void XTensor::BinaryRead(FILE* file, size_t offset) ...@@ -1956,13 +1956,13 @@ void XTensor::BinaryRead(FILE* file, size_t offset)
fseek(file, offset, 0); fseek(file, offset, 0);
switch (dataType) { switch (dataType) {
case X_INT: { case X_INT: {
int * d = new int[unitNum]; int* d = new int[unitNum];
fread(d, sizeof(int), unitNum, file); fread(d, sizeof(int), unitNum, file);
SetData(d, unitNum); SetData(d, unitNum);
delete[] d; delete[] d;
break; break;
} }
default: { default: {
float * d = new float[unitNum]; float* d = new float[unitNum];
fread(d, sizeof(float), unitNum, file); fread(d, sizeof(float), unitNum, file);
SetData(d, unitNum); SetData(d, unitNum);
delete[] d; delete[] d;
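A minimal sketch of the binary path (the file name is an assumption; the file is expected to hold raw values produced by a matching binary writer, and the tensor's shape and dtype must match what was written):

    FILE* fp = fopen("tensor.bin", "rb");   // assumed file name
    int dims[2] = {2, 3};
    XTensor t;
    t.Resize(2, dims, X_FLOAT, 1.0F);
    t.BinaryRead(fp, 0);                // reads unitNum values from offset 0
    fclose(fp);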
...@@ -1974,7 +1974,7 @@ void XTensor::BinaryRead(FILE* file, size_t offset) ...@@ -1974,7 +1974,7 @@ void XTensor::BinaryRead(FILE* file, size_t offset)
flush the data to the target device flush the data to the target device
>> targetMem - memory pool on the target device >> targetMem - memory pool on the target device
*/ */
void XTensor::FlushToMem(XMem * targetMem) void XTensor::FlushToMem(XMem* targetMem)
{ {
if (targetMem == NULL) if (targetMem == NULL)
return; return;
...@@ -1987,7 +1987,7 @@ void XTensor::FlushToMem(XMem * targetMem) ...@@ -1987,7 +1987,7 @@ void XTensor::FlushToMem(XMem * targetMem)
CudaCPUToGPUFlush(&l, targetMem->devID, targetMem); CudaCPUToGPUFlush(&l, targetMem->devID, targetMem);
} }
else if (mem != targetMem) { else if (mem != targetMem) {
void * tmpData = targetMem->Alloc(targetMem->devID, GetDataSizeInChar()); void* tmpData = targetMem->Alloc(targetMem->devID, GetDataSizeInChar());
XMemCopy(tmpData, targetMem->devID, data, devID, GetDataSizeInChar()); XMemCopy(tmpData, targetMem->devID, data, devID, GetDataSizeInChar());
data = tmpData; data = tmpData;
mem = targetMem; mem = targetMem;
...@@ -2016,24 +2016,24 @@ allocate the memory space of the tensor (in the global memory) ...@@ -2016,24 +2016,24 @@ allocate the memory space of the tensor (in the global memory)
>> myMem - the memory pool we are using >> myMem - the memory pool we are using
>> useBuf - indicates whether we use the buffer in the memory pool >> useBuf - indicates whether we use the buffer in the memory pool
*/ */
void XTensor::AllocateData(XTensor * tensor, XMem * myMem, bool useBuf) void XTensor::AllocateData(XTensor* tensor, XMem* myMem, bool useBuf)
{ {
if(tensor == NULL) if (tensor == NULL)
return; return;
if(myMem == NULL){ if (myMem == NULL) {
if(tensor->data != NULL) if (tensor->data != NULL)
FreeData(tensor, NULL, false); FreeData(tensor, NULL, false);
tensor->data = XMemAlloc(tensor->devID, tensor->GetDataSizeInChar()); tensor->data = XMemAlloc(tensor->devID, tensor->GetDataSizeInChar());
tensor->isInGlobalMem = true; tensor->isInGlobalMem = true;
} }
else{ else {
CheckNTErrors((tensor->data == NULL), "Cannot renew the space for the tensor"); CheckNTErrors((tensor->data == NULL), "Cannot renew the space for the tensor");
if(useBuf){ if (useBuf) {
tensor->data = myMem->AllocBuf(tensor->devID, tensor->GetDataSizeInChar()); tensor->data = myMem->AllocBuf(tensor->devID, tensor->GetDataSizeInChar());
tensor->isInGlobalMem = false; tensor->isInGlobalMem = false;
} }
else{ else {
tensor->data = myMem->AllocGlobal(tensor->devID, tensor->GetDataSizeInChar()); tensor->data = myMem->AllocGlobal(tensor->devID, tensor->GetDataSizeInChar());
tensor->isInGlobalMem = true; tensor->isInGlobalMem = true;
} }
...@@ -2048,16 +2048,16 @@ free the memory space of the tensor (in the global memory) ...@@ -2048,16 +2048,16 @@ free the memory space of the tensor (in the global memory)
>> myMem - the memory pool we are using >> myMem - the memory pool we are using
>> useBuf - indicates whether we use the buffer in the memory pool >> useBuf - indicates whether we use the buffer in the memory pool
*/ */
void XTensor::FreeData(XTensor * tensor, XMem * myMem, bool useBuf) void XTensor::FreeData(XTensor* tensor, XMem* myMem, bool useBuf)
{ {
if(tensor == NULL) if (tensor == NULL)
return; return;
if(myMem == NULL){ if (myMem == NULL) {
XMemFree(tensor->devID, tensor->data); XMemFree(tensor->devID, tensor->data);
} }
else{ else {
if(tensor->isInGlobalMem) if (tensor->isInGlobalMem)
myMem->ReleaseGlobal(tensor->devID, tensor->data); myMem->ReleaseGlobal(tensor->devID, tensor->data);
else else
myMem->ReleaseBuf(tensor->devID, tensor->GetDataSizeInChar()); myMem->ReleaseBuf(tensor->devID, tensor->GetDataSizeInChar());
...@@ -2068,27 +2068,27 @@ void XTensor::FreeData(XTensor * tensor, XMem * myMem, bool useBuf) ...@@ -2068,27 +2068,27 @@ void XTensor::FreeData(XTensor * tensor, XMem * myMem, bool useBuf)
} }
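AllocateData and FreeData are meant to be used as a pair with matching myMem and useBuf arguments, since the release path depends on the isInGlobalMem flag set at allocation time. A pool-free sketch, using the negative-dimension convention from Resize to record the shape without allocating:

    XTensor t;
    int dims[1] = {-1024};              // negative size: set the shape, skip allocation
    t.Resize(1, dims, X_FLOAT, 1.0F);
    XTensor::AllocateData(&t, NULL, false); // plain XMemAlloc on t.devID
    // ... use t.data ...
    XTensor::FreeData(&t, NULL, false);     // plain XMemFree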
/* overloading of the plus-sign */ /* overloading of the plus-sign */
XTensor operator+ (const DTYPE shift, const XTensor &tensor) XTensor operator+ (const DTYPE shift, const XTensor& tensor)
{ {
return ScaleAndShift(tensor, 1, shift); return ScaleAndShift(tensor, 1, shift);
} }
/* overloading of the minus-sign */ /* overloading of the minus-sign */
XTensor operator- (const DTYPE shift, const XTensor &tensor) XTensor operator- (const DTYPE shift, const XTensor& tensor)
{ {
return ScaleAndShift(tensor, 1, -shift); return ScaleAndShift(tensor, 1, -shift);
} }
/* overloading of the multiply-sign */ /* overloading of the multiply-sign */
XTensor operator* (const DTYPE scale, const XTensor &tensor) XTensor operator* (const DTYPE scale, const XTensor& tensor)
{ {
return ScaleAndShift(tensor, scale, 0); return ScaleAndShift(tensor, scale, 0);
} }
/* overloading of the division-sign */ /* overloading of the division-sign */
XTensor operator/ (const DTYPE scale, const XTensor &tensor) XTensor operator/ (const DTYPE scale, const XTensor& tensor)
{ {
return ScaleAndShift(tensor, (DTYPE)1/scale, 0); return ScaleAndShift(tensor, (DTYPE)1.0F / scale, 0);
} }
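These overloads route scalar-on-the-left arithmetic through ScaleAndShift, so each is an elementwise affine transform. Note that scale / tensor is implemented as tensor * (1 / scale), i.e. the tensor divided by the scalar, not an elementwise reciprocal. Sketch (x is assumed to be an initialized float tensor):

    XTensor y = 1.0F + 2.0F * x;        // y[i] = 2 * x[i] + 1
    XTensor z = 4.0F / x;               // z[i] = x[i] / 4, per the overload above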
} /* end of the nts (NiuTrans.Tensor) namespace */ } /* end of the nts (NiuTrans.Tensor) namespace */