1. Clean up the code;

2. Merge with Xiao's branch.

1. Clean up the code;
2. Merge with Xiao's branch.
58181c8d · liyinqiao · d1714e17 · 58181c8d · 58181c8d · 58181c8d
Commit 58181c8d authored Jul 16, 2019 by liyinqiao
--- a/source/sample/transformer/T2TBatchLoader.cpp
+++ b/source/sample/transformer/T2TBatchLoader.cpp
@@ -166,6 +166,8 @@ int T2TBatchLoader::LoadBuf(FILE * file, bool isSorted, int step)
        if(wordCount >= bufSize - MAX_SEQUENCE_LENGTH)
            break;
+        CheckNTErrors(seqCount % step == 0, "Wrong number of sequences!");
    }
    nseqBuf = seqCount;

--- a/source/sample/transformer/T2TSearch.cpp
+++ b/source/sample/transformer/T2TSearch.cpp
@@ -293,10 +293,10 @@ void T2TSearch::Generate(T2TStateBundle * beam)
    CopyValues(index, preID);
-    /* "preID" represents the id (or the offset) of previous state used to make the current
+    /* "preID" represents the id (or the offset) of the previous state used to make the current
       hypothesis. Note that we reshape the "score" tensor into a matrix where each
-       row means a previous state. The column number is size-of-beam * vocab-size. We,
+       row means a previous state. The column number is size-of-beam times vocab-size. We,
-       therefore, divide entries of the top-k index by vocab-size to compute the id of 
+       therefore, divide entries of the top-k index by vocab-size to compute the id of the
       previous state for each hypothesis in the top-k list. */
    Descale(preID, sizeVocab);

--- a/source/tensor/XDevice.cpp
+++ b/source/tensor/XDevice.cpp
@@ -201,7 +201,8 @@ void XDevice::SetGPUDevice(int devID)
    cudaError_t error = cudaSetDevice(devID);
    if (error != cudaSuccess){
-        fprintf(stderr, "Error! Calling cudaSetDevice(%d) fails(%d:%s)\n", devID, error, cudaGetErrorString(error));
+        fprintf(stderr, "Error! Calling cudaSetDevice(%d) fails(%d:%s)\n",
+                devID, error, cudaGetErrorString(error));
        exit(1);
    }
 #else
@@ -216,7 +217,7 @@ void XDevice::SetGPUDeviceFast(int devID)
    SetFastFlags();
 }
-/* switch to a get current dev */
+/* get the id of the current GPU device */
 int XDevice::GetGPUDevice()
 {
 #ifdef USE_CUDA
@@ -224,7 +225,8 @@ int XDevice::GetGPUDevice()
    cudaError_t error = cudaGetDevice(&devID);
    if (error != cudaSuccess){
-        fprintf(stderr, "Error! Calling cudaGetDevice(%d) fails(%d:%s)\n", devID, error, cudaGetErrorString(error));
+        fprintf(stderr, "Error! Calling cudaGetDevice(%d) fails(%d:%s)\n",
+                devID, error, cudaGetErrorString(error));
        exit(1);
    }
@@ -248,7 +250,7 @@ void XDevice::SetFastFlags()
 #endif
 }
-/* reset cuda flag for more efficient cuda execution (all devices) */
+/* reset the cuda flag for more efficient cuda execution (all devices) */
 void XDevice::SetFastFlagsAllDevices()
 {
 #ifdef USE_CUDA
@@ -274,7 +276,7 @@ XDevManager::~XDevManager()
 }
-/* initialize it and get the CPU and GPU information */
+/* initialization */
 void XDevManager::Init()
 {
    srand((unsigned int)time(NULL));
@@ -318,7 +320,7 @@ void XDevManager::Clear()
 #ifdef USE_CUDA
-/* get the handle of GPU */
+/* get the handle of a given GPU */
 cublasHandle_t * XDevManager::GetCudaHandle(const int devID)
 {
    CheckNTErrors(devID < nGPU, "index of GPU is out of range.");
@@ -326,7 +328,7 @@ cublasHandle_t * XDevManager::GetCudaHandle(const int devID)
    return GPUs[devID].GetCublasHandle();
 }
-/* get the stream of cuda */
+/* get the stream of a given GPU */
 cudaStream_t * XDevManager::GetCudaStream(const int devID)
 {
    CheckNTErrors(devID < nGPU, "index of GPU is out of range.");
@@ -523,7 +525,7 @@ get device ids for the given device information
             devInfo = "0:CPU-1 1:GPU-0 2:CPU-1"
             means that the first device is CPU, the second device
             is GPU-0, the third device is CPU.
->> devIDs - device sequence specified by devInfo
+>> devIDs - device IDs specified by devInfo
 << return - number of devices
 */
 int XDevManager::GetDeviceIDs(char * devInfo, int * devIDs)
@@ -565,7 +567,7 @@ int XDevManager::GetDeviceIDs(char * devInfo, int * devIDs)
    return devCount;
 }
-/* show id sequence */
+/* show device IDs */
 void XDevManager::ShowDeviceIDs(char * devInfo, char * msg)
 {
    msg[0] = 0;

--- a/source/tensor/XMem.cpp
+++ b/source/tensor/XMem.cpp
@@ -63,7 +63,7 @@ constructor
 >> myMode - mode of running the memory pool
            UNI_FREE: free all the space at the end of using the memory pool
            FREE_ON_THE_FLY: normal "malloc" and "free" mode
->> myBlockSize - size of memory block
+>> myBlockSize - size of a memory block
 >> myBlockNum  - number of memory blocks
 >> myBufSize - size of buffer
 */
@@ -108,7 +108,7 @@ initialize it
 >> myMode - mode of running the memory pool
            UNI_FREE: free all the space at the end of using the memory pool
            FREE_ON_THE_FLY: normal "malloc" and "free" mode
->> myBlockSize - size of memory block
+>> myBlockSize - size of a memory block
 >> myBlockNum  - number of memory blocks
 >> myBufSize - size of buffer
 */
@@ -221,9 +221,9 @@ void XMem::Free(int myDevID, void * mem)
    }
 }
-/* 
+/*
-get signature 
+get the signature
-<< return - return the signature
+<< return - the signature
 */
 MTYPE XMem::GetSignature()
 {
@@ -231,7 +231,7 @@ MTYPE XMem::GetSignature()
 }
 /* 
-use string as the name of the memory pool 
+set the name of the memory pool 
 >> myName - name of the memory pool
 */
 void XMem::SetName(const char * myName)
@@ -264,7 +264,7 @@ void XMem::SetDevice(int myDevID)
 }
 /* 
-switch to the device (with fast cuda execution mode) we want to work 
+switch to the device (with fast cuda execution mode) we intend to work on
 >> myDevID - device id(-1: CPU memory, >=0: GPU device ID)
 */
 void XMem::SetDeviceFast(int myDevID)
@@ -280,7 +280,7 @@ void XMem::SetDeviceFast(int myDevID)
 }
 /* 
-run in static mode 
+run in the static mode
 >> myIsStatic - specify if the memory allocation is static
 */
 void XMem::SetStaticMode(bool myIsStatic)

--- a/source/tensor/XQueue-李垠桥的MacBook Pro.cpp
+++ b/source/tensor/XQueue-李垠桥的MacBook Pro.cpp
-/* NiuTrans.Tensor - an open-source tensor library
-/* NiuTrans.Tensor - an open-source tensor library
- * Copyright (C) 2017, Natural Language Processing Lab, Northestern University. 
- * All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/*
- * 
- * This is an implementation of queue. Actually we intend to use it to maintain
- * a priority job list
- *
- * $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2017-04-05
- *
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include "XQueue.h"
-#include "XDevice.h"
-#include "XList.h"
-#include "XUtility.h"
-/* the nts (NiuTrans.Tensor) namespace */
-namespace nts{
-/**************************************
-job item used in queues
-*/
-/* constructor */
-JobQueueNode::JobQueueNode()
-{
-    job  = NULL;
-    args = new TensorList(1);
-}
-/* de-constructor */
-JobQueueNode::~JobQueueNode()
-{
-    delete args;
-}
-/**************************************
-This class provides standard utilities of Queue.
-*/
-/* constuctor */
-XQueue::XQueue(int mySize)
-{
-    queue = new void*[mySize];
-    memset(queue, 0, sizeof(void*) * mySize);
-    size = mySize;
-    itemCount = 0;
-    head = 0;
-    tail = 0;
-    isJobQueue = false;
-    jobDequeuerArgs = new TensorList(1);
-    jobDequeuerBreak = false;
-    runningJobCount = 0;
-    jobStream = NULL;
-    jobStream1 = NULL;
-    jobStream2 = NULL;
-    MUTEX_INIT(enqueueMutex);
-    MUTEX_INIT(dequeueMutex);
-    COND_INIT(queueCond);
-    MUTEX_INIT(jobQueueMutex);
-}
-/* deconstructor */
-XQueue::~XQueue()
-{
-    delete[] queue;
-    delete jobDequeuerArgs;
-    delete jobStream;
-    delete jobStream1;
-    delete jobStream2;
-    //if(isJobQueue)
-    //    StopJobConsumer();
-    MUTEX_DELE(enqueueMutex);
-    MUTEX_DELE(dequeueMutex);
-    COND_DELE(queueCond);
-    MUTEX_DELE(jobQueueMutex);
-}
-/* 
-put an item in the tail of the queue 
->> item - the item we intend to add into the queue
-*/
-void XQueue::Enqueue(void * item)
-{
-    MUTEX_LOCK(enqueueMutex);
-    MUTEX_LOCK(dequeueMutex);
-    CheckNTErrors((itemCount < size), "Put too many items into the queue!");
-    queue[tail] = item;
-    tail = (tail + 1) % size;
-    itemCount++;
-    COND_SIGNAL(queueCond);
-    MUTEX_UNLOCK(dequeueMutex);
-    MUTEX_UNLOCK(enqueueMutex);
-}
-/* 
-fetch an item from head of the queue 
-<< return - the head item of the queue
-*/
-void * XQueue::Dequeue()
-{
-    MUTEX_LOCK(dequeueMutex);
-    while(itemCount == 0)
-    {
-#ifdef  WIN32
-        MUTEX_UNLOCK(dequeueMutex);
-#endif
-        COND_WAIT(queueCond, dequeueMutex);
-#ifdef  WIN32
-        MUTEX_LOCK(dequeueMutex);
-#endif
-    }
-    void * r = queue[head];
-    head = (head + 1) % size;
-    itemCount--;
-    MUTEX_UNLOCK(dequeueMutex);
-    return r;
-}
-/* return if the queue is empty */
-bool XQueue::IsEmpty()
-{
-    return itemCount == 0;
-}
-/* wait until the queue is empty */
-void XQueue::WaitForEmptyJobQueue()
-{
-    while(runningJobCount > 0){
-        XSleep(10);
-    }
-    if(jobStream != NULL){
-        CheckNTErrors((jobStream->IsFinished()), "None fineished jobs remain");
-        jobStream->Clear();
-    }
-    if(jobStream1 != NULL){
-        CheckNTErrors((jobStream1->IsFinished()), "None fineished jobs remain");
-        jobStream1->Clear();
-    }
-    if(jobStream2 != NULL){
-        CheckNTErrors((jobStream2->IsFinished()), "None fineished jobs remain");
-        jobStream2->Clear();
-    }
-}
-int devids[16] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
-int cpuid = -1;
-/* 
-run job consumer (in another thread) 
->> jobDevID - id of the device for running the jobs
-*/
-void XQueue::RunJobConsumer(int jobDevID)
-{
-    CheckNTErrors((jobDevID < 16), "device id is out of scope!");
-    isJobQueue = true;
-    jobDequeuerArgs->Clear();
-    jobDequeuerArgs->Add(this);
-    jobDequeuerArgs->Add(jobDevID >= 0 ? devids + jobDevID : &cpuid);
-    jobDequeuer.function = (TFunction)DequeueJobs;
-    jobDequeuer.argv = jobDequeuerArgs;
-    jobDequeuer.Start();
-    jobDequeuer.LetItGo();
-}
-/* stop the job consumer */
-void XQueue::StopJobConsumer()
-{
-    jobDequeuerBreak = true;
-    XSleep(10);
-    EnqueueJob(NULL, NULL);
-    jobDequeuer.End();
-    isJobQueue = false;
-}
-/* add a job item to process */
-void XQueue::EnqueueJob(void * job, TensorList * jobArgs)
-{
-    MUTEX_LOCK(jobQueueMutex);
-    runningJobCount++;
-    MUTEX_UNLOCK(jobQueueMutex);
-    JobQueueNode * node = new JobQueueNode();
-    node->job = job;
-    if(jobArgs != NULL)
-        node->args->AddList(jobArgs);
-    Enqueue(node);
-}
-/* job item consumer */
-void XQueue::DequeueJobs(TensorList * args)
-{
-    CheckNTErrors((args->count == 2), "Illegal arguments!");
-    XQueue * q = (XQueue*)args->GetItem(0);
-    int devID = *(int*)args->GetItem(1);
-    int devIDBackup = XDevice::GetGPUDevice();
-    if(devID >= 0)
-        XDevice::SetGPUDevice(devID);
-    while(1){
-        JobQueueNode * node = (JobQueueNode*)q->Dequeue();
-        if(q->GetJobBreak())
-            break;
-        CheckNTErrors((node != NULL), "Illegal job!");
-        /* process a job */
-        ((TFunction)node->job)(node->args);
-        delete node;
-        MUTEX_LOCK(q->jobQueueMutex);
-        q->runningJobCount--;
-        MUTEX_UNLOCK(q->jobQueueMutex);
-    }
-    if(devID >= 0)
-        XDevice::SetGPUDevice(devIDBackup);
-}
-/* get the break flag */
-bool XQueue::GetJobBreak()
-{
-    return jobDequeuerBreak;
-}
-/* get job stream */
-XStream * XQueue::GetJobStream(int n)
-{
-    if(n == 0)
-        return jobStream;
-    else if(n == 1)
-        return jobStream1;
-    else if(n == 2)
-        return jobStream2;
-    else{
-        ShowNTErrors("invalid stream id!");
-    }
-    return NULL;
-}
-/* make job streams */
-void XQueue::MakeJobStreams(int devID, int devID1, int devID2)
-{
-    if(devID != INVALID_DEVICE_ID)
-        jobStream = new XStream(0, devID);
-    if(devID1 != INVALID_DEVICE_ID)
-        jobStream1 = new XStream(0, devID1);
-    if(devID2 != INVALID_DEVICE_ID)
-        jobStream2 = new XStream(0, devID2);
-}
-} /* end of the nts (NiuTrans.Tensor) namespace */
--- a/source/tensor/XTensor.cpp
+++ b/source/tensor/XTensor.cpp
@@ -81,11 +81,7 @@ int MakeTensorID()
    return id;
 }
-/* 
+/* constructor */
-constructor 
->> myOrder - order of the tensor
->> myMem - memory pool used to allocating the data array
-*/
 XTensor::XTensor()
 {
    Init();
@@ -130,9 +126,9 @@ XTensor::XTensor(const int myOrder, int myDevID, XMem * myMem)
 /* 
 constructor 
 >> myOrder - order of the tensor
->> myDimSize - the size of each dimension
+>> myDimSize - size of each dimension
 >> myDataType - unit size (e.g., int, float, and double)
->> myDenseRatio - how often an element has non-zero value
+>> myDenseRatio - how often an element has a non-zero value
 >> myDevID - device id
 >> myMem - memory pool used to allocating the data array
 */
@@ -168,10 +164,10 @@ XTensor::XTensor(const XTensor &reference)
        signature = reference.signature;
        /* what we really want to do is "reference.data = NULL;"
-           As "reference" is constant, we cannot reset reference.data
+           As "reference" is constant, we cannot reset "reference.data"
-           here. So we save the ADDRESS of reference.data in
+           here. So we save the ADDRESS of "reference.data" in
-           reference.dataP, and do this work by updating "*reference.dataP".
+           "reference.dataP", and do this work by updating "*reference.dataP".
-           This is VERY tricky and might not be the best solution :) */
+           This is VERY tricky and there might be better solutions :) */
        *reference.dataP = NULL;
    }
    else{
@@ -208,10 +204,10 @@ XTensor::XTensor(const XTensor &&reference)
    signature = reference.signature;
    /* what we really want to do is "reference.data = NULL;"
-       As "reference" is constant, we cannot reset reference.data
+       As "reference" is constant, we cannot reset "reference.data"
-       here. So we save the ADDRESS of reference.data in
+       here. So we save the ADDRESS of "reference.data" in
-       reference.dataP, and do this work by updating "*reference.dataP".
+       "reference.dataP", and do this work by updating "*reference.dataP".
-       This is VERY tricky and might not be the best solution :) */
+       This is VERY tricky and there might be better solutions :) */
    *reference.dataP = NULL;
    XLink::Replace(&reference, this);
@@ -305,7 +301,7 @@ void XTensor::DestroyData()
 }
 /* 
-shallow copy of tensor
+shallow copy of the tensor
 Note that we do not copy data array here
 >> tensor - the source tensor
 */
@@ -353,7 +349,7 @@ XTensor& XTensor::operator= (const XTensor& tensor)
    }
    if(false && !tensor.isTmp){
-        /* NOTE: this might lead to additional data copy on Mac machines */
+        /* NOTE: this might lead to additional data copy by Mac LLVM compilers */
        /* we make an identity transformation here */
        if(outgo.tailNum > 0)
@@ -440,10 +436,10 @@ XTensor& XTensor::operator= (const XTensor&& tensor)
    signature = tensor.signature;
    /* what we really want to do is "reference.data = NULL;"
-       As "reference" is constant, we cannot reset reference.data
+       As "reference" is constant, we cannot reset "reference.data"
-       here. So we save the ADDRESS of reference.data in
+       here. So we save the ADDRESS of "reference.data" in
-       reference.dataP, and do this work by updating "*reference.dataP".
+       "reference.dataP", and do this work by updating "*reference.dataP".
-       This is VERY tricky and might not be the best solution :) */
+       This is VERY tricky and there might be better solutions :) */
    *tensor.dataP = NULL;
    XLink::Replace(&tensor, this);
@@ -526,7 +522,7 @@ void XTensor::SetDevice(int myDevId, XMem * myMem)
 }
 /* 
-judge whether the two matrices are in the same type and size 
+check whether the two tensors are of the same type and size
 >> a - input tensor
 >> b - anther tensor to compare with
 << return - whether the two input tensors are identical
@@ -556,6 +552,18 @@ bool XTensor::IsSameShaped(const XTensor * a, const XTensor * b)
    return true;
 }
+/*
+check whether the three tensors are of the same type and size
+>> a - input tensor
+>> b - another tensor to compare with
+>> c - a third tensor to compare with
+<< return - whether the three input tensors are of the same type and size
+*/
+bool XTensor::IsSameShaped(const XTensor * a, const XTensor * b, const XTensor * c)
+{
+    return IsSameShaped(a, b) && IsSameShaped(a, c);
+}
 bool XTensor::IsReduceShaped(const XTensor * a, const XTensor * b, int dim)
 {
    if (a == NULL || b == NULL)
@@ -588,18 +596,6 @@ bool XTensor::IsReduceShaped(const XTensor * a, const XTensor * b, int dim)
 }
 /* 
-judge whether the three matrices are in the same type and size 
->> a - input tensor
->> b - anther tensor to compare with
->> c - a tensor again
-<< return - whether the two input tensors are identical
-*/
-bool XTensor::IsSameShaped(const XTensor * a, const XTensor * b, const XTensor * c)
-{
-    return IsSameShaped(a, b) && IsSameShaped(a, c);
-}
-/* 
 set the size of each dimension 
 >> myDimSize - size of each dimension
 */
@@ -630,7 +626,7 @@ int XTensor::GetDim(const int dim) const
 /* 
 reshape the tensor 
 >> myOrder - order of the tensor
->> myDimSize - the size of each dimension
+>> myDimSize - size of each dimension
 */
 void XTensor::Reshape(const int myOrder, const int * myDimSize)
 {
@@ -652,7 +648,7 @@ void XTensor::Reshape(const int myOrder, const int * myDimSize)
 }
 /* 
-reshape the tensor to a vector 
+reshape the tensor into a vector
 >> num - number of elements
 */
 void XTensor::Reshape(const int num)
@@ -662,7 +658,7 @@ void XTensor::Reshape(const int num)
 }
 /* 
-reshape the tensor to a matrix 
+reshape the tensor into a matrix
 >> rowNum - number of rows
 >> colNum - number of columns
 */
@@ -708,7 +704,7 @@ int XTensor::GetSize() const
        return unitNum;
 }
-/* get size of the memory used */
+/* get the size of the memory space used */
 int XTensor::GetDataSizeInChar()
 {
    if(isSparse){
@@ -826,7 +822,7 @@ void XTensor::SetZeroAll(XStream * stream)
 /*  set the tensor with an data array 
 >> d - input data. it must be on CPU
 >> num - number of data items
->> beg - where we start this in the data array of the tensor
+>> beg - where we start the data copy in the data array of the tensor
 */
 void XTensor::SetData(const void * d, int num, int beg)
 {
@@ -846,7 +842,7 @@ set the tensor items by a uniform distribution in range [lower, upper]
 */
 void XTensor::SetDataRand(DTYPE lower, DTYPE upper)
 {
-    // TODO: cuda code!!!!!!!
+    // TODO: GPU code!!!!!!!
    if (data == NULL)
        return;
@@ -884,7 +880,7 @@ void XTensor::SetDataRand(DTYPE lower, DTYPE upper)
 /* a gauss distribution (Box-Muller method) */
 double GaussRand(DTYPE mean, DTYPE standardDeviation)
 {
-    // TODO: cuda code!!!!!!!
+    // TODO: GPU code!!!!!!!
    static double u, v;
    static int phase = 0;
@@ -947,7 +943,7 @@ void XTensor::SetDataRandn(DTYPE mean, DTYPE standardDeviation)
 /* 
 set tensor items with an array of offsets 
 >> offsets - offset for each data item
->> value - value for data items
+>> value - value for the data items
 >> num - number of the data items
 */
 void XTensor::SetDataBatched(MTYPE * offsets, DTYPE value, int num)
@@ -967,7 +963,7 @@ void XTensor::SetDataBatchedWithValues(MTYPE * offsets, void * values, int num)
 }
 /* check whether the data array is the same as the answer
->> d - input data. it must be on CPU
+>> d - input data (it must be on CPUs)
 >> num - number of data items
 >> beg - where we start this in the data array of the tensor
 */
@@ -1001,7 +997,7 @@ void XTensor::SetDataPointer()
    dataP = &data;
 }
-/* compare two number */
+/* compare two numbers */
 bool IsFloatEqual(DTYPE a, DTYPE b, float absError, float relError)
 {
    if(a == b)
@@ -1014,7 +1010,7 @@ bool IsFloatEqual(DTYPE a, DTYPE b, float absError, float relError)
        return (fabs((a - b) / a) < relError) ? true : false;
 }
-/* check whether the data array is the same as the answer */
+/* check whether the data array is the same as the "answer" */
 bool XTensor::CheckData(const void * d, int num, float tolerance, int beg)
 {
    if (data == NULL || d == NULL)
@@ -1088,7 +1084,7 @@ void XTensor::SetAscendingOrder(int dim)
 /* 
 get the value of a cell with the index 
 >> index - index of each dimension
->> size - size of index
+>> size - size of the index
 << return - cell value
 */
 DTYPE XTensor::Get(int index[], int size)
@@ -1099,7 +1095,7 @@ DTYPE XTensor::Get(int index[], int size)
 }
 /*
-get the value of a cell with the offset
+get the value of a cell with its offset
 >> offset - offset in the array
 << return - cell value
 */
@@ -1689,7 +1685,7 @@ bool XTensor::Resize(const int myOrder, const int * myDimSize,
 }
 /* 
-resize a tensor by another one 
+resize a tensor by another
 >> myTensor - tensor for reference
 */
 bool XTensor::Resize(const XTensor * myTensor)
@@ -1711,7 +1707,7 @@ binary search to find an element in a sparse tensor
 >> value - value for return
 >> position - the position of the tuple.
              it is the previous one if there is no hit
-<< return - find it or not?
+<< return - whether the element is found
 */
 bool XTensor::BinarySearch(int key, DTYPE &value, void * &position) const
 {
@@ -1880,10 +1876,10 @@ void XTensor::Dump(FILE * file, const char * label, const int n, const int beg, 
 /* 
 dump data to a file
->> tensor - tensor whose data is dumped
+>> tensor - the tensor for dumping
 >> file - where to domp the data
 >> label - label of the tensor
->> n - number of items to dump
+>> n - number of the items to dump
 >> beg - the first item id
 >> verbose - verbose level
 */
@@ -2050,7 +2046,7 @@ void XTensor::FlushToMem(XMem * targetMem)
 allocate the memory space of the tensor (in the global memory) 
 >> tensor - the tensor we intend to process
 >> myMem - the memory pool we are using
->> useBuf - use the buffer in the memory pool
+>> useBuf - indicates whether we use the buffer in the memory pool
 */
 void XTensor::AllocateData(XTensor * tensor, XMem * myMem, bool useBuf)
 {
@@ -2082,7 +2078,7 @@ void XTensor::AllocateData(XTensor * tensor, XMem * myMem, bool useBuf)
 free the memory space of the tensor (in the global memory) 
 >> tensor - the tensor we intend to process
 >> myMem - the memory pool we are using
->> useBuf - use the buffer in the memory pool
+>> useBuf - indicates whether we use the buffer in the memory pool
 */
 void XTensor::FreeData(XTensor * tensor, XMem * myMem, bool useBuf)
 {