Commit 7c17670d by xiaotong

new comments of the code

parent 4565ff2b
......@@ -166,6 +166,8 @@ int T2TBatchLoader::LoadBuf(FILE * file, bool isSorted, int step)
if(wordCount >= bufSize - MAX_SEQUENCE_LENGTH)
break;
CheckNTErrors(seqCount % step == 0, "Wrong number of sequences!");
}
nseqBuf = seqCount;
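For reference, the guard above stops loading once the worst-case next sequence could overflow the word buffer; a minimal standalone sketch of the check (illustrative names only):

```cpp
// Why the loop breaks at "bufSize - MAX_SEQUENCE_LENGTH": the next sequence
// may be up to MAX_SEQUENCE_LENGTH words long, so that much headroom must
// remain in the word buffer before we read it. Illustrative, not the API.
bool HasRoomForNextSequence(int wordCount, int bufSize, int maxSeqLen)
{
    /* stop loading once the worst-case sequence could overflow the buffer */
    return wordCount < bufSize - maxSeqLen;
}
```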
......
......@@ -292,10 +292,10 @@ void T2TSearch::Generate(T2TStateBundle * beam)
CopyValues(index, preID);
/* "preID" represents the id (or the offset) of previous state used to make the current
/* "preID" represents the id (or the offset) of the previous state used to make the current
hypothesis. Note that we reshape the "score" tensor into a matrix where each
row means a previous state. The column number is size-of-beam * vocab-size. We,
therefore, divide entries of the top-k index by vocab-size to compute the id of
row means a previous state. The column number is size-of-beam \times vocab-size. We,
therefore, divide entries of the top-k index by vocab-size to compute the id of the
previous state for each hypothesis in the top-k list. */
Descale(preID, sizeVocab);
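To make the comment concrete, here is a standalone sketch (not the NiuTensor API) of how a flattened top-k index over a (size-of-beam x vocab-size) score matrix decomposes into the previous-state id and the word id:

```cpp
// Standalone sketch with illustrative names: integer division by the vocab
// size recovers the row (the previous state), which is what
// Descale(preID, sizeVocab) does for a whole tensor of indices; the
// remainder is the predicted word within that row.
#include <cstdio>

int main()
{
    const int beamSize  = 4;   /* toy beam */
    const int vocabSize = 8;   /* toy vocabulary */

    int topkIndex = 19;        /* a flattened index in [0, beamSize * vocabSize) */

    int prevStateID = topkIndex / vocabSize;  /* row: the previous state */
    int wordID      = topkIndex % vocabSize;  /* column: the predicted word */

    printf("state=%d word=%d\n", prevStateID, wordID);  /* state=2 word=3 */
    return 0;
}
```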
......
......@@ -201,7 +201,8 @@ void XDevice::SetGPUDevice(int devID)
cudaError_t error = cudaSetDevice(devID);
if (error != cudaSuccess){
fprintf(stderr, "Error! Calling cudaSetDevice(%d) fails(%d:%s)\n", devID, error, cudaGetErrorString(error));
fprintf(stderr, "Error! Calling cudaSetDevice(%d) fails(%d:%s)\n",
devID, error, cudaGetErrorString(error));
exit(1);
}
#else
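The check-and-exit pattern above recurs for every CUDA call; a hypothetical wrapper macro (not part of NiuTensor) that reproduces it might look like this:

```cpp
// A hypothetical wrapper for the check-and-exit pattern above (an assumption,
// not library code): it prints the failing call, the error code, and the
// error string, then aborts, just like the hand-written checks.
#include <cstdio>
#include <cstdlib>
#include <cuda_runtime.h>

#define CHECK_CUDA(call)                                          \
    do {                                                          \
        cudaError_t e = (call);                                   \
        if (e != cudaSuccess) {                                   \
            fprintf(stderr, "Error! %s fails(%d:%s)\n",           \
                    #call, e, cudaGetErrorString(e));             \
            exit(1);                                              \
        }                                                         \
    } while (0)

/* usage: CHECK_CUDA(cudaSetDevice(devID)); */
```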
......@@ -216,7 +217,7 @@ void XDevice::SetGPUDeviceFast(int devID)
SetFastFlags();
}
/* switch to a get current dev */
/* get the id of the current GPU device */
int XDevice::GetGPUDevice()
{
#ifdef USE_CUDA
......@@ -224,7 +225,8 @@ int XDevice::GetGPUDevice()
cudaError_t error = cudaGetDevice(&devID);
if (error != cudaSuccess){
fprintf(stderr, "Error! Calling cudaGetDevice(%d) fails(%d:%s)\n", devID, error, cudaGetErrorString(error));
fprintf(stderr, "Error! Calling cudaGetDevice(%d) fails(%d:%s)\n",
devID, error, cudaGetErrorString(error));
exit(1);
}
......@@ -248,7 +250,7 @@ void XDevice::SetFastFlags()
#endif
}
/* reset cuda flag for more efficient cuda execution (all devices) */
/* reset the cuda flag for more efficient cuda execution (all devices) */
void XDevice::SetFastFlagsAllDevices()
{
#ifdef USE_CUDA
......@@ -274,7 +276,7 @@ XDevManager::~XDevManager()
}
/* initialize it and get the CPU and GPU information */
/* initialization */
void XDevManager::Init()
{
srand((unsigned int)time(NULL));
......@@ -318,7 +320,7 @@ void XDevManager::Clear()
#ifdef USE_CUDA
/* get the handle of GPU */
/* get the handle of a given GPU */
cublasHandle_t * XDevManager::GetCudaHandle(const int devID)
{
CheckNTErrors(devID < nGPU, "index of GPU is out of range.");
......@@ -326,7 +328,7 @@ cublasHandle_t * XDevManager::GetCudaHandle(const int devID)
return GPUs[devID].GetCublasHandle();
}
/* get the stream of cuda */
/* get the stream of a given GPU */
cudaStream_t * XDevManager::GetCudaStream(const int devID)
{
CheckNTErrors(devID < nGPU, "index of GPU is out of range.");
......@@ -523,7 +525,7 @@ get device ids for the given device information
devInfo = "0:CPU-1 1:GPU-0 2:CPU-1"
means that the first device is a CPU, the second
device is GPU-0, and the third device is a CPU.
>> devIDs - device sequence specified by devInfo
>> devIDs - device IDs specified by devInfo
<< return - number of devices
*/
int XDevManager::GetDeviceIDs(char * devInfo, int * devIDs)
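A minimal sketch of parsing the devInfo format shown above, assuming the library's usual convention that -1 denotes the CPU and a value >= 0 is a GPU id; the helper name is illustrative, not the actual implementation:

```cpp
// Parse entries like "0:CPU-1 1:GPU-0 2:CPU-1" into device ids.
// Assumption: -1 means CPU, >= 0 is a GPU id. Illustrative code only.
#include <cstdio>
#include <cstring>
#include <cstdlib>

int ParseDeviceIDs(const char * devInfo, int * devIDs)
{
    char buf[1024];
    strncpy(buf, devInfo, sizeof(buf) - 1);
    buf[sizeof(buf) - 1] = 0;

    int count = 0;
    for (char * tok = strtok(buf, " "); tok != NULL; tok = strtok(NULL, " ")) {
        char * colon = strchr(tok, ':');              /* skip the "0:" position prefix */
        const char * dev = (colon != NULL) ? colon + 1 : tok;
        if (strncmp(dev, "GPU-", 4) == 0)
            devIDs[count++] = atoi(dev + 4);          /* "GPU-0" -> 0 */
        else
            devIDs[count++] = -1;                     /* "CPU-1" -> -1 (CPU) */
    }
    return count;                                     /* number of devices */
}

/* ParseDeviceIDs("0:CPU-1 1:GPU-0 2:CPU-1", ids) fills {-1, 0, -1} and returns 3 */
```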
......@@ -565,7 +567,7 @@ int XDevManager::GetDeviceIDs(char * devInfo, int * devIDs)
return devCount;
}
/* show id sequence */
/* show device IDs */
void XDevManager::ShowDeviceIDs(char * devInfo, char * msg)
{
msg[0] = 0;
......
......@@ -58,7 +58,7 @@ constructor
>> myMode - mode of running the memory pool
UNI_FREE: free all the space once we finish using the memory pool
FREE_ON_THE_FLY: normal "malloc" and "free" mode
>> myBlockSize - size of memory block
>> myBlockSize - size of a memory block
>> myBlockNum - number of memory blocks
>> myBufSize - size of buffer
*/
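The UNI_FREE mode documented above amounts to bump allocation: allocations only advance a cursor inside pre-allocated blocks, and everything is released at once. A toy single-block sketch, unrelated to the real XMem internals:

```cpp
// A toy bump allocator illustrating the UNI_FREE mode: Alloc() only
// advances a cursor inside one pre-allocated block, and all space goes
// away at once in the destructor. The real XMem manages several blocks
// and also supports FREE_ON_THE_FLY (normal malloc/free semantics).
#include <cstdlib>

struct UniFreePool {
    char * block;   /* one pre-allocated memory block */
    size_t size;    /* size of the block */
    size_t used;    /* bump cursor */

    UniFreePool(size_t blockSize)
        : block((char*)malloc(blockSize)), size(blockSize), used(0) {}

    void * Alloc(size_t n) {
        if (used + n > size) return NULL;  /* block exhausted */
        void * p = block + used;
        used += n;
        return p;
    }

    /* UNI_FREE: no per-pointer Free(); everything is released at once */
    ~UniFreePool() { free(block); }
};
```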
......@@ -103,7 +103,7 @@ initialize it
>> myMode - mode of running the memory pool
UNI_FREE: free all the space once we finish using the memory pool
FREE_ON_THE_FLY: normal "malloc" and "free" mode
>> myBlockSize - size of memory block
>> myBlockSize - size of a memory block
>> myBlockNum - number of memory blocks
>> myBufSize - size of buffer
*/
......@@ -217,8 +217,8 @@ void XMem::Free(int myDevID, void * mem)
}
/*
get signature
<< return - return the signature
get the signature
<< return - the signature
*/
MTYPE XMem::GetSignature()
{
......@@ -226,7 +226,7 @@ MTYPE XMem::GetSignature()
}
/*
use string as the name of the memory pool
set the name of the memory pool
>> myName - name of the memory pool
*/
void XMem::SetName(const char * myName)
......@@ -259,7 +259,7 @@ void XMem::SetDevice(int myDevID)
}
/*
switch to the device (with fast cuda execution mode) we want to work
switch to the device (with fast cuda execution mode) we intend to work on
>> myDevID - device id (-1: CPU memory, >=0: GPU device ID)
*/
void XMem::SetDeviceFast(int myDevID)
......@@ -275,7 +275,7 @@ void XMem::SetDeviceFast(int myDevID)
}
/*
run in static mode
run in the static mode
>> myIsStatic - specify if the memory allocation is static
*/
void XMem::SetStaticMode(bool myIsStatic)
......
......@@ -81,11 +81,7 @@ int MakeTensorID()
return id;
}
/*
constructor
>> myOrder - order of the tensor
>> myMem - memory pool used to allocating the data array
*/
/* constructor */
XTensor::XTensor()
{
Init();
......@@ -130,9 +126,9 @@ XTensor::XTensor(const int myOrder, int myDevID, XMem * myMem)
/*
constructor
>> myOrder - order of the tensor
>> myDimSize - the size of each dimension
>> myDimSize - size of each dimension
>> myDataType - data type of each unit (e.g., int, float, and double)
>> myDenseRatio - how often an element has non-zero value
>> myDenseRatio - how often an element has a non-zero value
>> myDevID - device id
>> myMem - memory pool used to allocate the data array
*/
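To illustrate the parameter list above, a sketch of constructing a dense 2 x 3 float tensor on the CPU; the call follows the documented parameter order, but treat the exact signature and enum names as assumptions rather than the authoritative API:

```cpp
/* Assumes the library headers (e.g., XTensor.h) are included. The call
   mirrors the documented parameter order (order, dimSize, dataType,
   denseRatio, devID, mem); the exact signature is an assumption. */
int dimSize[2] = {2, 3};      /* myDimSize: size of each dimension */
XTensor t(2,                  /* myOrder: a matrix has order 2 */
          dimSize,
          X_FLOAT,            /* myDataType */
          1.0F,               /* myDenseRatio: 1.0 means fully dense */
          -1,                 /* myDevID: -1 keeps the tensor on the CPU */
          NULL);              /* myMem: no memory pool, use global memory */
```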
......@@ -168,10 +164,10 @@ XTensor::XTensor(const XTensor &reference)
signature = reference.signature;
/* what we really want to do is "reference.data = NULL;"
As "reference" is constant, we cannot reset reference.data
here. So we save the ADDRESS of reference.data in
reference.dataP, and do this work by updating "*reference.dataP".
This is VERY tricky and might not be the best solution :) */
As "reference" is constant, we cannot reset "reference.data"
here. So we save the ADDRESS of "reference.data" in
"reference.dataP", and do this work by updating "*reference.dataP".
This is VERY tricky and there might be better solutions :) */
*reference.dataP = NULL;
}
else{
......@@ -208,10 +204,10 @@ XTensor::XTensor(const XTensor &&reference)
signature = reference.signature;
/* what we really want to do is "reference.data = NULL;"
As "reference" is constant, we cannot reset reference.data
here. So we save the ADDRESS of reference.data in
reference.dataP, and do this work by updating "*reference.dataP".
This is VERY tricky and might not be the best solution :) */
As "reference" is constant, we cannot reset "reference.data"
here. So we save the ADDRESS of "reference.data" in
"reference.dataP", and do this work by updating "*reference.dataP".
This is VERY tricky and there might be better solutions :) */
*reference.dataP = NULL;
XLink::Replace(&reference, this);
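The double-pointer trick described in these comments works because const-ness does not propagate through a stored pointer; a minimal standalone sketch:

```cpp
// Standalone sketch of the trick above: saving the ADDRESS of "data" in
// "dataP" lets us null out "data" even through a const reference, because
// *dataP has type void* (not const) when accessed via a const object.
#include <cstdio>

struct Toy {
    void *  data;    /* the payload pointer we want to steal */
    void ** dataP;   /* always kept equal to &data */

    Toy(void * d) : data(d) { dataP = &data; }
};

void Steal(const Toy & reference)
{
    /* "reference.data = NULL;" would not compile: reference is const.
       But the pointee of reference.dataP is not const, so this works: */
    *reference.dataP = NULL;
}

int main()
{
    int x = 42;
    Toy t(&x);
    Steal(t);
    printf("%p\n", t.data);   /* prints a null pointer */
    return 0;
}
```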
......@@ -305,7 +301,7 @@ void XTensor::DestroyData()
}
/*
shallow copy of tensor
shallow copy of the tensor
Note that we do not copy the data array here
>> tensor - the source tensor
*/
......@@ -353,7 +349,7 @@ XTensor& XTensor::operator= (const XTensor& tensor)
}
if(false && !tensor.isTmp){
/* NOTE: this might lead to additional data copy on Mac machines */
/* NOTE: this might lead to additional data copy by Mac LLVM compilers */
/* we make an identity transformation here */
if(outgo.tailNum > 0)
......@@ -440,10 +436,10 @@ XTensor& XTensor::operator= (const XTensor&& tensor)
signature = tensor.signature;
/* what we really want to do is "reference.data = NULL;"
As "reference" is constant, we cannot reset reference.data
here. So we save the ADDRESS of reference.data in
reference.dataP, and do this work by updating "*reference.dataP".
This is VERY tricky and might not be the best solution :) */
As "reference" is constant, we cannot reset "reference.data"
here. So we save the ADDRESS of "reference.data" in
"reference.dataP", and do this work by updating "*reference.dataP".
This is VERY tricky and there might be better solutions :) */
*tensor.dataP = NULL;
XLink::Replace(&tensor, this);
......@@ -526,7 +522,7 @@ void XTensor::SetDevice(int myDevId, XMem * myMem)
}
/*
judge whether the two matrices are in the same type and size
check whether the two matrices are in the same type and size
>> a - input tensor
>> b - another tensor to compare with
<< return - whether the two input tensors are identical
......@@ -557,7 +553,7 @@ bool XTensor::IsSameShaped(const XTensor * a, const XTensor * b)
}
/*
judge whether the three matrices are in the same type and size
check whether the three matrices are in the same type and size
>> a - input tensor
>> b - another tensor to compare with
>> c - the third tensor to compare with
......@@ -599,7 +595,7 @@ int XTensor::GetDim(const int dim) const
/*
reshape the tensor
>> myOrder - order of the tensor
>> myDimSize - the size of each dimension
>> myDimSize - size of each dimension
*/
void XTensor::Reshape(const int myOrder, const int * myDimSize)
{
......@@ -621,7 +617,7 @@ void XTensor::Reshape(const int myOrder, const int * myDimSize)
}
/*
reshape the tensor to a vector
reshape the tensor into a vector
>> num - number of elements
*/
void XTensor::Reshape(const int num)
......@@ -631,7 +627,7 @@ void XTensor::Reshape(const int num)
}
/*
reshape the tensor to a matrix
reshape the tensor into a matrix
>> rowNum - number of rows
>> colNum - number of columns
*/
......@@ -677,7 +673,7 @@ int XTensor::GetSize() const
return unitNum;
}
/* get size of the memory used */
/* get the size of the memory space used */
int XTensor::GetDataSizeInChar()
{
if(isSparse){
......@@ -795,7 +791,7 @@ void XTensor::SetZeroAll(XStream * stream)
/* set the tensor with a data array
>> d - input data. It must be on the CPU
>> num - number of data items
>> beg - where we start this in the data array of the tensor
>> beg - where we start the data copy in the data array of the tensor
*/
void XTensor::SetData(const void * d, int num, int beg)
{
......@@ -815,7 +811,7 @@ set the tensor items by a uniform distribution in range [lower, upper]
*/
void XTensor::SetDataRand(DTYPE lower, DTYPE upper)
{
// TODO: cuda code!!!!!!!
// TODO: GPU code!!!!!!!
if (data == NULL)
return;
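A minimal sketch of the CPU path for this uniform fill (illustrative names; the real method also dispatches to GPU code once the TODO above is resolved):

```cpp
// Draw each item independently from [lower, upper]. Illustrative code,
// not the actual implementation.
#include <cstdlib>

void FillUniform(float * data, int num, float lower, float upper)
{
    float range = upper - lower;
    for (int i = 0; i < num; i++)
        data[i] = lower + range * ((float)rand() / (float)RAND_MAX);
}
```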
......@@ -853,7 +849,7 @@ void XTensor::SetDataRand(DTYPE lower, DTYPE upper)
/* a gauss distribution (Box-Muller method) */
double GaussRand(DTYPE mean, DTYPE standardDeviation)
{
// TODO: cuda code!!!!!!!
// TODO: GPU code!!!!!!!
static double u, v;
static int phase = 0;
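For reference, a self-contained Box-Muller sketch: two uniform samples yield two independent normal samples, one returned now and one cached, which is what the static "phase" above tracks:

```cpp
// Box-Muller sketch (illustrative, not the library's GaussRand): two
// uniforms u, v in (0, 1] give two independent standard normals; we
// return one and cache the other for the next call.
#include <cmath>
#include <cstdlib>

double GaussRandSketch(double mean, double standardDeviation)
{
    const double PI = 3.141592653589793;
    static double cached = 0;
    static bool hasCached = false;

    if (hasCached) {                  /* second sample of the pair */
        hasCached = false;
        return mean + standardDeviation * cached;
    }

    /* uniforms in (0, 1]; the +1 shift avoids log(0) */
    double u = (rand() + 1.0) / (RAND_MAX + 1.0);
    double v = (rand() + 1.0) / (RAND_MAX + 1.0);
    double r = sqrt(-2.0 * log(u));

    cached = r * sin(2.0 * PI * v);   /* saved for the next call */
    hasCached = true;
    return mean + standardDeviation * r * cos(2.0 * PI * v);
}
```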
......@@ -916,7 +912,7 @@ void XTensor::SetDataRandn(DTYPE mean, DTYPE standardDeviation)
/*
set tensor items with an array of offsets
>> offsets - offset for each data item
>> value - value for data items
>> value - value for the data items
>> num - number of the data items
*/
void XTensor::SetDataBatched(MTYPE * offsets, DTYPE value, int num)
......@@ -936,7 +932,7 @@ void XTensor::SetDataBatchedWithValues(MTYPE * offsets, void * values, int num)
}
/* check whether the data array is the same as the answer
>> d - input data. it must be on CPU
>> d - input data (it must be on the CPU)
>> num - number of data items
>> beg - where we start the comparison in the data array of the tensor
*/
......@@ -970,7 +966,7 @@ void XTensor::SetDataPointer()
dataP = &data;
}
/* compare two number */
/* compare two numbers */
bool IsFloatEqual(DTYPE a, DTYPE b, float absError, float relError)
{
if(a == b)
......@@ -983,7 +979,7 @@ bool IsFloatEqual(DTYPE a, DTYPE b, float absError, float relError)
return (fabs((a - b) / a) < relError) ? true : false;
}
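Since the diff shows the body only in part, here is a complete sketch of the two-tolerance comparison under the stated semantics (exact match first, then an absolute tolerance for values near zero, then a relative tolerance):

```cpp
// Complete sketch of the two-tolerance comparison; the elided middle of
// the real function presumably handles the absolute-error case, so treat
// the details here as an assumption.
#include <cmath>

bool IsFloatEqualSketch(float a, float b, float absError, float relError)
{
    if (a == b)
        return true;
    if (fabs(a - b) < absError)     /* near-zero values: absolute test */
        return true;
    if (fabs(a) < fabs(b))          /* divide by the larger magnitude */
        return fabs((a - b) / b) < relError;
    return fabs((a - b) / a) < relError;
}
```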
/* check whether the data array is the same as the answer */
/* check whether the data array is the same as the "answer" */
bool XTensor::CheckData(const void * d, int num, float tolerance, int beg)
{
if (data == NULL || d == NULL)
......@@ -1057,7 +1053,7 @@ void XTensor::SetAscendingOrder(int dim)
/*
get the value of a cell with the index
>> index - index of each dimension
>> size - size of index
>> size - size of the index
<< return - cell value
*/
DTYPE XTensor::Get(int index[], int size)
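Such a lookup reduces to row-major flattening of the index; a standalone sketch with illustrative names:

```cpp
// Map a multi-dimensional index to a flat offset (row-major), which is
// what a lookup like Get(index, size) has to compute. Illustrative only.
int RowMajorOffset(const int * index, const int * dimSize, int order)
{
    int offset = 0;
    for (int i = 0; i < order; i++)
        offset = offset * dimSize[i] + index[i];  /* Horner-style flattening */
    return offset;
}

/* for a 2 x 3 matrix, index {1, 2} -> offset 1 * 3 + 2 = 5 */
```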
......@@ -1068,7 +1064,7 @@ DTYPE XTensor::Get(int index[], int size)
}
/*
get the value of a cell with the offset
get the value of a cell with its offset
>> offset - offset in the array
<< return - cell value
*/
......@@ -1658,7 +1654,7 @@ bool XTensor::Resize(const int myOrder, const int * myDimSize,
}
/*
resize a tensor by another one
resize a tensor by another
>> myTensor - tensor for reference
*/
bool XTensor::Resize(const XTensor * myTensor)
......@@ -1680,7 +1676,7 @@ binary search to find an element in a sparse tensor
>> value - the value to return
>> position - the position of the tuple.
it is the previous one if there is no hit
<< return - find it or not?
<< return - found it or not?
*/
bool XTensor::BinarySearch(int key, DTYPE &value, void * &position) const
{
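A standalone sketch of the documented contract, over key-sorted (key, value) tuples with "position" left at the previous tuple on a miss; names are illustrative:

```cpp
// Binary search over key-sorted (key, value) tuples. On a miss, "position"
// is left at the last tuple with a smaller key, matching the comment above.
// Illustrative code, not the sparse-tensor implementation.
#include <cstddef>

struct Tuple { int key; float value; };

bool BinarySearchSketch(const Tuple * tuples, int num, int key,
                        float & value, const Tuple *& position)
{
    int lo = 0;
    int hi = num - 1;
    position = NULL;
    while (lo <= hi) {
        int mid = lo + (hi - lo) / 2;
        if (tuples[mid].key == key) {
            value = tuples[mid].value;
            position = &tuples[mid];
            return true;                  /* hit */
        }
        if (tuples[mid].key < key) {
            position = &tuples[mid];      /* last tuple with a smaller key */
            lo = mid + 1;
        }
        else {
            hi = mid - 1;
        }
    }
    return false;  /* miss: "position" is the previous tuple (or NULL) */
}
```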
......@@ -1849,10 +1845,10 @@ void XTensor::Dump(FILE * file, const char * label, const int n, const int beg,
/*
dump data to a file
>> tensor - tensor whose data is dumped
>> tensor - the tensor for dumping
>> file - where to dump the data
>> label - label of the tensor
>> n - number of items to dump
>> n - number of the items to dump
>> beg - the first item id
>> verbose - verbose level
*/
......@@ -2019,7 +2015,7 @@ void XTensor::FlushToMem(XMem * targetMem)
allocate the memory space of the tensor (in the global memory)
>> tensor - the tensor we intend to process
>> myMem - the memory pool we are using
>> useBuf - use the buffer in the memory pool
>> useBuf - indicates whether we use the buffer in the memory pool
*/
void XTensor::AllocateData(XTensor * tensor, XMem * myMem, bool useBuf)
{
......@@ -2051,7 +2047,7 @@ void XTensor::AllocateData(XTensor * tensor, XMem * myMem, bool useBuf)
free the memory space of the tensor (in the global memory)
>> tensor - the tensor we intend to process
>> myMem - the memory pool we are using
>> useBuf - use the buffer in the memory pool
>> useBuf - indicates whether we use the buffer in the memory pool
*/
void XTensor::FreeData(XTensor * tensor, XMem * myMem, bool useBuf)
{
......