Separate the header and implementation files for XList; XList now refers to a list of void* items

913e2251 · huchi · 1f5627f9 · 913e2251 · 913e2251 · 913e2251
Commit 913e2251 authored Jul 02, 2019 by huchi
--- a/source/network/XBackwardShape.cpp
+++ b/source/network/XBackwardShape.cpp
@@ -232,8 +232,8 @@ void XShapeGrad::GradMergeList(XTensor * node, bool isEfficient)
    CheckNTErrors(income.tailNum > 0, "Wrong input tensor number for MERGE!");

    XTensor * last = NULL;
-    XList smalls(income.tailNum);
-    XList smallsGrad(income.tailNum);
+    TensorList smalls(income.tailNum);
+    TensorList smallsGrad(income.tailNum);
    bool mergeOnly = true;
    for(int i = 0; i < income.tailNum; i++){
        XTensor * tail = income.tails[i];
@@ -401,7 +401,7 @@ void XShapeGrad::GradSplitListPost(XTensor * node, bool isEfficient)
    /* we compute the gradient for current node, rather than for
       child node, i.e., we use the outgoing edge here */
    XLink &outgo = node->outgo;
-    XList splits(outgo.tailNum);
+    TensorList splits(outgo.tailNum);
    int whereToSplit = -1;
    int splitNum = 0;


--- a/source/network/XNet.cpp
+++ b/source/network/XNet.cpp
@@ -79,13 +79,13 @@ backward propagation to obtain gradient
 */
 void XNet::Backward(XTensor &root, LOSS_FUNCTION_NAME loss)
 {
-    XList roots(1);
+    TensorList roots(1);
    roots.Add(&root);

-    XList golds(1);
+    TensorList golds(1);
    golds.Add(NULL);

-    XList paddings(1);
+    TensorList paddings(1);
    paddings.Add(NULL);

    Backward(roots, golds, paddings, loss);
@@ -99,13 +99,13 @@ backward propagation to obtain gradient wrt. the loss/error function
 */
 void XNet::Backward(XTensor &root, XTensor &gold, LOSS_FUNCTION_NAME loss)
 {
-    XList roots(1);
+    TensorList roots(1);
    roots.Add(&root);

-    XList golds(1);
+    TensorList golds(1);
    golds.Add(&gold);

-    XList paddings(1);
+    TensorList paddings(1);
    paddings.Add(NULL);

    Backward(roots, golds, paddings, loss);
@@ -120,13 +120,13 @@ backward propagation to obtain gradient wrt. the loss/error function
 */
 void XNet::Backward(XTensor &root, XTensor &gold, XTensor &padding, LOSS_FUNCTION_NAME loss)
 {
-    XList roots(1);
+    TensorList roots(1);
    roots.Add(&root);

-    XList golds(1);
+    TensorList golds(1);
    golds.Add(&gold);

-    XList paddings(1);
+    TensorList paddings(1);
    paddings.Add(&padding);

    Backward(roots, golds, paddings, loss);
@@ -138,10 +138,10 @@ with a number of root nodes
 >> roots - a list of root nodes (output) of the network
 >> loss - name of loss function
 */
-void XNet::Backward(XList &roots, LOSS_FUNCTION_NAME loss)
+void XNet::Backward(TensorList &roots, LOSS_FUNCTION_NAME loss)
 {
-    XList golds(roots.count);
-    XList paddings(roots.count);
+    TensorList golds(roots.count);
+    TensorList paddings(roots.count);
    for (int i = 0; i < roots.count; i++) {
        golds.Add(NULL);
        paddings.Add(NULL);
@@ -157,9 +157,9 @@ with a number of root nodes
 >> golds - a list of gold standard for the output
 >> loss - name of loss function
 */
-void XNet::Backward(XList &roots, XList &golds, LOSS_FUNCTION_NAME loss)
+void XNet::Backward(TensorList &roots, TensorList &golds, LOSS_FUNCTION_NAME loss)
 {
-    XList paddings(roots.count);
+    TensorList paddings(roots.count);
    for (int i = 0; i < roots.count; i++)
        paddings.Add(NULL);

@@ -174,7 +174,7 @@ with a number of root nodes
 >> paddings - specify a target value that is ignored
 >> loss - name of loss function
 */
-void XNet::Backward(XList &roots, XList &golds, XList &paddings, LOSS_FUNCTION_NAME loss)
+void XNet::Backward(TensorList &roots, TensorList &golds, TensorList &paddings, LOSS_FUNCTION_NAME loss)
 {
    Traverse(roots);

@@ -300,7 +300,7 @@ depth-first search (Tarjan's algorithm)
 */
 void XNet::Traverse(XTensor &root)
 {
-    XList roots(1);
+    TensorList roots(1);
    roots.Add(&root);

    Traverse(roots);
@@ -311,7 +311,7 @@ traverse the net and find the topological order by
 depth-first search (Tarjan's algorithm) 
 >> roots - a list of roots (or output nodes)
 */
-void XNet::Traverse(XList &roots)
+void XNet::Traverse(TensorList &roots)
 {
    id = MakeNetID();
    nodes.Clear();
@@ -336,7 +336,7 @@ depth-first search given a node (Tarjan's algorithm for topological ordering)
 >> orders - topological order of the nodes
 >> code - code of the network
 */
-void XNet::TarjanVisit(XTensor * node, XList &orders, const unsigned int code)
+void XNet::TarjanVisit(XTensor * node, TensorList &orders, const unsigned int code)
 {
    if(node == NULL)
        return;
@@ -444,7 +444,7 @@ show network topology
 */
 void XNet::ShowNetwork(FILE * file, XTensor * node)
 {
-    XList roots(1);
+    TensorList roots(1);
    roots.Add(node);

    Traverse(roots);

--- a/source/network/XNet.h
+++ b/source/network/XNet.h
@@ -36,16 +36,16 @@ struct XNet
    unsigned int id;

    /* tensor nodes of the network (in order) */
-    XList nodes;
+    TensorList nodes;

    /* tensor nodes to keep gradient for output (e.g., SGD)*/
-    XList gradNodes;
+    TensorList gradNodes;

    /* output nodes of the network */
-    XList outputs;
+    TensorList outputs;

    /* input nodes of the network */
-    XList inputs;
+    TensorList inputs;

    /* indicates whether the network just keeps the gradient for parameter tensors */
    bool isGradEfficient;
@@ -70,15 +70,15 @@ struct XNet

    /* backward propagation to obtain gradient
       with a number of root nodes */
-    void Backward(XList &roots, LOSS_FUNCTION_NAME loss = NOLOSS);
+    void Backward(TensorList &roots, LOSS_FUNCTION_NAME loss = NOLOSS);

    /* backward propagation to obtain gradient
       with a number of root nodes */
-    void Backward(XList &roots, XList &golds, LOSS_FUNCTION_NAME loss = NOLOSS);
+    void Backward(TensorList &roots, TensorList &golds, LOSS_FUNCTION_NAME loss = NOLOSS);

    /* backward propagation to obtain gradient wrt. the loss/error function
       with a number of root nodes */
-    void Backward(XList &roots, XList &golds, XList &paddings, LOSS_FUNCTION_NAME loss = NOLOSS);
+    void Backward(TensorList &roots, TensorList &golds, TensorList &paddings, LOSS_FUNCTION_NAME loss = NOLOSS);

    /* backward computation for a given node */
    void BackwardNode(XTensor * node, bool isEfficent = false);
@@ -92,10 +92,10 @@ struct XNet

    /* traverse the net and find the topological order by 
       depth-first search (Tarjan's algorithm) */
-    void Traverse(XList &roots);
+    void Traverse(TensorList &roots);

    /* depth-first search given a node (Tarjan's algorithm for topological ordering) */
-    void TarjanVisit(XTensor * node, XList &orders, const unsigned int code);
+    void TarjanVisit(XTensor * node, TensorList &orders, const unsigned int code);

    /* dump network information */
    void Dump(FILE * file);

--- a/source/sample/fnnlm/FNNLM.cpp
+++ b/source/sample/fnnlm/FNNLM.cpp
@@ -20,7 +20,7 @@
 * This is a simple implementation of the feed-forward network-based language
 * model (FNNLM). See more details about FNNLM in
 * "A Neural Probabilistic Language Model" by Bengio et al.
- * Journal of Machine Learning Research 3 (2003) 1137C1155
+ * Journal of Machine Learning Research 3 (2003) 1137–1155
 *
 * $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-06-22
 */
@@ -537,8 +537,8 @@ update the model parameters using the delta rule
 */
 void Update(FNNModel &model, FNNModel &grad, float epsilon, bool isNodeGrad)
 {
-    XList paraList(10);
-    XList gradList(10);
+    TensorList paraList(10);
+    TensorList gradList(10);

    paraList.Add(&model.outputW);
    paraList.Add(&model.outputB);
@@ -765,7 +765,7 @@ void Forward(XTensor inputs[], XTensor &output, FNNModel &model, FNNNet &net)
    int batchSize = -1;
    int n = model.n;
    int depth = model.hDepth;
-    XList eList(n - 1);
+    TensorList eList(n - 1);

    /* previous n - 1 words */
    for(int i = 0; i < n - 1; i++){
@@ -943,7 +943,7 @@ void Backward(XTensor inputs[], XTensor &output, XTensor &gold, LOSS_FUNCTION_NA
            _CopyValues(&dedx, &gradPassed);
    }

-    XList eList(n - 1);
+    TensorList eList(n - 1);

    /* back-propagation for the embedding layer */
    for (int i = 0; i < n - 1; i++) {
@@ -1036,7 +1036,7 @@ void ForwardAutoDiff(XTensor inputs[], XTensor &output, FNNModel &model)
    XTensor hidden;
    XTensor b;

-    XList inputList(n - 1);
+    TensorList inputList(n - 1);
    for(int i = 0; i < n - 1; i++)
        inputList.Add(inputs + i);


--- a/source/sample/main.cpp
+++ b/source/sample/main.cpp
@@ -21,40 +21,19 @@ void TestDataManager() {
 	const int indices[] = { 0, 1 };
 	dataSet.LoadBatch(src, indices, sizeof(indices) / sizeof(*indices), srcField);
 	dataSet.LoadBatch(tgt, indices, sizeof(indices) / sizeof(*indices), tgtField);
-	//   
-	//   tgt.Dump(stderr);
-	//   src.Dump(stderr);
-	//XListV2<int> list(10);
-	//int* a = new int[10]{1,2,3,4,5,6,7,8,9};
-	//list.Add(a);
-	//auto x = list.Get(0);
-	//cout << x[0] << endl;
-	//list.Remove(0);
-	//auto y = list.Get(0);
-	//cout << x[0] << endl;
-	//delete[] a;

-	XList list(10);
-	XTensor a,b,c;
-	InitTensor2D(&a, 2, 2);
-	InitTensor2D(&b, 2, 2);
-	InitTensor2D(&c, 2, 2);
-	float arr[] = { 1., 2., 3., 4. };
-	a.SetData(arr, 4);
-	b.SetData(arr, 4);
-	//c.SetZeroAll();
-	_MatrixMul(&a, X_NOTRANS, &b, X_NOTRANS, &c);
-	c.Dump(stderr);
-	CharList str(10);
-	char* s = new char(10);
-	for (int i = 0; i < 9; ++i) {
-		s[i] = i + 'a';
+	IntList str(10);
+	for (int i = 9; i > 0; --i) {
+		str.Add(i);
 	}
-	s[9] = 0;
-	str.Add(s);
-	cout << str.Get(0);
-
-	vector<int> x;
+	str.Add('\0');
+	for (int i = 0; i < str.count; ++i)
+		cout << str.Get(i);
+	cout << endl;
+	str.Sort(10);
+	for (int i = 0; i < str.count; ++i)
+		cout << str.Get(i);
+	cout << endl;

 }


--- a/source/sample/transformer/T2TAttention.cpp
+++ b/source/sample/transformer/T2TAttention.cpp
@@ -127,7 +127,7 @@ XTensor T2TAttention::MakeBig(XTensor &kqv, XTensor &mask, bool isTraining)
    XTensor q2;
    XTensor v2;
    XTensor kqv2;
-    XList split;
+    TensorList split;
    
    kqv2 = MMul(kqv, wbig);
    

--- a/source/sample/transformer/T2TModel.cpp
+++ b/source/sample/transformer/T2TModel.cpp
@@ -84,7 +84,7 @@ void T2TModel::InitModel(int argc, char ** argv)
    if(isMT)
        decoder->InitModel(argc, argv, true, 0, devID, mem);

-    XList params(10);
+    TensorList params(10);
    GetParams(params);

    for(int i = 0; i < params.count; i++){
@@ -388,7 +388,7 @@ void T2TModel::MakeMTMaskDec(XTensor &inputEnc, XTensor &inputDec,
 get parameter matrices
 >> list - the list that keeps the parameter matrices
 */
-void T2TModel::GetParams(XList &list)
+void T2TModel::GetParams(TensorList &list)
 {
    list.Clear();
    list.Add(&outputLayer->w);
@@ -448,7 +448,7 @@ void T2TModel::Dump(const char * fn)
    FILE * file = fopen(fn, "wb");
    CheckNTErrors(file, "Cannot open the model file");

-    XList params(100);
+    TensorList params(100);

    GetParams(params);

@@ -468,7 +468,7 @@ void T2TModel::Read(const char * fn)
    FILE * file = fopen(fn, "rb");
    CheckNTErrors(file, "Cannot open the model file");

-    XList params(100);
+    TensorList params(100);

    GetParams(params);


--- a/source/sample/transformer/T2TModel.h
+++ b/source/sample/transformer/T2TModel.h
@@ -98,7 +98,7 @@ public:
                       XTensor &maskDec, XTensor &maskEncDec);

    /* get parameter matrices */
-    void GetParams(XList &list);
+    void GetParams(TensorList &list);

    /* dump the parameters */
    void Dump(const char * fn);

--- a/source/sample/transformer/T2TPredictor.h
+++ b/source/sample/transformer/T2TPredictor.h
@@ -93,10 +93,10 @@ public:

    /* layers on the encoder side. We actually use the encoder output instead
       of all hidden layers. */
-    XList layersEnc;
+    TensorList layersEnc;

    /* layers on the decoder side */
-    XList layersDec;
+    TensorList layersDec;

    /* list of states */
    T2TState * states;

--- a/source/sample/transformer/T2TTrainer.cpp
+++ b/source/sample/transformer/T2TTrainer.cpp
@@ -491,7 +491,7 @@ where
 */
 void T2TTrainer::Update(T2TModel * model, const float lr)
 {
-    XList ws(100);
+    TensorList ws(100);

    model->GetParams(ws);

@@ -552,7 +552,7 @@ void T2TTrainer::PrepareModel(T2TModel * model)
    moments.Clear();
    moments2nd.Clear();

-    XList ws(100);
+    TensorList ws(100);

    model->GetParams(ws);


--- a/source/sample/transformer/T2TTrainer.h
+++ b/source/sample/transformer/T2TTrainer.h
@@ -82,10 +82,10 @@ public:
    float adamBeta2T;

    /* list of the moment of the parameter matrices */
-    XList moments;
+    TensorList moments;

    /* list of the 2nd order moment of the parameter matrices */
-    XList moments2nd;
+    TensorList moments2nd;

    /* indicates whether the data file is shuffled for training */
    bool isShuffled;

--- a/source/tensor/XDevice.cpp
+++ b/source/tensor/XDevice.cpp
@@ -478,7 +478,7 @@ split a string
 >> items - splitting result
 << return - how many items are there
 */
-int SplitALine(char * inputString, const char * seperator, CharList * items)
+int SplitALine(char * inputString, const char * seperator, StrList* items)
 {
    items->Clear();

@@ -532,7 +532,7 @@ get device ids for the given device information
 */
 int XDevManager::GetDeviceIDs(char * devInfo, int * devIDs)
 {
-	CharList* terms = new CharList(1);
+	StrList* terms = new StrList(1);
    SplitALine(devInfo, " ", terms);

    for(int i = 0; i < terms->count; i++){

--- a/source/tensor/XLink.cpp
+++ b/source/tensor/XLink.cpp
@@ -300,7 +300,7 @@ void XLink::MakeLink(const XTensor * t1, const XTensor * t2, XTensor * h, int id
    if(h == NULL)
        return;
    
-    XList list(2);
+    TensorList list(2);
    list.Add((XTensor*)t1);
    list.Add((XTensor*)t2);

@@ -320,7 +320,7 @@ void XLink::MakeLink(const XTensor * t1, const XTensor * t2, const XTensor * t3,
    if (h == NULL)
        return;

-    XList list(3);
+    TensorList list(3);
    list.Add((XTensor*)t1);
    list.Add((XTensor*)t2);
    list.Add((XTensor*)t3);
@@ -334,7 +334,7 @@ create a hyper edge with a list of tensors and a output tensor
 >> h - head tensor
 >> id - id of the edge type
 */
-void XLink::MakeLink(const XList * list, XTensor * h, int id)
+void XLink::MakeLink(const TensorList * list, XTensor * h, int id)
 {
    /* forward */
    XLink &income = h->income;
@@ -368,7 +368,7 @@ create a hyper edge with a input tensors and a list of output tensors
 >> list - a list of output tensors
 >> id - id of the edge type
 */
-void XLink::MakeLink(XTensor * t, XList * list, int id)
+void XLink::MakeLink(XTensor * t, TensorList * list, int id)
 {
    /* forward */
    for(int i = 0; i < list->count; i++){
@@ -544,7 +544,7 @@ void XLink::CopyIncoming(const XTensor * reference, XTensor * target)
    ClearIncoming(target);

    int tailNum = reference->income.tailNum;
-    XList tails(tailNum);
+    TensorList tails(tailNum);
    for(int i = 0; i < tailNum; i++){
        XTensor * tail = (XTensor*)reference->income.tails[i];
        tails.Add(tail);

--- a/source/tensor/XLink.h
+++ b/source/tensor/XLink.h
@@ -144,11 +144,11 @@ struct XLink

    /* create a hyper edge with a list of input tensors and a output tensor */
    static
-    void MakeLink(const XList * list, XTensor * h, int id);
+    void MakeLink(const TensorList * list, XTensor * h, int id);

    /* create a hyper edge with a input tensors and a list of output tensors */
    static
-    void MakeLink(XTensor * h, XList * list, int id);
+    void MakeLink(XTensor * h, TensorList * list, int id);

    /* add a parameter */
    static

--- a/source/tensor/XList.cpp
+++ b/source/tensor/XList.cpp
--- a/source/tensor/XList.h
+++ b/source/tensor/XList.h
@@ -25,23 +25,19 @@

 #include "XMem.h"
 #include "XGlobal.h"
-#include <utility>

-#ifndef __XLIST_H__
-#define __XLIST_H__
+#ifndef __TensorList_H__
+#define __TensorList_H__


 /* the nts (NiuTrans.Tensor) namespace */
 namespace nts {
 	
-/* the XListBase class */
+/* the TensorListBase class */
 template <typename T>
-struct XListBase {
+struct TensorListBase {
 public:

-	/* compare function */
-    typedef int (*ListCompare)(const T item1, const T item2);
-
    /* data items */
    T *items;

@@ -56,16 +52,16 @@ public:

 public:
    /* constructor */
-    XListBase();
+    TensorListBase();

    /* constructor */
-    XListBase(int myMaxNum);
+    TensorListBase(int myMaxNum);

    /* constructor */
-    XListBase(int myMaxNum, XMem* myMem);
+    TensorListBase(int myMaxNum, XMem* myMem);

    /* de-constructor */
-    ~XListBase();
+    ~TensorListBase();

    /* add an item into the list */
    void Add(T&& item);
@@ -77,7 +73,7 @@ public:
    void Add(T* inputItems, int inputItemCount);

 	/* append a list to the current list */
-    void AddList(XListBase* l);
+    void AddList(TensorListBase* l);

 	/* insert an item to the given position of the list */
    void Insert(int pos, const T& item);
@@ -101,7 +97,7 @@ public:
    void Clear();

 	/* sort the list */
-    void Sort(int itemSize, ListCompare comp);
+    void Sort(int itemSize);

 	/* reverse the list */
    void Reverse();
@@ -110,7 +106,7 @@ public:
    void Remove(int i);

 	/* copy the list */
-    XListBase* Copy(XMem* myMem);
+    TensorListBase* Copy(XMem* myMem);

 	/* shuffle the list */
    void Shuffle(int nround = 10, int beg = -1, int len = 0);
@@ -123,325 +119,19 @@ public:
 	void Set(int i, T item) { SetItem(i, item); };
 };

-/* constructor */
-template <typename T>
-XListBase<T>::XListBase()
-{
-    mem = NULL;
-    maxNum = 0;
-    count = 0;
-    items = NULL;
-}
-
-/* 
-constructor 
->> myMaxNum - maximum number of items to keep
->> isIntListOrNot - specify if the list keeps int items
-*/
-template <typename T>
-XListBase<T>::XListBase(int myMaxNum)
-{
-    mem = NULL;
-    maxNum = myMaxNum;
-    count = 0;
-    items = new T[myMaxNum];
-}
-
-/* 
-constructor 
->> myMaxNum - maximum number of items to keep
->> myMem - the memory pool used for data allocation
->> isIntListOrNot - specify if the list keeps int items
-*/
-template <typename T>
-XListBase<T>::XListBase(int myMaxNum, XMem* myMem)
-{
-    mem = myMem;
-    maxNum = myMaxNum;
-    count = 0;
-    items = (T*)mem->Alloc(mem->devID, sizeof(T) * maxNum);
-}
-
-/* de-constructor */
-template <typename T>
-XListBase<T>::~XListBase()
-{
-    delete[] items;
-}
-

-/*
-add an item into the list
->> item - a right value
-*/
-template <typename T>
-void XListBase<T>::Add(T&& item)
-{
-    if (count == maxNum) {
-        T* newItems;
-        if (mem == NULL)
-            newItems = new T[maxNum * 2 + 1];
-        else
-            newItems = (T*)mem->Alloc(mem->devID, sizeof(T) * (maxNum * 2 + 1));
-        memcpy(newItems, items, sizeof(T) * maxNum);
-        if (mem == NULL)
-            delete[] items;
-        items = newItems;
-        maxNum = maxNum * 2 + 1;
-    }
-
-    items[count++] = item;
-}
-
-/*
-add an item into the list
->> item - a const reference to the item
-*/
-template <typename T>
-void XListBase<T>::Add(const T& item)
-{
-	if (count == maxNum) {
-		T* newItems;
-		if (mem == NULL)
-			newItems = new T[maxNum * 2 + 1];
-		else
-			newItems = (T*)mem->Alloc(mem->devID, sizeof(T) * (maxNum * 2 + 1));
-		memcpy(newItems, items, sizeof(T) * maxNum);
-		if (mem == NULL)
-			delete[] items;
-		items = newItems;
-		maxNum = maxNum * 2 + 1;
-	}
-
-	items[count++] = item;
-}
-
-/* 
-add a number of items into the list 
->> inputItems - pointer to the array of items
->> inputItemCount - number of input items
-*/
-template <typename T>
-void XListBase<T>::Add(T* inputItems, int inputItemCount)
-{
-    if (count + inputItemCount >= maxNum) {
-        int newMaxNum = (count + inputItemCount) * 2 + 1;
-        T* newItems;
-        if (mem == NULL)
-            newItems = new T[newMaxNum];
-        else
-            newItems = (T*)mem->Alloc(mem->devID, sizeof(T) * newMaxNum);
-        memcpy(newItems, items, sizeof(T) * maxNum);
-        if (mem == NULL)
-            delete[] items;
-        items = newItems;
-        maxNum = newMaxNum;
-    }
-    memcpy(items + count, inputItems, sizeof(T) * inputItemCount);
-    count += inputItemCount;
-}
-
-/*
-append a list to the current list
->> l - the list we use to append
-*/
-template <typename T>
-void XListBase<T>::AddList(XListBase* l)
-{
-    Add(l->items, l->count);
-}
-
-/*
-insert an item to the given position of the list
->> pos - the position
->> item - the item for insertion
-*/
-template <typename T>
-void XListBase<T>::Insert(int pos, const T& item)
-{
-    if (count == maxNum) {
-        T* newItems;
-        if (mem == NULL)
-            newItems = new T[maxNum * 2 + 1];
-        else
-            newItems = (T*)mem->Alloc(mem->devID, sizeof(T) * (maxNum * 2 + 1));
-        memcpy(newItems, items, sizeof(T) * maxNum);
-        if (mem == NULL)
-            delete[] items;
-        items = newItems;
-        maxNum = maxNum * 2 + 1;
-    }
-
-    for (int i = count - 1; i >= pos; i--)
-        items[i + 1] = items[i];
-    items[pos] = item;
-    count++;
-}
-
-template<typename T>
-void XListBase<T>::Insert(int pos, T&& item)
-{
-	if (count == maxNum) {
-		T* newItems;
-		if (mem == NULL)
-			newItems = new T[maxNum * 2 + 1];
-		else
-			newItems = (T*)mem->Alloc(mem->devID, sizeof(T) * (maxNum * 2 + 1));
-		memcpy(newItems, items, sizeof(T) * maxNum);
-		if (mem == NULL)
-			delete[] items;
-		items = newItems;
-		maxNum = maxNum * 2 + 1;
-	}
-
-	for (int i = count - 1; i >= pos; i--)
-		items[i + 1] = items[i];
-	items[pos] = item;
-	count++;
-}
-
-/* get the item at position i */
-template <typename T>
-T& XListBase<T>::GetItem(int i) const
-{
-    CheckNTErrors(i >= -1 && i < count, "Index of a list item is out of scope!");
-    CheckNTErrors(count > 0, "Cannt index the item in an empty list!");
-    if (i == -1)
-        return items[count - 1];
-    else
-        return items[i];
-}
-
-/* set the item at position i */
-template <typename T>
-void XListBase<T>::SetItem(int i, const T& item)
-{
-    if (i >= 0 && i < count)
-        items[i] = item;
-}
-
-template<typename T>
-inline void XListBase<T>::SetItem(int i, T&& item)
-{
-	if (i >= 0 && i < count)
-		items[i] = std::move(item);
-}
-
-/* 
-find the position of the first matched item 
->> item - the item for matching
-<< the position where we hit the item (if any)
-*/
-
-template <typename T>
-int XListBase<T>::FindFirst(const T& item)
-{
-    for (int i = 0; i < count; i++) {
-        if (item == items[i])
-            return i;
-    }
-    return -1;
-}
-
-/* clear the data array */
-template <typename T>
-void XListBase<T>::Clear()
-{
-	delete[] items;
-	count = 0;
-}
-
-/* 
-sort the list 
->> itemSize - size of an item
->> comp - the comparison function used in sorting
-*/
-template <typename T>
-void XListBase<T>::Sort(int itemSize, ListCompare comp)
-{
-    qsort(items, count, itemSize, comp);
-}
-
-/* reverse the list */
-template <typename T>
-void XListBase<T>::Reverse()
-{
-    int half = count / 2;
-    for (int i = 0; i < half; i++) {
-        T tmp(std::move(items[i]));
-        items[i] = std::move(items[count - i - 1]);
-        items[count - i - 1] = std::move(tmp);
-    }
-}
-
-/* remove the item at position i */
-template <typename T>
-void XListBase<T>::Remove(int i)
-{
-    if (i >= count || i < 0)
-        return;
-
-    memcpy(items + i, items + i + 1, sizeof(T*) * (count - i - 1));
-
-    count--;
-}
-
-/* 
-copy the list 
->> myMem - memory pool used for allocating the data in the new list
-<< hard copy of the list
-*/
-template <typename T>
-XListBase<T>* XListBase<T>::Copy(XMem* myMem)
-{
-    XListBase<T>* newList = new XListBase<T>(maxNum, myMem);
-    for (int i = 0; i < count; i++) {
-        newList->Add(GetItem(i));
-    }
-    return newList;
-}
-
-/* 
-shuffle the list
->> nround - number of rounds for shuffling
->> beg - where we start
->> len - how many items are used in shuffling
-*/
-template <typename T>
-void XListBase<T>::Shuffle(int nround, int beg, int len)
-{
-    if (beg < 0) {
-        beg = 0;
-        len = count;
-    }
-
-    if (beg + len > count)
-        return;
-
-    srand((unsigned int)time(NULL));
-
-    for (int k = 0; k < nround; k++) {
-        /* Fisher–Yates shuffle */
-        for (int i = 0; i < len; i++) {
-            float a = (float)rand() / RAND_MAX;
-            size_t j = (unsigned int)(a * (i + 1));
-            T t = items[beg + j];
-            items[beg + j] = items[beg + i];
-            items[beg + i] = t;
-        }
-    }
-}

 struct XTensor;

-/* typedef for list */
-typedef XListBase<int> IntList;
-typedef XListBase<char*> CharList;
-typedef XListBase<long> LongList;
-typedef XListBase<float> FloatList;
-typedef XListBase<short> ShortList;
-typedef XListBase<XTensor*> XList;
+typedef TensorListBase<int> IntList;
+typedef TensorListBase<char> CharList;
+typedef TensorListBase<char*> StrList;
+typedef TensorListBase<long> LongList;
+typedef TensorListBase<float> FloatList;
+typedef TensorListBase<short> ShortList;
+typedef TensorListBase<void*> XList;
+typedef TensorListBase<XTensor*> TensorList;

 } /* end of the nts (NiuTrans.Tensor) namespace */

-#endif // __XLIST_H__
+#endif // __TensorList_H__
--- a/source/tensor/XPRunner.cpp
+++ b/source/tensor/XPRunner.cpp
@@ -146,7 +146,7 @@ run a set of jobs in parallel
 >> jobArgs - the list of arguments for each job
 >> sleepTime - time to sleep (in ms) for each round
 */
-void XPRunner::Run(XList * jobFunctions, XList * jobArgs, float sleepTime)
+void XPRunner::Run(TensorList * jobFunctions, TensorList * jobArgs, float sleepTime)
 {
    if(threadNum <= 0){
        XPRINT(1, stderr, "Error! No threads were created!\n");
@@ -195,7 +195,7 @@ void XPRunner::Run(XList * jobFunctions, XList * jobArgs, float sleepTime)
            TFunction function = (TFunction)jobFunctions->GetItem(jobArgs->count - c);

            /* the arguments that are passed to the function */
-            volatile XList * args = (XList*)jobArgs->GetItem(jobArgs->count - c);
+            volatile TensorList * args = (TensorList*)jobArgs->GetItem(jobArgs->count - c);

            /* thread */
            XThread * thread  = threads + availableThreads[i];

--- a/source/tensor/XPRunner.h
+++ b/source/tensor/XPRunner.h
@@ -106,7 +106,7 @@ public:
    void KillThreads();

    /* run a set of jobs in parallel */
-    void Run(XList * jobFunctions, XList * jobArgs, float sleepTime = 0);
+    void Run(TensorList * jobFunctions, TensorList * jobArgs, float sleepTime = 0);

    /* get the number of parallel jobs to run */
    int GetJobNum(int size);

--- a/source/tensor/XQueue.cpp
+++ b/source/tensor/XQueue.cpp
@@ -42,7 +42,7 @@ job item used in queues
 JobQueueNode::JobQueueNode()
 {
    job  = NULL;
-    args = new XList(1);
+    args = new TensorList(1);
 }

 /* de-constructor */
@@ -67,7 +67,7 @@ XQueue::XQueue(int mySize)
    head = 0;
    tail = 0;
    isJobQueue = false;
-    jobDequeuerArgs = new XList(1);
+    jobDequeuerArgs = new TensorList(1);
    jobDequeuerBreak = false;
    runningJobCount = 0;
    jobStream = NULL;
@@ -213,7 +213,7 @@ void XQueue::StopJobConsumer()
 }

 /* add a job item to process */
-void XQueue::EnqueueJob(void * job, XList * jobArgs)
+void XQueue::EnqueueJob(void * job, TensorList * jobArgs)
 {
    MUTEX_LOCK(jobQueueMutex);
    runningJobCount++;
@@ -227,7 +227,7 @@ void XQueue::EnqueueJob(void * job, XList * jobArgs)
 }

 /* job item consumer */
-void XQueue::DequeueJobs(XList * args)
+void XQueue::DequeueJobs(TensorList * args)
 {
    CheckNTErrors((args->count == 2), "Illegal arguments!");


--- a/source/tensor/XQueue.h
+++ b/source/tensor/XQueue.h
@@ -52,7 +52,7 @@ public:
    void * job;

    /* arguments of the job */
-    XList * args;
+    TensorList * args;

 public:
    /* constructor */
@@ -102,7 +102,7 @@ private:
    XThread jobDequeuer;

    /* argument list of jobDequeuer */
-    XList * jobDequeuerArgs;
+    TensorList * jobDequeuerArgs;

    /* indicates whether jobDequeuer stops */
    bool jobDequeuerBreak;
@@ -141,11 +141,11 @@ public:
    void StopJobConsumer();

    /* add a job item to process */
-    void EnqueueJob(void * job, XList * jobArgs);
+    void EnqueueJob(void * job, TensorList * jobArgs);

    /* job item consumer */
    static
-    void DequeueJobs(XList * args);
+    void DequeueJobs(TensorList * args);

    /* get the break flag */
    bool GetJobBreak();

--- a/source/tensor/XTensor.cpp
+++ b/source/tensor/XTensor.cpp
@@ -1946,7 +1946,7 @@ void XTensor::FlushToMem(XMem * targetMem)
    if (targetMem->devID >= 0) {
 #ifdef USE_CUDA
        if (devID < 0) {
-            XList l(1);
+            TensorList l(1);
            l.Add(this);
            CudaCPUToGPUFlush(&l, targetMem->devID, targetMem);
        }

--- a/source/tensor/XThread.h
+++ b/source/tensor/XThread.h
@@ -85,7 +85,7 @@ namespace nts{

 #endif

-typedef void (*TFunction) (volatile XList*);
+typedef void (*TFunction) (volatile TensorList*);

 /*
 This is a class that wraps the standard implementation of threading
@@ -133,7 +133,7 @@ public:

    /* arguments (for the function to run) */
    volatile
-    XList * argv;
+    TensorList * argv;

    /* a flag to break */
    volatile

--- a/source/tensor/core/arithmetic/MatrixMul.cpp
+++ b/source/tensor/core/arithmetic/MatrixMul.cpp
@@ -108,9 +108,9 @@ void _MatrixMul(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
        cBlockNum *= b->dimSizeRDI[i];
    }

-    XList * aList = new XList(10);
-    XList * bList = new XList(10);
-    XList * cList = new XList(10);
+    TensorList * aList = new TensorList(10);
+    TensorList * bList = new TensorList(10);
+    TensorList * cList = new TensorList(10);
    int aDimSize[2] = { -a->dimSizeRDI[1], a->dimSizeRDI[0] };
    int bDimSize[2] = { -b->dimSizeRDI[1], b->dimSizeRDI[0] };
    int cDimSize[2] = { -c->dimSizeRDI[1], c->dimSizeRDI[0] };

--- a/source/tensor/core/arithmetic/MatrixMul2DMultiTheading.cpp
+++ b/source/tensor/core/arithmetic/MatrixMul2DMultiTheading.cpp
@@ -38,11 +38,11 @@ argument5: matrix a
 argument6: matrix b
 argument7: matrix c (c=a*b*\alpha + c*beta)
 */
-void _MatrixMul2DMultiTheading(XList * args)
+void _MatrixMul2DMultiTheading(TensorList * args)
 {
 	CheckNTErrors(args->count == 2, "invalid argument number!");
 	IntList * indexArgs = (IntList*)args->GetItem(0);
-	XList * matrixArgs = (XList*)args->GetItem(1);
+	TensorList * matrixArgs = (TensorList*)args->GetItem(1);
 	CheckNTErrors(indexArgs->count == 4, "invalid argument number!");
 	CheckNTErrors(matrixArgs->count == 5, "invalid argument number!");


--- a/source/tensor/core/arithmetic/MatrixMul2DMultiTheading.h
+++ b/source/tensor/core/arithmetic/MatrixMul2DMultiTheading.h
@@ -30,7 +30,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
 matrix multiplication for a block (x1,y1) - (x2,y2)
 where (x1,y1) is the upper-left corner and (x2,y2) is the bottom-right corner
 */
-void _MatrixMul2DMultiTheading(XList * args);
+void _MatrixMul2DMultiTheading(TensorList * args);

 } // namespace nts(NiuTrans.Tensor)


--- a/source/tensor/core/arithmetic/MatrixMulBatched.cpp
+++ b/source/tensor/core/arithmetic/MatrixMulBatched.cpp
@@ -227,9 +227,9 @@ c_i = trans(a_i) * trans(b_i) * \alpha + c_i * \beta for each i in [0,count-1]
 >> alpha - scalar
 >> beta - scalar
 */
-void _MatrixMulBatchedCPU(const XList * a, MATRIX_TRANS_TYPE transposedA,
-                          const XList * b, MATRIX_TRANS_TYPE transposedB,
-                          XList * c, DTYPE alpha, DTYPE beta)
+void _MatrixMulBatchedCPU(const TensorList * a, MATRIX_TRANS_TYPE transposedA,
+                          const TensorList * b, MATRIX_TRANS_TYPE transposedB,
+                          TensorList * c, DTYPE alpha, DTYPE beta)
 {
    CheckNTErrors(a && b && c, "Empty input lists!");
    CheckNTErrors(a->count == b->count && a->count == c->count, "Input lists must be of the same size!");

--- a/source/tensor/core/arithmetic/MatrixMulBatched.h
+++ b/source/tensor/core/arithmetic/MatrixMulBatched.h
@@ -58,8 +58,8 @@ void _MatrixMulBatchedCPU(const XTensor * a, MATRIX_TRANS_TYPE transposedA, cons
 matrix multiplication of the two tensors c = trans(a) * trans(b) * alpha + c * beta (for list inputs)
 optimized for GPU
 */
-void _MatrixMulBatchedCPU(const XList * a, MATRIX_TRANS_TYPE transposedA, const XList * b, MATRIX_TRANS_TYPE transposedB, 
-                          XList * c, DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0);
+void _MatrixMulBatchedCPU(const TensorList * a, MATRIX_TRANS_TYPE transposedA, const TensorList * b, MATRIX_TRANS_TYPE transposedB, 
+                          TensorList * c, DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0);

 /*
 matrix multiplication of the two tensors (return an XTensor structure) c = trans(a) * trans(b) * alpha

--- a/source/tensor/core/arithmetic/XTensorBLAS.cu
+++ b/source/tensor/core/arithmetic/XTensorBLAS.cu
@@ -201,9 +201,9 @@ void _CudaBLASMatrixMULBatchedStrided(cublasHandle_t * handle,
 matrix multiplication via cuda version BLAS
 */
 void _CudaBLASMatrixMULList(cublasHandle_t * handle,
-                            const XList * a, MATRIX_TRANS_TYPE transposedA,
-                            const XList * b, MATRIX_TRANS_TYPE transposedB,
-                            XList * c,
+                            const TensorList * a, MATRIX_TRANS_TYPE transposedA,
+                            const TensorList * b, MATRIX_TRANS_TYPE transposedB,
+                            TensorList * c,
                            int count, DTYPE alpha, DTYPE beta)
 {
    CheckNTErrors((a && b && c), "Empty input lists!");

--- a/source/tensor/core/arithmetic/XTensorBLAS.h
+++ b/source/tensor/core/arithmetic/XTensorBLAS.h
@@ -56,8 +56,8 @@ void _CudaBLASMatrixMULBatchedStrided(cublasHandle_t * handle,
                                      DTYPE alpha = (DTYPE)1.0, DTYPE beta = 1.0);

 /* matrix multiplication in batch mode via cuda version BLAS */
-void _CudaBLASMatrixMULList(cublasHandle_t * handle, const XList * a, MATRIX_TRANS_TYPE transposedA, 
-                            const XList * b, MATRIX_TRANS_TYPE transposedB, XList * c,
+void _CudaBLASMatrixMULList(cublasHandle_t * handle, const TensorList * a, MATRIX_TRANS_TYPE transposedA, 
+                            const TensorList * b, MATRIX_TRANS_TYPE transposedB, TensorList * c,
                            int count, DTYPE alpha = (DTYPE)1.0, DTYPE beta = 1.0);

 #endif

--- a/source/tensor/core/math/Normalize.cpp
+++ b/source/tensor/core/math/Normalize.cpp
@@ -138,7 +138,7 @@ XTensor Normalize(const XTensor &input, int dim, const XTensor &mean, const XTen
    _Normalize(&input, &output, dim, &mean, &var, &a, &b, epsilon);

    /* tensor connections */
-    XList list(5);
+    TensorList list(5);
    list.Add((XTensor*)&input);
    list.Add((XTensor*)&mean);
    list.Add((XTensor*)&var);

--- a/source/tensor/core/movement/CopyIndexed.cpp
+++ b/source/tensor/core/movement/CopyIndexed.cpp
@@ -227,7 +227,7 @@ XTensor CopyIndexed(const XTensor & s, int dim,
    /* call _CopyIndexed function */
    _CopyIndexed(&s, &t, dim, &srcIndex, &tgtIndex, copyNum);

-    XList list(3);
+    TensorList list(3);
    list.Add((XTensor*)&s);
    list.Add((XTensor*)&srcIndex);
    list.Add((XTensor*)&tgtIndex);

--- a/source/tensor/core/shape/Concatenate.cpp
+++ b/source/tensor/core/shape/Concatenate.cpp
@@ -37,7 +37,7 @@ or "Merge" by means of the tensor shapes
 >> big - the resulting tensor
 >> dim - which dimension we perform the concatenation
 */
-void _Concatenate(const XList * smalls, XTensor * big, int dim)
+void _Concatenate(const TensorList * smalls, XTensor * big, int dim)
 {
    bool uniform = true;
    for (int i = 1; i < smalls->count; i++) {
@@ -66,7 +66,7 @@ or "Merge" by means of the tensor shapes
 >> dim - which dimension we perform the concatenation
 << return - the tensor of concatenating a list of tensors along a given dimension
 */
-XTensor Concatenate(const XList &smalls, int dim)
+XTensor Concatenate(const TensorList &smalls, int dim)
 {
    CheckNTErrors(smalls.count > 0, "Empty list!");
    CheckNTErrors(dim >= 0, "Illegal dimension to concatenate!");
@@ -147,7 +147,7 @@ concatenate two tensors along a given dimension
 */
 void _Concatenate(const XTensor * smallA, const XTensor * smallB, XTensor * big, int dim)
 {
-    XList smalls(2);
+    TensorList smalls(2);
    smalls.Add((XTensor*)smallA);
    smalls.Add((XTensor*)smallB);

@@ -168,7 +168,7 @@ XTensor Concatenate(const XTensor &smallA, const XTensor &smallB, int dim)
 {
    CheckNTErrors(dim >= 0, "Illegal dimension to concatenate!");

-    XList smalls(2);
+    TensorList smalls(2);
    smalls.Add((XTensor*)&smallA);
    smalls.Add((XTensor*)&smallB);


--- a/source/tensor/core/shape/Concatenate.h
+++ b/source/tensor/core/shape/Concatenate.h
@@ -31,7 +31,7 @@ concatenate a list of tensors along a given dimension
 Note that this is actually a wrapper that selects 
 "ConcatenateSolely" or "Merge" by means of the tensor shapes 
 */
-void _Concatenate(const XList * smalls, XTensor * big, int dim);
+void _Concatenate(const TensorList * smalls, XTensor * big, int dim);

 /*
 concatenate a list of tensors along a given dimension (return an XTensor structure)
@@ -39,7 +39,7 @@ make a new tensor to keep the result and return it
 Note that this is actually a wrapper that selects 
 "ConcatenateSolely" or "Merge" by means of the tensor shapes 
 */
-XTensor Concatenate(const XList &smalls, int dim);
+XTensor Concatenate(const TensorList &smalls, int dim);

 /* concatenate two tensors along a given dimension */
 void _Concatenate(const XTensor * smallA, const XTensor * smallB, XTensor * big, int dim);

--- a/source/tensor/core/shape/ConcatenateSolely.cpp
+++ b/source/tensor/core/shape/ConcatenateSolely.cpp
@@ -34,7 +34,7 @@ concatenate a list of tensors along a given dimension
 >> big - the resulting tensor
 >> dim - which dimension we perform the concatenation
 */
-void _ConcatenateSolely(const XList * smalls, XTensor * big, int dim)
+void _ConcatenateSolely(const TensorList * smalls, XTensor * big, int dim)
 {
    CheckNTErrors(big->order > dim && dim >= 0, "Illegal dimension to concatenate!");

@@ -85,7 +85,7 @@ void _ConcatenateSolely(const XList * smalls, XTensor * big, int dim)
        }
    }
    else {
-        CharList * sourceArrays = new CharList(smalls->count);
+        StrList * sourceArrays = new StrList(smalls->count);
        int * blockSizes = new int[smalls->count];
        for (int i = 0; i < smalls->count; i++) {
            XTensor * tensor = (XTensor*)smalls->GetItem(i);

--- a/source/tensor/core/shape/ConcatenateSolely.h
+++ b/source/tensor/core/shape/ConcatenateSolely.h
@@ -27,7 +27,7 @@
 namespace nts { // namespace nts(NiuTrans.Tensor)

 /* concatenate a list of tensors along a given dimension */
-void _ConcatenateSolely(const XList * smalls, XTensor * big, int dim);
+void _ConcatenateSolely(const TensorList * smalls, XTensor * big, int dim);

 } // namespace nts(NiuTrans.Tensor)


--- a/source/tensor/core/shape/Merge.cpp
+++ b/source/tensor/core/shape/Merge.cpp
@@ -206,7 +206,7 @@ merge small tensors into a big tensor
 >> big - the merged tensor (for return)
 >> whereToMerge - the merging operation is along with which dimension
 */
-void _Merge(const XList * smalls, XTensor * big, int whereToMerge)
+void _Merge(const TensorList * smalls, XTensor * big, int whereToMerge)
 {
    whereToMerge = (whereToMerge < 0 ? big->order - 1 : whereToMerge);

@@ -322,7 +322,7 @@ make a new tensor to keep the result and return it
 >> whereToMerge - the merging operation is along with which dimension
 << return - the big tensor merged by small tensors
 */
-XTensor Merge(const XList &smalls, int whereToMerge)
+XTensor Merge(const TensorList &smalls, int whereToMerge)
 {
    XTensor * tensor = smalls.GetItem(0);
    int order = tensor->order;
@@ -375,7 +375,7 @@ XTensor Merge(const XTensor &smallA, const XTensor &smallB, int whereToMerge)
    XTensor big(order, dimSize, smallA.dataType, dr, smallA.devID, smallA.mem);
    big.SetTMPFlag();

-    XList smalls(2);
+    TensorList smalls(2);
    smalls.Add((XTensor*)&smallA);
    smalls.Add((XTensor*)&smallB);


--- a/source/tensor/core/shape/Merge.h
+++ b/source/tensor/core/shape/Merge.h
@@ -34,10 +34,10 @@ void _Merge(const XTensor * s, XTensor * t, int whereToMerge, int leadingDim = -
 XTensor Merge(const XTensor &s, int whereToMerge, int leadingDim = -1);

 /* merge small tensors into a big tensor */
-void _Merge(const XList * smalls, XTensor * big, int whereToMerge);
+void _Merge(const TensorList * smalls, XTensor * big, int whereToMerge);

 /* merge small tensors into a big tensor (return an XTensor structure) */
-XTensor Merge(const XList &smalls, int whereToMerge);
+XTensor Merge(const TensorList &smalls, int whereToMerge);

 /* merge two tensors into a big tensor (return an XTensor structure) */
 XTensor Merge(const XTensor &smallA, const XTensor &smallB, int whereToMerge);

--- a/source/tensor/core/shape/MergeBlockLists.cpp
+++ b/source/tensor/core/shape/MergeBlockLists.cpp
@@ -34,7 +34,7 @@ merge data by blocks
 >> target - target data array
 >> myMem - memory pool
 */
-void _MergeBlockLists(const CharList * sourceList, int * blockSizes, int blockNum, void * target, XMem * myMem)
+void _MergeBlockLists(const StrList* sourceList, int * blockSizes, int blockNum, void * target, XMem * myMem)
 {
    if (myMem != NULL && myMem->devID >= 0) {
 #ifdef USE_CUDA

--- a/source/tensor/core/shape/MergeBlockLists.cu
+++ b/source/tensor/core/shape/MergeBlockLists.cu
@@ -71,7 +71,7 @@ merge data by blocks (cuda version)
 >> target - target data array
 >> myMem - the memory pool
 */
-void _CudaMergeBlockLists(const CharList * sourceList, int * blockSizes, int blockNum, void * target, XMem * myMem)
+void _CudaMergeBlockLists(const StrList* sourceList, int * blockSizes, int blockNum, void * target, XMem * myMem)
 {
    CheckNTErrors((myMem != NULL), "No memory pool!");
    CheckNTErrors((myMem->devID >= 0), "Wrong device to run!");

--- a/source/tensor/core/shape/MergeBlockLists.cuh
+++ b/source/tensor/core/shape/MergeBlockLists.cuh
@@ -30,10 +30,10 @@ namespace nts { // namespace nts(NiuTrans.Tensor)

 /* copy a number of blocks (of different sizes) to target positions */
 __global__
-void KernelCopyBlockLists(DTYPE ** sourceList, int * sourceBlockSizes, int sourceBlockNum, DTYPE ** targetList);
+void KernelCopyBlockLists(DTYPE * sourceList[], int * sourceBlockSizes, int sourceBlockNum, DTYPE * targetList[]);

 /* merge data by blocks (cuda version) */
-void _CudaMergeBlockLists(const CharList * sourceList, int * blockSizes, int blockNum, void * target, XMem * myMem);
+void _CudaMergeBlockLists(const StrList* sourceList, int * blockSizes, int blockNum, void * target, XMem * myMem);

 #endif // USE_CUDA


--- a/source/tensor/core/shape/MergeBlockLists.h
+++ b/source/tensor/core/shape/MergeBlockLists.h
@@ -27,7 +27,7 @@
 namespace nts { // namespace nts(NiuTrans.Tensor)

 /* merge data by blocks */
-void _MergeBlockLists(const CharList * sourceList, int * blockSizes, int blockNum, void * target, XMem * myMem);
+void _MergeBlockLists(const StrList* sourceList, int * blockSizes, int blockNum, void * target, XMem * myMem);

 } // namespace nts(NiuTrans.Tensor)


--- a/source/tensor/core/shape/Split.cpp
+++ b/source/tensor/core/shape/Split.cpp
@@ -209,7 +209,7 @@ split a big tensor into small tensors
 >> whereToSplit - which dimension of the tensor is to split
 >> splitNum - how many splits
 */
-void _Split(const XTensor * big, XList * smalls, int whereToSplit, int splitNum)
+void _Split(const XTensor * big, TensorList * smalls, int whereToSplit, int splitNum)
 {
    CheckNTErrors((smalls != NULL), "Invalid list!");
    CheckNTErrors((smalls->count == splitNum), "Unmatched tensors!");
@@ -340,7 +340,7 @@ split a big tensor into small tensors
 >> whereToSplit - which dimension of the tensor is to split
 >> splitNum - how many splits
 */
-void Split(const XTensor &big, XList &smalls, int whereToSplit, int splitNum)
+void Split(const XTensor &big, TensorList &smalls, int whereToSplit, int splitNum)
 {
    CheckNTErrors(big.GetDim(whereToSplit) % splitNum == 0, "Wrong splitNum!");


--- a/source/tensor/core/shape/Split.h
+++ b/source/tensor/core/shape/Split.h
@@ -42,13 +42,13 @@ e.g., (M, N) -> (M, N/3, 3)
 XTensor Split(const XTensor &s, int whereToSplit, int splitNum);

 /* split a big tensor into small tensors */
-void _Split(const XTensor * big, XList * smalls, int whereToSplit, int splitNum);
+void _Split(const XTensor * big, TensorList * smalls, int whereToSplit, int splitNum);

 /* 
-split a big tensor into small tensors (return a XList structure)
+split a big tensor into small tensors (the results are kept in a TensorList structure)
 make a new list to keep the result and return it
 */
-void Split(const XTensor &big, XList &smalls, int whereToSplit, int splitNum);
+void Split(const XTensor &big, TensorList &smalls, int whereToSplit, int splitNum);

 } // namespace nts(NiuTrans.Tensor)


--- a/source/tensor/core/shape/Unsqueeze.cpp
+++ b/source/tensor/core/shape/Unsqueeze.cpp
@@ -78,7 +78,7 @@ void _Unsqueeze(const XTensor * a, XTensor * b, int dim, int dSize)
 #endif
    }
    else {
-        CharList * sourceArrays = new CharList(blockNumB);
+        StrList * sourceArrays = new StrList(blockNumB);
        int * blockSizes = new int[blockNumB];

        for (int i = 0; i < blockNumA; i++) {

--- a/source/tensor/core/sort/Sort.cpp
+++ b/source/tensor/core/sort/Sort.cpp
@@ -114,7 +114,7 @@ void Sort(XTensor & a, XTensor & b, XTensor & index, int dim)
    _Sort(&a, &b, &index, dim);
    
    /* tensor connections */
-    XList list(2);
+    TensorList list(2);
    list.Add(&b);
    list.Add(&index);
     XLink::MakeLink(&a, &list, SORT_SORT);

--- a/source/tensor/core/sort/TopK.cpp
+++ b/source/tensor/core/sort/TopK.cpp
@@ -128,7 +128,7 @@ void TopK(XTensor &a, XTensor &b, XTensor &index, int dim, int k)
        _TopK(&a, &b, &index, dim, k);

    /* tensor connection */
-    XList list(2);
+    TensorList list(2);
    list.Add(&b);
    list.Add(&index);
    XLink::MakeLink(&a, &list, SORT_TOPK);

--- a/source/tensor/core/utilities/FlushToMem.cpp
+++ b/source/tensor/core/utilities/FlushToMem.cpp
@@ -31,7 +31,7 @@ flush a list of XTensor to GPU memory
 >> devID - target GPU id
 >> GPUMem - memory pool for the GPU
 */
-void CPUToGPUFlush(XList * mList, int devID, XMem * GPUMem)
+void CPUToGPUFlush(TensorList * mList, int devID, XMem * GPUMem)
 {
 #ifdef USE_CUDA
    CudaCPUToGPUFlush(mList, devID, GPUMem);

--- a/source/tensor/core/utilities/FlushToMem.cu
+++ b/source/tensor/core/utilities/FlushToMem.cu
@@ -32,7 +32,7 @@ flush a list of XTensor to GPU memory
 >> devID - target GPU id
 >> GPUMem - memory pool for the GPU
 */
-void CudaCPUToGPUFlush(XList * mList, int devID, XMem * GPUMem)
+void CudaCPUToGPUFlush(TensorList * mList, int devID, XMem * GPUMem)
 {
    if (mList == NULL || mList->count == 0)
        return;

--- a/source/tensor/core/utilities/FlushToMem.cuh
+++ b/source/tensor/core/utilities/FlushToMem.cuh
@@ -29,7 +29,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
 #ifdef USE_CUDA

 /* flush a list of XTensor to GPU memory */
-void CudaCPUToGPUFlush(XList * mList, int devID, XMem * GPUMem);
+void CudaCPUToGPUFlush(TensorList * mList, int devID, XMem * GPUMem);

 /* copy the data from GPU memory to CPU memory */
 void CudaGPUToCPUFlush(XTensor * tensor);

--- a/source/tensor/core/utilities/FlushToMem.h
+++ b/source/tensor/core/utilities/FlushToMem.h
@@ -27,7 +27,7 @@
 namespace nts { // namespace nts(NiuTrans.Tensor)

 /* flush a list of XTensor to GPU memory */
-void CPUToGPUFlush(XList * mList, int devID, XMem * GPUMem);
+void CPUToGPUFlush(TensorList * mList, int devID, XMem * GPUMem);

 /* copy the data from GPU memory to CPU memory */
 void GPUToCPUFlush(XTensor * tensor);

--- a/source/tensor/core/utilities/XMatrixSegment.cpp
+++ b/source/tensor/core/utilities/XMatrixSegment.cpp
@@ -51,7 +51,7 @@ void RunParallel2D(XPRunner * parallelRunner, void * job,
    CheckNTErrors(jobNum != 0, "TODO!");

    /* argument list of the jobs */
-    XList * jobArgList = new XList(argNum);
+    TensorList * jobArgList = new TensorList(argNum);

    va_list ap;
    va_start(ap, argNum);
@@ -62,13 +62,13 @@ void RunParallel2D(XPRunner * parallelRunner, void * job,
    va_end(ap);

    /* prepare the neccesary argument list for parallel processing */
-    XList * jobs = new XList(jobNum);
-    XList * args = new XList(jobNum);
+    TensorList * jobs = new TensorList(jobNum);
+    TensorList * args = new TensorList(jobNum);

-    int * indexList = new int[jobNum * 4 * 4];
+    int * indexList = new int[jobNum * 4 * 4];
 
     /* segment the matrix into blocks */
-    int nblock = SegmentTensor2D(rowNum, colNum, jobNum, indexList);
+    int nblock = SegmentTensor2D(rowNum, colNum, jobNum, indexList);
 
     /*
     assign jobs
@@ -78,8 +78,8 @@ void RunParallel2D(XPRunner * parallelRunner, void * job,
     */
     for (int i = 0; i < jobNum; i++) {
 		IntList* indexArgs = new IntList(4);
-        XList * blockArgs = new XList(argNum);
-        int * blockIndex = indexList + i * 4;
+        TensorList * blockArgs = new TensorList(argNum);
+        int * blockIndex = indexList + i * 4;
 
 		indexArgs->Add(blockIndex[0]);
 		indexArgs->Add(blockIndex[1]);
@@ -106,9 +106,9 @@ void RunParallel2D(XPRunner * parallelRunner, void * job,
         parallelRunner->Run(jobs, args);
 
     /* free the memory */
-    delete[] indexList;
+    delete[] indexList;
    for (int i = 0; i < args->count; i++) {
-        XList * blockArgs = (XList*)args->GetItem(i);
+        TensorList * blockArgs = (TensorList*)args->GetItem(i);
        delete blockArgs;
    }
    delete args;
@@ -154,7 +154,7 @@ int SegmentTensor2D(int rowNum, int colNum, int blockNum, int * blockIndex)
    int xMax = rowNum - 1;
    int yMax = colNum - 1;
    int nblock = 0, nitem = 0;
-    int * indexList = blockIndex;
+    int * indexList = blockIndex;
 
     int xSegNum = int((float)rowNum / colSize);
     int ySegNum = int((float)colNum / rowSize);
@@ -175,7 +175,7 @@ int SegmentTensor2D(int rowNum, int colNum, int blockNum, int * blockIndex)
             x2 = step - 1;
             y2 = yMax;
             while (x2 <= xMax) {
-                int * blockIndex = indexList + nblock * 4;
+                int * blockIndex = indexList + nblock * 4;
                 blockIndex[0] = x1; blockIndex[1] = y1;
                 blockIndex[2] = x2; blockIndex[3] = y2;
                 nblock++;
@@ -201,7 +201,7 @@ int SegmentTensor2D(int rowNum, int colNum, int blockNum, int * blockIndex)
             x2 = xMax;
             y2 = step - 1;
             while (y2 <= yMax) {
-                int * blockIndex = indexList + nblock * 4;
+                int * blockIndex = indexList + nblock * 4;
                 blockIndex[0] = x1; blockIndex[1] = y1;
                 blockIndex[2] = x2; blockIndex[3] = y2;
                 nblock++;
@@ -241,7 +241,7 @@ int SegmentTensor2D(int rowNum, int colNum, int blockNum, int * blockIndex)
         }
 
         while (y2 <= yMax) {
-            int * blockIndex = indexList + nblock * 4;
+            int * blockIndex = indexList + nblock * 4;
            blockIndex[0] = x1; blockIndex[1] = y1;
            blockIndex[2] = x2; blockIndex[3] = y2;
            nblock++;

--- a/source/tensor/test/TConcatenate.cpp
+++ b/source/tensor/test/TConcatenate.cpp
@@ -30,7 +30,7 @@ In this case, 2 * (2, 1) -> (2, 2), dim=1.
 bool TestConcatenate1()
 {
 	/* create list */
-    XList * sList = new XList();
+    TensorList * sList = new TensorList();

    /* a source tensor of size (2, 1) */
    int sOrder1 = 2;
@@ -157,7 +157,7 @@ In this case, 2 * (2, 1) -> (4, 1), dim=0.
 bool TestConcatenate2()
 {
 	/* create list */
-    XList * sList = new XList();
+    TensorList * sList = new TensorList();

    /* a source tensor of size (2, 1) */
    int sOrder1 = 2;
@@ -286,7 +286,7 @@ In this case, (2, 1) + (2, 2) -> (2, 3), dim=1.
 bool TestConcatenate3()
 {
 	/* create list */
-    XList * sList = new XList();
+    TensorList * sList = new TensorList();

    /* a source tensor of size (2, 1) */
    int sOrder1 = 2;

--- a/source/tensor/test/TConcatenateSolely.cpp
+++ b/source/tensor/test/TConcatenateSolely.cpp
@@ -31,7 +31,7 @@ In this case, 2 * (2, 1) -> (2, 2), dim=1.
 bool TestConcatenateSolely1()
 {
 	/* create list */
-    XList * sList = new XList();
+    TensorList * sList = new TensorList();

    /* a source tensor of size (2, 1) */
    int sOrder1 = 2;
@@ -154,7 +154,7 @@ In this case, 2 * (2, 1) -> (4, 1), dim=0.
 bool TestConcatenateSolely2()
 {
 	/* create list */
-    XList * sList = new XList();
+    TensorList * sList = new TensorList();

    /* a source tensor of size (2, 1) */
    int sOrder1 = 2;
@@ -279,7 +279,7 @@ In this case, (2, 1) + (2, 2) -> (2, 3), dim=1.
 bool TestConcatenateSolely3()
 {
 	/* create list */
-    XList * sList = new XList();
+    TensorList * sList = new TensorList();

    /* a source tensor of size (2, 1) */
    int sOrder1 = 2;

--- a/source/tensor/test/TMerge.cpp
+++ b/source/tensor/test/TMerge.cpp
@@ -246,7 +246,7 @@ In this case, 2 * (2, 4) -> (4, 4), whereToMerge=0.
 bool TestMerge3()
 {
    /* create list */
-    XList * smallList = new XList();
+    TensorList * smallList = new TensorList();

    /* a small tensor of size (2, 4) */
    int sOrder = 2;
@@ -364,7 +364,7 @@ In this case, 2 * (2, 4) -> (2, 8), whereToMerge=1.
 bool TestMerge4()
 {
    /* create list */
-    XList * smallList = new XList();
+    TensorList * smallList = new TensorList();

    /* a small tensor of size (2, 4) */
    int sOrder = 2;

--- a/source/tensor/test/TSplit.cpp
+++ b/source/tensor/test/TSplit.cpp
@@ -222,8 +222,8 @@ In this case, (3, 4) -> 2 * (3, 2) , whereToSplit=1, splitNum=2.
 bool TestSplit3()
 {
 	/* create list */
-    XList * tList = new XList();
-    XList tUserList;
+    TensorList * tList = new TensorList();
+    TensorList tUserList;

    /* a source tensor of size (3, 4) */
    int sOrder = 2;