合并分支 'xiaotong-working' 到 'master'

Xiaotong working 查看合并请求 NiuTrans/NiuTrans.Tensor!4

合并分支 'xiaotong-working' 到 'master'
Xiaotong working 查看合并请求 NiuTrans/NiuTrans.Tensor!4
25ec9c77 · xiaotong · 2631f50f · e02c6b92 · 25ec9c77 · 25ec9c77
Commit 25ec9c77 authored Jul 05, 2018 by xiaotong
--- a/.gitignore
+++ b/.gitignore
+NiuTrans.Tensor.vcxproj
+NiuTrans.Tensor.vcxproj.filters
+x64/
--- a/source/XBLAS.h
+++ b/source/XBLAS.h
@@ -12,7 +12,7 @@
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
- * limitations under the License.
+ * limitations under the License.
 */

 /*
@@ -206,4 +206,4 @@ extern void UnloadBLAS();

 } /* end of the nts (NiuTrans.Tensor) namespace */

-#endif
\ No newline at end of file
+#endif
--- a/source/XLink.cpp
+++ b/source/XLink.cpp
+/* NiuTrans.Tensor - an open-source tensor library
+ * Copyright (C) 2018, Natural Language Processing Lab, Northeastern University.
+ * All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-04
+ */
+
+#include <stdio.h>
+#include "XLink.h"
+
+namespace nts{ // namespace nts(NiuTrans.Tensor)
+
+/* size (in bytes) of the slot reserved for each parameter stored in "params" */
+int XLink::paramSize = 64;
+
+/* constructor: build an empty link (no head, no tails, no parameters, empty type name) */
+XLink::XLink()
+    : head(NULL), tails(NULL), tailNum(0), params(NULL), paramNum(0)
+{
+    type[0] = '\0';
+}
+    
+/* destructor: release the parameter buffer and the tail array held by this link */
+XLink::~XLink()
+{
+    char * paramBuf = (char*)params;
+    delete[] paramBuf;
+    delete[] tails;
+}
+
+/* reset it: free the tail array and the parameter buffer, then go back to the empty state */
+void XLink::Reset()
+{
+    /* release what has been allocated so far */
+    delete[] (char*)params;
+    delete[] tails;
+
+    /* same state as right after construction */
+    params   = NULL;
+    tails    = NULL;
+    head     = NULL;
+    paramNum = 0;
+    tailNum  = 0;
+    type[0]  = '\0';
+}
+
+/* 
+set edge type name 
+>> typeName - type name in string. A NULL pointer leaves the name empty.
+              A name longer than MAX_OP_NAME_LENGTH - 1 characters is truncated
+              so that it always fits in the fixed-size "type" buffer.
+*/
+void XLink::SetType(const char * typeName)
+{
+    type[0] = 0;
+    if(typeName == NULL)
+        return;
+
+    /* "type" holds MAX_OP_NAME_LENGTH chars only: bound the copy and always terminate it */
+    strncpy(type, typeName, MAX_OP_NAME_LENGTH - 1);
+    type[MAX_OP_NAME_LENGTH - 1] = 0;
+}
+
+/* 
+set head 
+>> h - pointer to the tensor that becomes the head of this hyperedge
+*/
+void XLink::SetHead(XTensor * h)
+{
+    this->head = h;
+}
+
+/* 
+add a tail
+The tail array is re-allocated with one more slot, the existing tails are
+copied over and the old array is released.
+>> t - pointer to the tail tensor
+*/
+void XLink::AddTail(XTensor * t)
+{
+    XTensor ** oldTails = tails;
+    tails = new XTensor*[tailNum + 1];
+
+    /* there is no old array when the first tail is added, and memcpy must
+       not be handed a NULL pointer (even for a size of zero) */
+    if(oldTails != NULL)
+        memcpy(tails, oldTails, sizeof(XTensor*) * tailNum);
+
+    tails[tailNum++] = t;
+    delete[] oldTails;
+}
+
+/* 
+add two tails in one time 
+The tail array is re-allocated with two more slots, the existing tails are
+copied over and the old array is released.
+>> t1 - pointer to the tail tensor
+>> t2 - pointer to another tail tensor
+*/
+void XLink::AddTwoTails(XTensor * t1, XTensor * t2)
+{
+    XTensor ** oldTails = tails;
+    tails = new XTensor*[tailNum + 2];
+
+    /* there is no old array when the link is still empty, and memcpy must
+       not be handed a NULL pointer (even for a size of zero) */
+    if(oldTails != NULL)
+        memcpy(tails, oldTails, sizeof(XTensor*) * tailNum);
+
+    tails[tailNum++] = t1;
+    tails[tailNum++] = t2;
+    delete[] oldTails;
+}
+
+/* 
+add a parameter 
+Every parameter occupies one slot of paramSize bytes in "params"; the buffer
+is re-allocated with one more slot and the value is written at the start of
+the new slot.
+>> param - parameter in default type
+*/
+void XLink::AddParam(DTYPE param)
+{
+    char * oldParams = (char*)params;
+
+    /* the buffer is addressed in slots of paramSize bytes, so it has to be
+       allocated in bytes, not in number of parameters */
+    char * newParams = new char[(paramNum + 1) * paramSize];
+
+    /* there is no old buffer when the first parameter is added */
+    if(oldParams != NULL)
+        memcpy(newParams, oldParams, paramNum * paramSize);
+
+    /* clear the new slot so that the bytes after the value are well defined */
+    char * slot = newParams + paramNum * paramSize;
+    memset(slot, 0, paramSize);
+    DTYPE * p = (DTYPE*)slot;
+    *p = param;
+
+    params = newParams;
+    paramNum++;
+    delete[] oldParams;
+}
+
+/* 
+add a parameter 
+Every parameter occupies one slot of paramSize bytes in "params"; the buffer
+is re-allocated with one more slot and "size" bytes are copied from "param"
+to the start of the new slot.
+>> param - pointer to the parameter
+>> size - size of the parameter (in bytes), must not exceed paramSize
+*/
+void XLink::AddParam(void * param, int size)
+{
+    CheckNTErrors(param != NULL, "Empty parameter!");
+    CheckNTErrors(size >= 0 && size <= paramSize, "Illegal parameter size!");
+
+    char * oldParams = (char*)params;
+
+    /* the buffer is addressed in slots of paramSize bytes, so it has to be
+       allocated in bytes, not in number of parameters */
+    char * newParams = new char[(paramNum + 1) * paramSize];
+
+    /* there is no old buffer when the first parameter is added */
+    if(oldParams != NULL)
+        memcpy(newParams, oldParams, paramNum * paramSize);
+
+    /* clear the new slot so that the bytes after the value are well defined */
+    char * slot = newParams + paramNum * paramSize;
+    memset(slot, 0, paramSize);
+    memcpy(slot, param, size);
+
+    params = newParams;
+    paramNum++;
+    delete[] oldParams;
+}
+/* 
+create a hyperedge with two input tensors and a output tensor 
+The incoming link of the head is rebuilt from scratch (head, tails and type
+name), and the head is appended as a tail to the outgoing link of each
+non-NULL input tensor.
+>> t1 - a tail tensor
+>> t2 - another tail tensor (NULL if the operation has a single input)
+>> h - head tensor. Nothing is done if it is NULL
+>> typeName - name of edge type
+*/
+void XLink::MakeLink(XTensor * t1, XTensor * t2, XTensor * h, const char * typeName)
+{
+    /* without a head there is no edge to build */
+    if(h == NULL)
+        return;
+
+    /* forward */
+    XLink &income = h->income;
+    income.Reset();
+    income.SetHead(h);
+    if(t1 != NULL && t2 != NULL)
+        income.AddTwoTails(t1, t2);
+    else if(t1 != NULL)
+        income.AddTail(t1);
+    else{
+        ShowNTErrors("TODO!");
+    }
+    income.SetType(typeName);
+
+    /* backward for t1 */
+    /* NOTE(review): outgo.head is never set in this function, so the check below only
+       rejects an input whose outgoing link already has h as its head - confirm the intent */
+    if(t1 != NULL){
+        XLink &outgo = t1->outgo;
+        CheckNTErrors(outgo.head != h, "Wrong head of the hyperedge!");
+        outgo.AddTail(h);
+    }
+
+    /* backward for t2 */
+    if(t2 != NULL){
+        XLink &outgo = t2->outgo;
+        CheckNTErrors(outgo.head != h, "Wrong head of the hyperedge!");
+        outgo.AddTail(h);
+    }
+}
+
+/* 
+add parameters 
+The parameter is appended to the incoming link of the head.
+>> h - head. Nothing is done if it is NULL
+>> param - parameter we want introduce
+*/
+void XLink::AddParamToHead(XTensor * h, DTYPE param)
+{
+    /* a NULL head has no incoming link to store the parameter in */
+    if(h == NULL)
+        return;
+    h->income.AddParam(param);
+}
+
+/* 
+add an integer parameter 
+The integer is appended (as sizeof(int) raw bytes) to the incoming link of the head.
+>> h - head. Nothing is done if it is NULL
+>> param - parameter we want introduce
+*/
+void XLink::AddParamToHeadInt(XTensor * h, int param)
+{
+    /* a NULL head has no incoming link to store the parameter in */
+    if(h == NULL)
+        return;
+    h->income.AddParam(&param, sizeof(int));
+}
+    
+} // namespace nts(NiuTrans.Tensor)
+
--- a/source/XLink.h
+++ b/source/XLink.h
+/* NiuTrans.Tensor - an open-source tensor library
+ * Copyright (C) 2018, Natural Language Processing Lab, Northeastern University.
+ * All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-04
+ */
+
+#include <stdio.h>
+#include "XGlobal.h"
+#include "XTensor.h"
+
+#ifndef __XLINK_H__
+#define __XLINK_H__
+
+#include "XGlobal.h"
+
+namespace nts{ // namespace nts(NiuTrans.Tensor)
+
+/* cross reference */
+struct XTensor;
+
+#define MAX_OP_NAME_LENGTH 16
+
+/*
+This defines the link among tensors in networks. An XLink can be
+viewed as a hyperedge in a graph. When we compute on tensors, we actually create a
+network where nodes are tensors and edges are the connections among them. Each connection is
+a hyperedge whose head is the output tensor and whose tails are the input tensors. E.g.,
+c = a + b
+represents a network with three nodes (a, b and c) and a hyperedge that links a and b (tails) to c (head).
+ 
+   + (=c)
+  / \
+ a   b
+ 
+for c, we have an incoming edge (a, b) -> c
+for a, we also have an edge c -> a in the reverse order (in a view of acyclic directed graphs)
+
+NOTE(review): the tail array and the parameter buffer are freed in the destructor, but no
+copy constructor or assignment operator is declared here - confirm that XLink objects
+(and the tensors that embed them) are never copied by value.
+*/
+struct XLink
+{
+    /* head of the hyperedge */
+    XTensor *  head;
+
+    /* tails of the hyperedge */
+    XTensor ** tails;
+
+    /* number of tails */
+    int tailNum;
+
+    /* parameters used. e.g., c = a * b * \alpha 
+       scalar \alpha is the parameter */
+    void * params;
+
+    /* number of parameters */
+    int paramNum;
+
+    /* size (in bytes) of the slot reserved for each parameter */
+    static int paramSize;
+
+    /* name of the hyperedge type. e.g., sum, mul ... */
+    char type[MAX_OP_NAME_LENGTH];
+    
+    /* constructor */
+    XLink();
+    
+    /* destructor */
+    ~XLink();
+
+    /* reset it */
+    void Reset();
+
+    /* set edge type name */
+    void SetType(const char * typeName);
+
+    /* set head */
+    void SetHead(XTensor * h);
+
+    /* add a tail */
+    void AddTail(XTensor * t);
+
+    /* add two tails in one time */
+    void AddTwoTails(XTensor * t1, XTensor * t2);
+
+    /* add a parameter of the default data type (DTYPE) */
+    void AddParam(DTYPE param);
+
+    /* add a parameter given as a pointer to its data and its size */
+    void AddParam(void * param, int size);
+
+    /* create a hyperedge with two input tensors and an output tensor */
+    static
+    void MakeLink(XTensor * t1, XTensor * t2, XTensor * h, const char * typeName);
+
+    /* add a parameter (of the default data type) to the incoming link of the head */
+    static
+    void AddParamToHead(XTensor * h, DTYPE param);
+
+    /* add an integer parameter to the incoming link of the head */
+    static
+    void AddParamToHeadInt(XTensor * h, int param);
+};
+    
+} // namespace nts(NiuTrans.Tensor)
+
+#endif // __XLINK_H__
\ No newline at end of file
--- a/source/XName.h
+++ b/source/XName.h
+/* NiuTrans.Tensor - an open-source tensor library
+ * Copyright (C) 2018, Natural Language Processing Lab, Northeastern University. 
+ * All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ *
+ * We define various names here
+ *
+ * $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-05
+ * It was really HOT these days. I can't imagine what a hot day in Shenyang!
+*/
+
+#ifndef __XNAME_H__
+#define __XNAME_H__
+
+namespace nts { // namespace nts(NiuTrans.Tensor)
+
+/* names of the operations. They are passed to XLink::MakeLink as the type name of the
+   hyperedge, so each of them has to fit in XLink::type (MAX_OP_NAME_LENGTH chars,
+   including the terminating zero) */
+#define MATH_MATMUL "M_MATMUL"
+#define MATH_REDUCEMAX "M_REDUCEMAX"
+#define MATH_REDUCESUM "M_REDUCESUM"
+#define MATH_SELECTRANGE "M_SELECTRANGE"
+#define MATH_SORT "M_SORT"
+#define MATH_SUM "M_SUM"
+#define MATH_TOPK "M_TOPK"
+#define MATH_UNSQUEEZE "M_UNSQUEEZE"
+
+
+
+} // namespace nts(NiuTrans.Tensor)
+
+#endif // __XNAME_H__
\ No newline at end of file
--- a/source/XTensor.h
+++ b/source/XTensor.h
@@ -21,7 +21,7 @@
 *
 * $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2017-07-31
 * I'm working while most of the students are enjoying their holidays :(
- * $Update by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2017-11-18 bug fixes
+ * $Updated by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2017-11-18 bug fixes
 *
 */

@@ -36,10 +36,14 @@
 #include "XList.h"
 #include "XDataType.h"
 #include "XMem.h"
+#include "XLink.h"

 /* the nts (NiuTrans.Tensor) namespace */
 namespace nts{

+/* cross reference */
+struct XLink;
+
 /* define the maximum number of dimensions in a tensor */
 #define MAX_TENSOR_DIM_NUM 6
 #define USE_BATCHED_STRIDED_MAT_MUL
@@ -47,9 +51,7 @@ namespace nts{
 #define MIN_TENSOR_SPLIT_LIST_NUM 1024
 #define MIN_TENSOR_CAT_NUM 8

-/*
-computation flags
-*/
+/* computation flags */
 #define UNSAFE_BUT_FAST_MEM
 #define FAST_MATRIX

@@ -59,7 +61,6 @@ is the parent class of XMatrix.
 */
 struct XTensor
 {
-public:
    /* memory pool */
    XMem * mem;

@@ -129,11 +130,24 @@ public:

    /* indicates whether the tensor is initialized or not */
    bool isInit;
+    
+    /*
+    the link used to form networks. Note that when we compute on tensors, we actually create a
+    network where nodes are tensors and edges the connections among them. Each connection is
+    a hyperedge whose head is the output tensor and tails are input tensors. E.g,
+    c = a + b
+    represents a network with three nodes (a, b and c) and a hyperedge that links a and b (tails) to c (head).
+    Here "income" keeps which nodes (tensors) are used to form the current node (tensor).
+    */
+    XLink income;
+    
+    /* It keeps which nodes (tensors) we go to from the current node (tensor). */
+    XLink outgo;

-/*******************************************************************
-XTensor untilities
-*/
-public:
+    /********************
+     XTensor utilities
+    ********************/
+    
    /* constructor */
    XTensor();


--- a/source/core/CHeader.h
+++ b/source/core/CHeader.h
@@ -42,7 +42,7 @@
 #include "MatrixMULBatchedCPU.h"
 #include "Merge.h"
 #include "MergeBlockLists.h"
-#include "MultiplyElementWise.h"
+#include "Multiply.h"
 #include "Negate.h"
 #include "Normalize.h"
 #include "Power.h"

--- a/source/core/MultiplyElementWise.cpp
+++ b/source/core/MultiplyElementWise.cpp
@@ -20,8 +20,8 @@
 */

 #include "../XTensor.h"
-#include "MultiplyElementWise.h"
-#include "MultiplyElementWise.cuh"
+#include "Multiply.h"
+#include "Multiply.cuh"

 namespace nts { // namespace nts(NiuTrans.Tensor)
 /*
@@ -34,7 +34,7 @@ where i is the index of the item
 >> alpha - the coefficient
 >>
 */
-void MultiplyElementWise(XTensor * a, XTensor * b, XTensor * c, int leadingDim, DTYPE alpha)
+void Multiply(XTensor * a, XTensor * b, XTensor * c, int leadingDim, DTYPE alpha)
 {
 	int leadingDimRDI = a->order - leadingDim - 1;
    CheckNTErrors((a->unitNum <= c->unitNum && b->unitNum <= c->unitNum),
@@ -43,7 +43,7 @@ void MultiplyElementWise(XTensor * a, XTensor * b, XTensor * c, int leadingDim, 

 #ifdef USE_CUDA
    if (a->devID >= 0 || b->devID >= 0 || c->devID >= 0) {
-        CudaMultiplyElementWise(a, b, c, leadingDim, alpha);
+        CudaMultiply(a, b, c, leadingDim, alpha);
        return;
    }
 #endif

--- a/source/core/MultiplyElementWise.cu
+++ b/source/core/MultiplyElementWise.cu
@@ -21,8 +21,8 @@

 #include "../XDevice.h"
 #include "../XTensor.h"
-#include "MultiplyElementWise.h"
-#include "MultiplyElementWise.cuh"
+#include "Multiply.h"
+#include "Multiply.cuh"

 namespace nts { // namespace nts(NiuTrans.Tensor)

@@ -120,7 +120,7 @@ where i is the item index
 >> alpha - the coefficient
 */
 extern "C"
-void CudaMultiplyElementWise(XTensor * a, XTensor * b, XTensor * c, int leadingDim, DTYPE alpha)
+void CudaMultiply(XTensor * a, XTensor * b, XTensor * c, int leadingDim, DTYPE alpha)
 {
 	int leadingDimRDI = a->order - leadingDim - 1;
    CheckNTErrors((a->unitNum <= c->unitNum && b->unitNum <= c->unitNum),

--- a/source/core/MultiplyElementWise.cuh
+++ b/source/core/MultiplyElementWise.cuh
@@ -19,10 +19,10 @@
 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
 */

-#ifndef __MULTIPLYELEMENTWISE_CUH__
-#define __MULTIPLYELEMENTWISE_CUH__
+#ifndef __MULTIPLY_CUH__
+#define __MULTIPLY_CUH__

-#include "MultiplyElementWise.h"
+#include "Multiply.h"

 namespace nts { // namespace nts(NiuTrans.Tensor)

@@ -42,11 +42,11 @@ void KernelMulElementWiseTensorDynamic(DTYPE * a, DTYPE * b, DTYPE * c, DTYPE al

 /* element-wise product of two tensors */
 extern "C"
-void CudaMultiplyElementWise(XTensor * a, XTensor * b, XTensor * c, int leadingDim, DTYPE alpha);
+void CudaMultiply(XTensor * a, XTensor * b, XTensor * c, int leadingDim = 0, DTYPE alpha = 0);

 #endif // USE_CUDA

 } // namespace nts(NiuTrans.Tensor)

-#endif // __MULTIPLYELEMENTWISE_CUH__
+#endif // __MULTIPLY_CUH__

--- a/source/core/MultiplyElementWise.h
+++ b/source/core/MultiplyElementWise.h
@@ -19,8 +19,8 @@
 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
 */

-#ifndef __MULTIPLYELEMENTWISE_H__
-#define __MULTIPLYELEMENTWISE_H__
+#ifndef __MULTIPLY_H__
+#define __MULTIPLY_H__

 #include "../XTensor.h"

@@ -28,8 +28,8 @@ namespace nts { // namespace nts(NiuTrans.Tensor)

 /* element-wise product of two tensors */
 extern "C"
-void MultiplyElementWise(XTensor * a, XTensor * b, XTensor * c, int leadingDim, DTYPE alpha = 0);
+void Multiply(XTensor * a, XTensor * b, XTensor * c, int leadingDim = 0, DTYPE alpha = 0);

 } // namespace nts(NiuTrans.Tensor)

-#endif // __MULTIPLYELEMENTWISE_H__
\ No newline at end of file
+#endif // __MULTIPLY_H__
\ No newline at end of file
--- a/source/core/ReduceMax.cpp
+++ b/source/core/ReduceMax.cpp
@@ -20,6 +20,7 @@
 */

 #include "../XTensor.h"
+#include "../XName.h"
 #include "ReduceMax.h"
 #include "ReduceMax.cuh"

@@ -34,7 +35,7 @@ get the max value of the items along a dimension of the tensor.
 void ReduceMax(XTensor * input, XTensor * output, int dim)
 {
    CheckNTErrors((input->devID == output->devID || (input->devID < 0 && output->devID < 0)), 
-                        "This code must be run on the same device!");
+                  "This code must be run on the same device!");
    CheckNTErrors((input && output), "Empty input or output tensors!");
    CheckNTErrors((input->order == output->order + 1), "Incorrect tensor sizes!");
    CheckNTErrors((input->order > dim && dim >=0), "Illegal dimension to reduce!");
@@ -44,14 +45,18 @@ void ReduceMax(XTensor * input, XTensor * output, int dim)
    for(int i = 0; i < input->order; i++){
        if(i < dimRDI){
            CheckNTErrors((input->dimSizeRDI[i] == output->dimSizeRDI[i]), 
-                                 "Unmatched tensors!");
+                          "Unmatched tensors!");
        }
        else if(i > dimRDI){
            CheckNTErrors((input->dimSizeRDI[i] == output->dimSizeRDI[i - 1]), 
-                                "Unmatched tensors!");
+                          "Unmatched tensors!");
        }
    }

+    /* make tensor connections */
+    XLink::MakeLink(input, NULL, output, MATH_REDUCEMAX);
+    XLink::AddParamToHeadInt(output, dim);
+
    if(input->devID >= 0){
 #ifdef USE_CUDA
        CudaReduceMax(input, output, dim);

--- a/source/core/ReduceSum.cpp
+++ b/source/core/ReduceSum.cpp
@@ -22,6 +22,7 @@
 #include <math.h>
 #include "ReduceSum.h"
 #include "ReduceSum.cuh"
+#include "../XName.h"

 namespace nts{ // namespace nts(NiuTrans.Tensor)

@@ -58,6 +59,12 @@ void ReduceSum(XTensor * input, XTensor * output, int dim, XTensor * shift, DTYP
        }
    }

+    /* make tensor connections */
+    XLink::MakeLink(input, shift, output, MATH_REDUCESUM);
+    XLink::AddParamToHeadInt(output, dim);
+    XLink::AddParamToHead(output, power);
+    XLink::AddParamToHeadInt(output, isExp);
+
    if(input->devID >= 0){
 #ifdef USE_CUDA
        CudaReduceSum(input, output, dim, shift, power, isExp);

--- a/source/core/Select.cpp
+++ b/source/core/Select.cpp
@@ -19,8 +19,9 @@
 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-07-04
 */

-#include "Select.h"
 #include "../XUtility.h"
+#include "../XName.h"
+#include "Select.h"

 namespace nts{ // namespace nts(NiuTrans.Tensor)

@@ -28,13 +29,13 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
 generate a tensor with seleccted data in range[low,high] along the given dimension 
 c = select(a) 
 >> a - input tensor
+>> c - result tensor
 >> dim - the dimension along with which we do the job
 >> low - lower bound
 >> high - higher bound.
          Note that range [1,3] means that we select 1 and 2.
->> c - result tensor
 */
-void SelectRange(XTensor * a, int dim, int low, int high, XTensor * c)
+void SelectRange(XTensor * a, XTensor * c, int dim, int low, int high)
 {
    CheckNTErrors(a != NULL && c != NULL, "empty tensors!");
    CheckNTErrors(a->order == c->order, "The input and output tensors must in the same order!");
@@ -54,6 +55,12 @@ void SelectRange(XTensor * a, int dim, int low, int high, XTensor * c)
        }
    }

+    /* make tensor connections */
+    XLink::MakeLink(a, NULL, c, MATH_SELECTRANGE);
+    XLink::AddParamToHeadInt(c, dim);
+    XLink::AddParamToHeadInt(c, low);
+    XLink::AddParamToHeadInt(c, high);
+
    int stride = 1;
    for(int i = 0; i < dim; i++)
        stride *= a->dimSizeRDI[i];

--- a/source/core/Select.h
+++ b/source/core/Select.h
@@ -28,12 +28,12 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)

 /* generate a tensor with seleccted data c = select(a) */
 extern "C" 
-void Select(XTensor * a, XTensor * indexCPU, XTensor * c);
+void Select(XTensor * a, XTensor * c, XTensor * indexCPU);

 /* generate a tensor with seleccted data in range[low,high] along the given dimension 
   c = select(a) */
 extern "C" 
-void SelectRange(XTensor * a, int dim, int low, int high, XTensor * c);
+void SelectRange(XTensor * a, XTensor * c, int dim, int low, int high);

 } // namespace nts(NiuTrans.Tensor)


--- a/source/core/Sort.cpp
+++ b/source/core/Sort.cpp
@@ -21,6 +21,7 @@

 #include "../XTensor.h"
 #include "../XUtility.h"
+#include "../XName.h"
 #include "Sort.h"
 #include "Sort.cuh"

@@ -38,6 +39,10 @@ void Sort(XTensor * a, XTensor * index, int dim)
    CheckNTErrors((a->order == index->order), "Unmatched input tensors!");
    CheckNTErrors((index->dataType == X_INT), "Wrong data type!");

+    /* make tensor connections */
+    XLink::MakeLink(a, NULL, index, MATH_SORT);
+    XLink::AddParamToHeadInt(index, dim);
+
 	int dimRDI = a->order - dim - 1;
    /* make the index tensor */
    index->SetAscendingOrder(dim);

--- a/source/core/Sum.cpp
+++ b/source/core/Sum.cpp
@@ -20,6 +20,7 @@
 */

 #include "../XTensor.h"
+#include "../XName.h"
 #include "Sum.h"
 #include "Sum.cuh"

@@ -37,14 +38,15 @@ void Sum(XTensor * a, XTensor * b, XTensor * c, DTYPE beta)
    if (c == NULL)
        c = a;

-    CheckNTErrors((a && b && c),
-        "Empty tensors in addition!");
+    CheckNTErrors(a && b && c, "Empty tensors in addition!");
+    CheckNTErrors(a->unitNum == b->unitNum && a->unitNum == c->unitNum,
+                  "Unmatched tensors in addition!");
+    CheckNTErrors(a->dataType == b->dataType && a->dataType == c->dataType,
+                  "Unmatched tensors in addition!");

-    CheckNTErrors((a->unitNum == b->unitNum && a->unitNum == c->unitNum),
-        "Unmatched tensors in addition!");
-
-    CheckNTErrors((a->dataType == b->dataType && a->dataType == c->dataType),
-        "Unmatched tensors in addition!");
+    /* make tensor connections */
+    XLink::MakeLink(a, b, c, MATH_SUM);
+    XLink::AddParamToHead(c, beta);

    if (a->devID >= 0 || b->devID >= 0 || c->devID >= 0) {


--- a/source/core/TopK.cpp
+++ b/source/core/TopK.cpp
@@ -20,6 +20,7 @@
 */

 #include "../XTensor.h"
+#include "../XName.h"
 #include "TopK.h"
 #include "TopK.cuh"

@@ -39,6 +40,11 @@ void TopK(XTensor * a, XTensor * b, XTensor * index, int dim, int k)
    CheckNTErrors((index == NULL || a->order == index->order), "Unmatched input tensors!");
    CheckNTErrors((index->dataType == X_INT), "Wrong data type!");

+    /* make tensor connections */
+    XLink::MakeLink(a, b, index, MATH_TOPK);
+    XLink::AddParamToHeadInt(index, dim);
+    XLink::AddParamToHeadInt(index, k);
+
    int dimRDI = a->order - dim - 1;
    for (int i = 0; i < a->order; i++) {
        if (i == dimRDI) {

--- a/source/core/Unsqueeze.cpp
+++ b/source/core/Unsqueeze.cpp
@@ -20,6 +20,7 @@
 */

 #include "../XTensor.h"
+#include "../XName.h"
 #include "Unsqueeze.h"
 #include "MergeBlockLists.h"
 #include "Unsqueeze.cuh"
@@ -39,6 +40,11 @@ void Unsqueeze(XTensor * a, XTensor * b, int dim, int dSize)
    CheckNTErrors((a->order == b->order - 1), "Unmatched tensors!");
    CheckNTErrors((a->unitSize == b->unitSize), "Unmatched tensors!");

+    /* make tensor connections */
+    XLink::MakeLink(a, NULL, b, MATH_UNSQUEEZE);
+    XLink::AddParamToHeadInt(b, dim);
+    XLink::AddParamToHeadInt(b, dSize);
+
    int dimRDI = b->order - dim - 1;
    for (int i = 0; i < b->order; i++) {
        if (i < dimRDI) {

--- a/source/function/Softmax.cu
+++ b/source/function/Softmax.cu
@@ -23,7 +23,7 @@
 #include "Softmax.cuh"
 #include "Loss.cuh"
 #include "../core/ReduceSum.h"
-#include "../core/MultiplyElementWise.h"
+#include "../core/Multiply.h"
 #include "../core/Unsqueeze.h"
 #include "../core/Sum.h"
 #include "../XDevice.h"
@@ -288,7 +288,7 @@ void CudaSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
            beta->data = mem->AllocBuf(mem->devID, beta->unitNum * beta->unitSize);

            /* \beta = \sum_i (dE/dy_i * y_i) */
-            MultiplyElementWise(dedy, y, ytmp, 0);
+            Multiply(dedy, y, ytmp, 0);
            ReduceSum(ytmp, beta, leadDim);

            /* ytmp = dE/dy_j - \beta */
@@ -296,7 +296,7 @@ void CudaSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
            Sum(dedy, ytmp, ytmp, -1.0F);

            /* dE/ds_j = y_j * ytmp = y_j * (dE/dy_j - \beta) */
-            MultiplyElementWise(y, ytmp, dedx, 0);
+            Multiply(y, ytmp, dedx, 0);

            mem->ReleaseBuf(mem->devID, y->unitNum * y->unitSize);
            mem->ReleaseBuf(mem->devID, beta->unitNum * beta->unitSize);

--- a/source/sample/fnnlm/FNNLM.cpp
+++ b/source/sample/fnnlm/FNNLM.cpp
@@ -485,7 +485,7 @@ float GetProb(XTensor &output, XTensor &gold, XTensor * wordProbs)
    InitTensor(&probs, &output);
    
    /* probs[i,j] = output[i,j] * gold[i,j] */
-    MultiplyElementWise(&output, &gold, &probs, 0);
+    Multiply(&output, &gold, &probs, 0);

    /* probability of each word */
    XTensor wprobs;

--- a/source/test/TConcatenateSolely.cpp
+++ b/source/test/TConcatenateSolely.cpp
@@ -129,8 +129,12 @@ bool TestConcatenateSolely1()
 	return cpuTest && gpuTest;
 #else
 	/* destroy variables */
-	delete s1, s2, t;
-	delete[] sDimSize1, sDimSize2, tDimSize;
+    delete s1;
+    delete s2;
+    delete t;
+    delete[] sDimSize1;
+    delete[] sDimSize2;
+    delete[] tDimSize;

 	return cpuTest;
 #endif // USE_CUDA

--- a/source/test/THardTanH.cpp
+++ b/source/test/THardTanH.cpp
@@ -95,8 +95,10 @@ bool TestHardTanH1()
 	return cpuTest && gpuTest;
 #else
 	/* destroy variables */
-	delete x, y;
-	delete[] xDimSize, yDimSize;
+    delete x;
+    delete y;
+    delete[] xDimSize;
+    delete[] yDimSize;

 	return cpuTest;
 #endif // USE_CUDA

--- a/source/test/TMultiplyElementWise.cpp
+++ b/source/test/TMultiplyElementWise.cpp
@@ -21,13 +21,13 @@

 #include "../XTensor.h"
 #include "../XDevice.h"
-#include "../core/MultiplyElementWise.h"
+#include "../core/Multiply.h"

 namespace nts { // namespace nts(NiuTrans.Tensor)
 /* case 1: element-wise product of two tensors, c(i) = a(i)*b(i) + \alpha * c(i) 
 * In this case, (2 * 1)  (2 * 1) -> (2 * 1), leadingDim=0, alpha=0.
 */
-bool TestMultiplyElementWise1()
+bool TestMultiply1()
 {
 	/* a source tensor of size 2 * 1 */
 	int sOrder1 = 2;
@@ -77,7 +77,7 @@ bool TestMultiplyElementWise1()
 	t->SetZeroAll();

 	/* call multiplyelementwise function */
-	MultiplyElementWise(s1, s2, t, 0);
+	Multiply(s1, s2, t, 0);

 	/* check results */
 	cpuTest = t->CheckData(answer, tUnitNum);
@@ -97,7 +97,7 @@ bool TestMultiplyElementWise1()
 	tGPU->SetZeroAll();

 	/* call multiplyelementwise function */
-	MultiplyElementWise(sGPU1, sGPU2, tGPU, 0);
+	Multiply(sGPU1, sGPU2, tGPU, 0);

 	/* check results */
 	gpuTest = tGPU->CheckData(answer, tUnitNum);
@@ -109,8 +109,12 @@ bool TestMultiplyElementWise1()
 	return cpuTest && gpuTest;
 #else
 	/* destroy variables */
-	delete s1, s2, t;
-	delete[] sDimSize1, sDimSize2, tDimSize;
+    delete s1;
+    delete s2;
+    delete t;
+    delete[] sDimSize1;
+    delete[] sDimSize2;
+    delete[] tDimSize;

 	return cpuTest;
 #endif // USE_CUDA
@@ -119,7 +123,7 @@ bool TestMultiplyElementWise1()
 /* case 2: element-wise product of two tensors, c(i) = a(i)*b(i) + \alpha * c(i)
 * In this case, (2 * 2)  (2 * 2) -> (2 * 2), leadingDim=0, alpha=0.
 */
-bool TestMultiplyElementWise2()
+bool TestMultiply2()
 {
 	/* a source tensor of size (2 * 2) */
 	int sOrder1 = 2;
@@ -172,7 +176,7 @@ bool TestMultiplyElementWise2()
 	t->SetZeroAll();

 	/* call multiplyelementwise function */
-	MultiplyElementWise(s1, s2, t, 0);
+	Multiply(s1, s2, t, 0);

 	/* check results */
 	cpuTest = t->CheckData(answer, tUnitNum);
@@ -192,7 +196,7 @@ bool TestMultiplyElementWise2()
 	tGPU->SetZeroAll();

 	/* call multiplyelementwise function */
-	MultiplyElementWise(sGPU1, sGPU2, tGPU, 0);
+	Multiply(sGPU1, sGPU2, tGPU, 0);

 	/* check results */
 	gpuTest = tGPU->CheckData(answer, tUnitNum);
@@ -214,7 +218,7 @@ bool TestMultiplyElementWise2()
 /* case 3: element-wise product of two tensors, c(i) = a(i)*b(i) + \alpha * c(i)
 * In this case, (2 * 2)  (2 * 2) -> (2 * 2), leadingDim=1, alpha=0.
 */
-bool TestMultiplyElementWise3()
+bool TestMultiply3()
 {
 	/* a source tensor of size (2 * 2) */
 	int sOrder1 = 2;
@@ -267,7 +271,7 @@ bool TestMultiplyElementWise3()
 	t->SetZeroAll();

 	/* call multiplyelementwise function */
-	MultiplyElementWise(s1, s2, t, 1);
+	Multiply(s1, s2, t, 1);

 	/* check results */
 	cpuTest = t->CheckData(answer, tUnitNum);
@@ -287,7 +291,7 @@ bool TestMultiplyElementWise3()
 	tGPU->SetZeroAll();

 	/* call multiplyelementwise function */
-	MultiplyElementWise(sGPU1, sGPU2, tGPU, 1);
+	Multiply(sGPU1, sGPU2, tGPU, 1);

 	/* check results */
 	gpuTest = tGPU->CheckData(answer, tUnitNum);
@@ -313,13 +317,13 @@ TODO!!

 /* test for MultiplyElementWise Function */
 extern "C"
-bool TestMultiplyElementWise()
+bool TestMultiply()
 {
 	XPRINT(0, stdout, "[TEST MULTIPLYELEMENTWISE] -------------\n");
 	bool returnFlag = true, caseFlag = true;

 	/* case 1 test */
-	caseFlag = TestMultiplyElementWise1();
+	caseFlag = TestMultiply1();

 	if (!caseFlag) {
 		returnFlag = false;
@@ -329,7 +333,7 @@ bool TestMultiplyElementWise()
 		XPRINT(0, stdout, ">> case 1 passed!\n");

 	/* case 2 test */
-	caseFlag = TestMultiplyElementWise2();
+	caseFlag = TestMultiply2();

 	if (!caseFlag) {
 		returnFlag = false;
@@ -339,7 +343,7 @@ bool TestMultiplyElementWise()
 		XPRINT(0, stdout, ">> case 2 passed!\n");

 	/* case 3 test */
-	caseFlag = TestMultiplyElementWise3();
+	caseFlag = TestMultiply3();

 	if (!caseFlag) {
 		returnFlag = false;

--- a/source/test/TMultiplyElementWise.h
+++ b/source/test/TMultiplyElementWise.h
@@ -19,16 +19,16 @@
 * $Created by: Lin Ye (email: linye2015@outlook.com) 2018-06-15
 */

-#ifndef __TEST_MULTIPLYELEMENTWISE_H__
-#define __TEST_MULTIPLYELEMENTWISE_H__
+#ifndef __TEST_MULTIPLY_H__
+#define __TEST_MULTIPLY_H__

-#include "../core/MultiplyElementWise.h"
+#include "../core/Multiply.h"

 namespace nts { // namespace nts(NiuTrans.Tensor)

 /* test for MultiplyElementWise Function */
 extern "C"
-bool TestMultiplyElementWise();
+bool TestMultiply();

 } // namespace nts(NiuTrans.Tensor)
 #endif // __TEST_MULTIPLYELEMENTWISE_H__
--- a/source/test/Test.cpp
+++ b/source/test/Test.cpp
@@ -35,7 +35,7 @@ bool Test()
    wrong = !TestMatrixMul2D() || wrong;
    wrong = !TestMatrixMulBatchedCPU() || wrong;
    wrong = !TestMerge() || wrong;
-    wrong = !TestMultiplyElementWise() || wrong;
+    wrong = !TestMultiply() || wrong;
    wrong = !TestNegate() || wrong;
    wrong = !TestNormalize() || wrong;
    //wrong = !TestPower() || wrong;

--- a/source/test/Test.h
+++ b/source/test/Test.h
@@ -28,7 +28,7 @@
 #include "TMatrixMul2D.h"
 #include "TMatrixMULBatchedCPU.h"
 #include "TMerge.h"
-#include "TMultiplyElementWise.h"
+#include "TMultiply.h"
 #include "TNegate.h"
 #include "TNormalize.h"
 #include "TPower.h"