Commit 394e8340 by xuchen

1. redefine the inferences 2. update the test 3. update the manual 4. merged with xiao

parent 9b11391e
...@@ -195,26 +195,21 @@ int main(int argc, const char ** argv) ...@@ -195,26 +195,21 @@ int main(int argc, const char ** argv)
## 访问张量中的内容 ## 访问张量中的内容
在C/C++中,我们通过XTensor.h访问张量中的内容,并且仅需要在源程序中引用XTensor.h头文件就可以完成张量的定义。 在C/C++中,我们通过XTensor.h访问张量中的内容,并且仅需要在源程序中引用XTensor.h头文件就可以完成张量的定义。
在此部分,我们主要对用户在访问张量内容时涉及到的成员变量及方法进行说明,更详细的说明请见附录。
在XTensor.h头文件中定义的成员变量说明: 在XTensor.h头文件中定义的成员变量说明:
| 成员变量 | 功能 | | 成员变量 | 功能 |
| - | - | | - | - |
| XMem * mem | 张量所使用的内存池 | | XMem * mem | 张量所使用的内存池 |
| void * data | 保存元素的数据数组 | | void * data | 保存元素的数据数组 |
| void * dataHost | 主机内存上的数据副本,只在GPU上运行时被激活 |
| int devID | 设备ID,指张量所申请的空间所在CPU或者GPU设备的编号,-1表示CPU | | int devID | 设备ID,指张量所申请的空间所在CPU或者GPU设备的编号,-1表示CPU |
| int order | 张量的维度,例如:一个矩阵(维度为2)是一个二维张量 | | int order | 张量的维度,例如:一个矩阵(维度为2)是一个二维张量 |
| int dimSize<br> [MAX_TENSOR_DIM_NUM] | 张量中每一维度的大小,索引0表示第1维 | | int dimSize<br> [MAX_TENSOR_DIM_NUM] | 张量中每一维度的大小,索引0表示第1维 |
| int dimSizeRDI<br> [MAX_TENSOR_DIM_NUM] | 转置模式下张量中每一维度的大小,索引0表示第1维 |
| TENSOR_DATA_TYPE dataType | 每个数据单元的数据类型 | | TENSOR_DATA_TYPE dataType | 每个数据单元的数据类型 |
| int unitSize | 数据单元的大小,类似于sizeof() | | int unitSize | 数据单元的大小,类似于sizeof() |
| int unitNum | 数据单元的数量 | | int unitNum | 数据单元的数量 |
| bool isSparse | 是否稠密,一个n * m稠密矩阵的数据量大小为n * m,而稀疏(非稠密)矩阵的数据量大小则取决于矩阵中非零元素个数。| | bool isSparse | 是否稠密,一个n * m稠密矩阵的数据量大小为n * m,而稀疏(非稠密)矩阵的数据量大小则取决于矩阵中非零元素个数。|
| int unitNumNonZero | 稀疏矩阵中非零元素个数 |
| float denseRatio | 稠密度,指非零单元的比例,是介于0和1之间的一个实数,0表示所有单元全为零,1表示全为非零单元。| | float denseRatio | 稠密度,指非零单元的比例,是介于0和1之间的一个实数,0表示所有单元全为零,1表示全为非零单元。|
| bool isShared | 标志数据数组是否被其他张量所共享 |
| bool isInGlobalMem | 标志数据是否在全局内存而不是内存池中 |
| bool isAllValued<br> [MAX_TENSOR_DIM_NUM] | 标志稀疏矩阵中是否每个维度都具有非零元素 |
在XTensor.h头文件中定义的方法说明: 在XTensor.h头文件中定义的方法说明:
...@@ -226,33 +221,19 @@ int main(int argc, const char ** argv) ...@@ -226,33 +221,19 @@ int main(int argc, const char ** argv)
| 得到张量中给定的维度大小 | int GetDim(const int dim) | dim - 张量的维度 | | 得到张量中给定的维度大小 | int GetDim(const int dim) | dim - 张量的维度 |
| 重新调整矩阵维度 | void Reshape(<br> const int order, const int * myDimSize) | order - 张量的维度 <br> myDimSize - 张量每一维的大小 | | 重新调整矩阵维度 | void Reshape(<br> const int order, const int * myDimSize) | order - 张量的维度 <br> myDimSize - 张量每一维的大小 |
| 得到张量中元素数量 | int GetSize() | N/A | | 得到张量中元素数量 | int GetSize() | N/A |
| 得到内存使用大小 | int GetDataSizeInChar() | N/A |
| 得到所给数据类型的数据<br> 单元大小 | int GetUnitSize(<br> TENSOR_DATA_TYPE myDataType) | myDataType - 所给数据类型 | | 得到所给数据类型的数据<br> 单元大小 | int GetUnitSize(<br> TENSOR_DATA_TYPE myDataType) | myDataType - 所给数据类型 |
| 张量中所有元素设置为0 | void SetZeroAll(XStream * stream = NULL) | stream - 多线程流| | 张量中所有元素设置为0 | void SetZeroAll(XStream * stream = NULL) | stream - 多线程流|
| 用数组赋值张量 | void SetData(<br> const void * d, int num, int beg = 0) | d - 赋值数组 <br> num - 数组大小 <br> beg - 赋值时从张量的第几位开始 | | 用数组赋值张量 | void SetData(<br> const void * d, int num, int beg = 0) | d - 赋值数组 <br> num - 数组大小 <br> beg - 赋值时从张量的第几位开始 |
| 设置张量服从均匀分布 | void SetDataRand(<br> DTYPE lower, DTYPE upper) | lower - 最小值 <br> upper - 最大值 | | 设置张量服从均匀分布 | void SetDataRand(<br> DTYPE lower, DTYPE upper) | lower - 最小值 <br> upper - 最大值 |
| 设置张量服从正态分布 | void SetDataRandn(<br> DTYPE mean, DTYPE standardDeviation) | mean - 均值 <br> standardDeviation - 标准差 | | 设置张量服从正态分布 | void SetDataRandn(<br> DTYPE mean, DTYPE standardDeviation) | mean - 均值 <br> standardDeviation - 标准差 |
| 检查张量中元素是否相同 | bool CheckData(<br> const void * answer, int num, int beg = 0) | answer - 给定数组 <br> num - 数组大小 <br> beg - 赋值时从张量的第几位开始 |
| 将给定维度中元素<br> 设置为升序 | void SetAscendingOrder(int dim) | dim - 给定维度 | | 将给定维度中元素<br> 设置为升序 | void SetAscendingOrder(int dim) | dim - 给定维度 |
| 获取张量中元素指针 | void * GetCell(int * index, int size) | index - 元素位置 <br> size-矩阵大小 |
| 获取二维张量中元素指针 | void * GetCell2D(int ni, int mi = 0) | ni - 行值 <br> mi - 列值 |
| 获取二维张量的值 | DTYPE Get2D(int ni, int mi = 0) | ni - 行值 <br> mi - 列值 | | 获取二维张量的值 | DTYPE Get2D(int ni, int mi = 0) | ni - 行值 <br> mi - 列值 |
| 获取稀疏张量的值 | DTYPE GetInSparse(int i) | i - 稀疏矩阵中非0元素位置 |
| 获取稀疏张量中<br> 元组的键值 | int GetKeyInSparse(int i) | i - 稀疏矩阵中非0元素位置 |
| 设置二维张量中<br> 的单元值 | bool Set2D(DTYPE value, int ni, int mi = 0) | value - 单元值 <br> ni - 行值 <br> mi - 列值 | | 设置二维张量中<br> 的单元值 | bool Set2D(DTYPE value, int ni, int mi = 0) | value - 单元值 <br> ni - 行值 <br> mi - 列值 |
| 增加二维张量中<br> 的单元值 | bool Add2D(DTYPE value, int ni, int mi = 0) | value - 单元值 <br> ni - 行值 <br> mi - 列值 | | 增加二维张量中<br> 的单元值 | bool Add2D(DTYPE value, int ni, int mi = 0) | value - 单元值 <br> ni - 行值 <br> mi - 列值 |
| 获取稀疏矩阵中<br> 非零元素数量 | int GetNonzeroSize() | N/A |
| 将矩阵重置为特定大小 | bool Resize(<br> const int myOrder, <br> const int * myDimSize, <br> const TENSOR_DATA_TYPE myDataType = DEFAULT_DTYPE, <br> const float myDenseRatio = 1.0F) | myOrder - 张量的维度 <br> myDimSize - 张量每一维的大小,索引0表示第一维 <br> myDataType - 张量的数据类型 <br> myDenseRatio - 张量的稠密度,1表示稠密张量 | | 将矩阵重置为特定大小 | bool Resize(<br> const int myOrder, <br> const int * myDimSize, <br> const TENSOR_DATA_TYPE myDataType = DEFAULT_DTYPE, <br> const float myDenseRatio = 1.0F) | myOrder - 张量的维度 <br> myDimSize - 张量每一维的大小,索引0表示第一维 <br> myDataType - 张量的数据类型 <br> myDenseRatio - 张量的稠密度,1表示稠密张量 |
| 将矩阵重置为特定大小<br>并不申请新空间 | bool ResizeWithNoData(<br> const int myOrder, <br> const int * myDimSize, <br> const TENSOR_DATA_TYPE myDataType = DEFAULT_DTYPE, <br> const float myDenseRatio = 1.0F) | myOrder - 张量的维度 <br> myDimSize - 张量每一维的大小,索引0表示第一维 <br> myDataType - 张量的数据类型 <br> myDenseRatio - 张量的稠密度,1表示稠密张量 |
| 将矩阵重置为<br> 另一矩阵大小 | bool Resize(<br> const XTensor * myTensor) | myTensor - 重置矩阵大小的参考矩阵 | | 将矩阵重置为<br> 另一矩阵大小 | bool Resize(<br> const XTensor * myTensor) | myTensor - 重置矩阵大小的参考矩阵 |
| 用二值搜索方法<br> 找到稀疏矩阵中元素 | bool BinarySearch(<br> int key, DTYPE &value, void * &position) | key - 稀疏矩阵中元素位置 <br> value - 元素值 <br> position - 元素坐标位置 |
| 将数据刷新到<br> 目标设备中 | void FlushToMem(XMem * targetMem) | targetMem - 目标设备 |
| 在全局内存中<br> 申请矩阵的内存空间 | static void AllocateData(<br> XTensor * matrix, <br> XMem * myMem = NULL, <br> bool useBuf = false) | matrix - 申请内存空间的矩阵 <br> myMem - 是否在内存池中申请空间 <br> useBuf - 是否使用缓冲区 |
| 在全局内存中<br> 释放矩阵的内存空间 | static void FreeData(<br> XTensor * matrix, <br> XMem * myMem = NULL, <br> bool useBuf = false) | matrix - 申请内存空间的矩阵 <br> myMem - 是否在内存池中申请空间 <br> useBuf - 是否使用缓冲区 |
| 在缓冲区创建张量 | XTensor * NewTensorBuf( <br> const int myOrder, <br> const int * myDimSize, XMem * myMem, <br> const TENSOR_DATA_TYPE myDataType = <br> X_FLOAT, const float myDenseRatio = 1.0F) | myOrder - 张量的维度 <br> myDimSize - 张量每一维的大小,索引0表示第一维 <br> myMem - 张量所使用的内存池 <br> myDataType - 张量的数据类型 <br> myDenseRatio - 张量的稠密度,1表示稠密张量 |
| 依据给定张量<br>复制一个新的张量 | XTensor * NewTensor(<br>XTensor * a, bool isFilledData = true) | a - 给定张量 <br> isFilledData - 是否申请张量中的数据空间 | | 依据给定张量<br>复制一个新的张量 | XTensor * NewTensor(<br>XTensor * a, bool isFilledData = true) | a - 给定张量 <br> isFilledData - 是否申请张量中的数据空间 |
| 依据给定张量<br>释放数据空间 | void DelTensor(<br>const XTensor * tensor) | tensor - 给定张量 | | 依据给定张量<br>释放数据空间 | void DelTensor(<br>const XTensor * tensor) | tensor - 给定张量 |
| 依据给定张量<br>在缓存中释放数据空间 | void DelTensorBuf(<br>const XTensor * tensor) | tensor - 给定张量 |
## 张量计算 ## 张量计算
...@@ -1344,8 +1325,8 @@ NiuTrans.Tensor/Tensor/test/TRectify.cpp ...@@ -1344,8 +1325,8 @@ NiuTrans.Tensor/Tensor/test/TRectify.cpp
##### 什么是HardTanH? ##### 什么是HardTanH?
HardTanH是一种激活函数,HardTanH函数定义为: HardTanH是一种激活函数,HardTanH函数定义为:
>y = 1 &nbsp;&nbsp;if x > 1 \ >y = 1 &nbsp;&nbsp;if x > 1 <br />
&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; x &nbsp;&nbsp;if -1 <= x <= 1 \ &nbsp;&nbsp; &nbsp;&nbsp;&nbsp; x &nbsp;&nbsp;if -1 <= x <= 1 <br />
&nbsp;&nbsp; &nbsp; -1 &nbsp;&nbsp;if x < -1 &nbsp;&nbsp; &nbsp; -1 &nbsp;&nbsp;if x < -1
##### HardTanH调用 ##### HardTanH调用
...@@ -1493,10 +1474,10 @@ NiuTrans.Tensor/Tensor/test/TSoftmax.cpp ...@@ -1493,10 +1474,10 @@ NiuTrans.Tensor/Tensor/test/TSoftmax.cpp
##### 什么是Loss? ##### 什么是Loss?
Loss Function(损失函数)是用来衡量神经网络模型效果及优化目标的一种损失函数,函数定义为: Loss Function(损失函数)是用来衡量神经网络模型效果及优化目标的一种损失函数,函数定义为:
>squared error : loss = sum_{i} 0.5*(gold_i - output_i)^2 \ >squared error : loss = sum_{i} 0.5*(gold_i - output_i)^2 <br />
cross entropy : loss = sum_{i} (-gold_i * log(output_i)) \ cross entropy : loss = sum_{i} (-gold_i * log(output_i)) <br />
one hot error : loss = sum_{i} e_i \ one hot error : loss = sum_{i} e_i <br />
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; where e_i = 0.5*(t_i - y_i)^2 &nbsp;&nbsp;if t_i = 1, \ &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; where e_i = 0.5*(t_i - y_i)^2 &nbsp;&nbsp;if t_i = 1, <br />
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;e_i = 0 &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; otherwise &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;e_i = 0 &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; otherwise
...@@ -1539,3 +1520,64 @@ NiuTrans.Tensor/Tensor/test/TLoss.cpp ...@@ -1539,3 +1520,64 @@ NiuTrans.Tensor/Tensor/test/TLoss.cpp
## 实例3:循环神经网络 ## 实例3:循环神经网络
## 致谢 ## 致谢
## 附录
在XTensor.h头文件中定义的成员变量说明:
| 成员变量 | 功能 |
| - | - |
| XMem * mem | 张量所使用的内存池 |
| void * data | 保存元素的数据数组 |
| void * dataHost | 主机内存上的数据副本,只在GPU上运行时被激活 |
| int devID | 设备ID,指张量所申请的空间所在CPU或者GPU设备的编号,-1表示CPU |
| int order | 张量的维度,例如:一个矩阵(维度为2)是一个二维张量 |
| int dimSize<br> [MAX_TENSOR_DIM_NUM] | 张量中每一维度的大小,索引0表示第1维 |
| int dimSizeRDI<br> [MAX_TENSOR_DIM_NUM] | 转置模式下张量中每一维度的大小,索引0表示第1维 |
| TENSOR_DATA_TYPE dataType | 每个数据单元的数据类型 |
| int unitSize | 数据单元的大小,类似于sizeof() |
| int unitNum | 数据单元的数量 |
| bool isSparse | 是否稠密,一个n * m稠密矩阵的数据量大小为n * m,而稀疏(非稠密)矩阵的数据量大小则取决于矩阵中非零元素个数。|
| int unitNumNonZero | 稀疏矩阵中非零元素个数 |
| float denseRatio | 稠密度,指非零单元的比例,是介于0和1之间的一个实数,0表示所有单元全为零,1表示全为非零单元。|
| bool isShared | 标志数据数组是否被其他张量所共享 |
| bool isInGlobalMem | 标志数据是否在全局内存而不是内存池中 |
| bool isAllValued<br> [MAX_TENSOR_DIM_NUM] | 标志稀疏矩阵中是否每个维度都具有非零元素 |
在XTensor.h头文件中定义的方法说明:
| 功能 | 函数 | 参数 |
| - | - | - |
| 判断两个张量数据类型<br>和大小是否相同 | static bool IsIdentical(<br> XTensor * a, XTensor * b) | a - 进行比较的第一个张量 <br> b - 进行比较的第二个张量 |
| 判断三个张量数据类型<br>和大小是否相同 | static bool IsIdentical(<br> XTensor * a, XTensor * b, XTensor * c) | a - 进行比较的第一个张量 <br> b - 进行比较的第二个张量 <br> c - 进行比较的第三个张量 |
| 设置张量每一维度的大小 | void SetDim(int * myDimSize) |myDimSize - 张量每一维度的大小 |
| 得到张量中给定的维度大小 | int GetDim(const int dim) | dim - 张量的维度 |
| 重新调整矩阵维度 | void Reshape(<br> const int order, const int * myDimSize) | order - 张量的维度 <br> myDimSize - 张量每一维的大小 |
| 得到张量中元素数量 | int GetSize() | N/A |
| 得到内存使用大小 | int GetDataSizeInChar() | N/A |
| 得到所给数据类型的数据<br> 单元大小 | int GetUnitSize(<br> TENSOR_DATA_TYPE myDataType) | myDataType - 所给数据类型 |
| 张量中所有元素设置为0 | void SetZeroAll(XStream * stream = NULL) | stream - 多线程流|
| 用数组赋值张量 | void SetData(<br> const void * d, int num, int beg = 0) | d - 赋值数组 <br> num - 数组大小 <br> beg - 赋值时从张量的第几位开始 |
| 设置张量服从均匀分布 | void SetDataRand(<br> DTYPE lower, DTYPE upper) | lower - 最小值 <br> upper - 最大值 |
| 设置张量服从正态分布 | void SetDataRandn(<br> DTYPE mean, DTYPE standardDeviation) | mean - 均值 <br> standardDeviation - 标准差 |
| 检查张量中元素是否相同 | bool CheckData(<br> const void * answer, int num, int beg = 0) | answer - 给定数组 <br> num - 数组大小 <br> beg - 赋值时从张量的第几位开始 |
| 将给定维度中元素<br> 设置为升序 | void SetAscendingOrder(int dim) | dim - 给定维度 |
| 获取张量中元素指针 | void * GetCell(int * index, int size) | index - 元素位置 <br> size-矩阵大小 |
| 获取二维张量中元素指针 | void * GetCell2D(int ni, int mi = 0) | ni - 行值 <br> mi - 列值 |
| 获取二维张量的值 | DTYPE Get2D(int ni, int mi = 0) | ni - 行值 <br> mi - 列值 |
| 获取稀疏张量的值 | DTYPE GetInSparse(int i) | i - 稀疏矩阵中非0元素位置 |
| 获取稀疏张量中<br> 元组的键值 | int GetKeyInSparse(int i) | i - 稀疏矩阵中非0元素位置 |
| 设置二维张量中<br> 的单元值 | bool Set2D(DTYPE value, int ni, int mi = 0) | value - 单元值 <br> ni - 行值 <br> mi - 列值 |
| 增加二维张量中<br> 的单元值 | bool Add2D(DTYPE value, int ni, int mi = 0) | value - 单元值 <br> ni - 行值 <br> mi - 列值 |
| 获取稀疏矩阵中<br> 非零元素数量 | int GetNonzeroSize() | N/A |
| 将矩阵重置为特定大小 | bool Resize(<br> const int myOrder, <br> const int * myDimSize, <br> const TENSOR_DATA_TYPE myDataType = DEFAULT_DTYPE, <br> const float myDenseRatio = 1.0F) | myOrder - 张量的维度 <br> myDimSize - 张量每一维的大小,索引0表示第一维 <br> myDataType - 张量的数据类型 <br> myDenseRatio - 张量的稠密度,1表示稠密张量 |
| 将矩阵重置为特定大小<br>并不申请新空间 | bool ResizeWithNoData(<br> const int myOrder, <br> const int * myDimSize, <br> const TENSOR_DATA_TYPE myDataType = DEFAULT_DTYPE, <br> const float myDenseRatio = 1.0F) | myOrder - 张量的维度 <br> myDimSize - 张量每一维的大小,索引0表示第一维 <br> myDataType - 张量的数据类型 <br> myDenseRatio - 张量的稠密度,1表示稠密张量 |
| 将矩阵重置为<br> 另一矩阵大小 | bool Resize(<br> const XTensor * myTensor) | myTensor - 重置矩阵大小的参考矩阵 |
| 用二值搜索方法<br> 找到稀疏矩阵中元素 | bool BinarySearch(<br> int key, DTYPE &value, void * &position) | key - 稀疏矩阵中元素位置 <br> value - 元素值 <br> position - 元素坐标位置 |
| 将数据刷新到<br> 目标设备中 | void FlushToMem(XMem * targetMem) | targetMem - 目标设备 |
| 在全局内存中<br> 申请矩阵的内存空间 | static void AllocateData(<br> XTensor * matrix, <br> XMem * myMem = NULL, <br> bool useBuf = false) | matrix - 申请内存空间的矩阵 <br> myMem - 是否在内存池中申请空间 <br> useBuf - 是否使用缓冲区 |
| 在全局内存中<br> 释放矩阵的内存空间 | static void FreeData(<br> XTensor * matrix, <br> XMem * myMem = NULL, <br> bool useBuf = false) | matrix - 申请内存空间的矩阵 <br> myMem - 是否在内存池中申请空间 <br> useBuf - 是否使用缓冲区 |
| 在缓冲区创建张量 | XTensor * NewTensorBuf( <br> const int myOrder, <br> const int * myDimSize, XMem * myMem, <br> const TENSOR_DATA_TYPE myDataType = <br> X_FLOAT, const float myDenseRatio = 1.0F) | myOrder - 张量的维度 <br> myDimSize - 张量每一维的大小,索引0表示第一维 <br> myMem - 张量所使用的内存池 <br> myDataType - 张量的数据类型 <br> myDenseRatio - 张量的稠密度,1表示稠密张量 |
| 依据给定张量<br>复制一个新的张量 | XTensor * NewTensor(<br>XTensor * a, bool isFilledData = true) | a - 给定张量 <br> isFilledData - 是否申请张量中的数据空间 |
| 依据给定张量<br>释放数据空间 | void DelTensor(<br>const XTensor * tensor) | tensor - 给定张量 |
| 依据给定张量<br>在缓存中释放数据空间 | void DelTensorBuf(<br>const XTensor * tensor) | tensor - 给定张量 |
...@@ -40,6 +40,10 @@ int main( int argc, const char ** argv ) ...@@ -40,6 +40,10 @@ int main( int argc, const char ** argv )
fprintf(stderr, "Run this program with \"-test\" for unit test!\n"); fprintf(stderr, "Run this program with \"-test\" for unit test!\n");
} }
XNet net;
XTensor a;
net.Backward(a);
//_CrtDumpMemoryLeaks(); //_CrtDumpMemoryLeaks();
return 0; return 0;
......
...@@ -23,4 +23,126 @@ ...@@ -23,4 +23,126 @@
namespace nts{ namespace nts{
unsigned int netIDGlobal = 0;
MUTEX_HANDLE netMutex;
/*
generate a globally unique network id.
The id advances by 3 each call so that the visiting marks used during
traversal (id + 1 = visiting, id + 2 = done, see XNet::TarjanVisit)
can never collide with the marks left by an earlier traversal.
<< return - the new network id
*/
unsigned int MakeNetID()
{
    /* bug fix: the original tested tensorIDGlobal here, so netMutex was
       (almost) never initialized before being locked */
    /* NOTE(review): the zero check itself runs before the lock, so the
       very first concurrent calls could race on MUTEX_INIT - confirm
       whether first use is guaranteed to be single-threaded */
    if(netIDGlobal == 0)
        MUTEX_INIT(netMutex);
    MUTEX_LOCK(netMutex);
    netIDGlobal += 3;
    unsigned int id = netIDGlobal;
    MUTEX_UNLOCK(netMutex);
    return id;
}
/* constructor: start with an empty list of tensor nodes */
XNet::XNet()
{
nodes.Clear();
}
/* de-constructor
   NOTE(review): nothing is released here; the tensors referenced by the
   member lists appear to be owned elsewhere - confirm ownership */
XNet::~XNet()
{
}
/* reset the network to its initial empty state:
   drop all node, gradient-node, output and input references */
void XNet::Clear()
{
    inputs.Clear();
    outputs.Clear();
    gradNodes.Clear();
    nodes.Clear();
}
/*
backward propagation to obtain gradient wrt. the loss/error function
(single-root convenience wrapper: packs the tensors into one-element
lists and delegates to the list-based Backward)
>> root - root node (output) of the network
>> gold - gold standard for the output
>> loss - name of loss function
*/
void XNet::Backward(XTensor &root, XTensor &gold, LOSS_FUNCTION_NAME loss)
{
    XList rootList(1);
    XList goldList(1);
    rootList.Add(&root);
    goldList.Add(&gold);
    Backward(rootList, goldList, loss);
}
/*
backward propagation to obtain gradient wrt. the loss/error function
with a number of root nodes
>> roots - a list of root nodes (output) of the network
>> golds - a list of gold standard for the output
>> loss - name of loss function
*/
void XNet::Backward(XList &roots, XList &golds, LOSS_FUNCTION_NAME loss)
{
/* NOTE(review): only the topological ordering is computed so far;
   golds and loss are not used yet - presumably the actual gradient
   computation is still to be implemented */
Traverse(roots);
}
/*
traverse the net and find the topological order by depth-first search
(Tarjan's algorithm); single-root wrapper that delegates to the
list-based Traverse
>> root - root node (or output of the net)
*/
void XNet::Traverse(XTensor &root)
{
    XList rootList(1);
    rootList.Add(&root);
    Traverse(rootList);
}
/*
traverse the net and find the topological order by depth-first search
(Tarjan's algorithm); the resulting order is stored in "nodes"
>> roots - a list of roots (or output nodes)
*/
void XNet::Traverse(XList &roots)
{
    nodes.Clear();
    /* a fresh id per traversal keeps old visiting marks invalid */
    id = MakeNetID();
    for (int r = 0; r < roots.count; r++) {
        XTensor * root = (XTensor*)roots.Get(r);
        TarjanVisit(root, nodes, id);
    }
}
/*
depth-first search given a node (Tarjan's algorithm for topological ordering)
Marks relative to the network code: code + 1 = currently being visited,
code + 2 = finished. Any other mark value means "unvisited by this
traversal" (MakeNetID advances by 3, so marks from earlier traversals
never fall inside the current [code + 1, code + 2] window).
>> node - the node to visit (mark 0:unvisited, 1:visiting, 2:done)
>> orders - topological order of the nodes (finished nodes are appended
   after all their children, i.e., post-order)
>> code - code of the network
*/
void XNet::TarjanVisit(XTensor * node, XList &orders, const unsigned int code)
{
if(node == NULL)
return;
/* hitting a node that is still "visiting" means we came back to it
   along a path of incoming links -> the graph has a cycle */
if(node->visitMark == code + 1){
ShowNTErrors("There is a circle in the network\n");
}
/* any mark outside [code + 1, code + 2] belongs to an older traversal,
   so the node counts as unvisited for this one */
else if(node->visitMark <= code || node->visitMark >= code + 2){
node->visitMark = code + 1;
/* recurse into all tensors this node was computed from */
XLink &income = node->income;
for(int i = 0; i < income.tailNum; i++){
XTensor * child = income.tails[i];
if(child == NULL)
continue;
TarjanVisit(child, orders, code);
}
node->visitMark = code + 2;
orders.Add(node);
}
}
} }
\ No newline at end of file
...@@ -30,9 +30,31 @@ ...@@ -30,9 +30,31 @@
namespace nts{ namespace nts{
/* management of tensor net (or graph) */ /* management of tensor net (or graph) */
class XNet struct XNet
{ {
public: /* id of the network */
unsigned int id;
/* tensor nodes of the network (in order) */
XList nodes;
/* tensor nodes to keep gradient for output (e.g., SGD)*/
XList gradNodes;
/* output nodes of the network */
XList outputs;
/* input nodes of the network */
XList inputs;
/* constructor */
XNet();
/* de-constructor */
~XNet();
/* clear the network */
void Clear();
/* backward propagation to obtain gradient wrt. the loss/error function */ /* backward propagation to obtain gradient wrt. the loss/error function */
void Backward(XTensor &root, XTensor &gold = NULLTensor, LOSS_FUNCTION_NAME loss = NOLOSS); void Backward(XTensor &root, XTensor &gold = NULLTensor, LOSS_FUNCTION_NAME loss = NOLOSS);
...@@ -40,8 +62,24 @@ public: ...@@ -40,8 +62,24 @@ public:
/* backward propagation to obtain gradient wrt. the loss/error function /* backward propagation to obtain gradient wrt. the loss/error function
with a number of root nodes */ with a number of root nodes */
void Backward(XList &roots, XList &golds = NULLList, LOSS_FUNCTION_NAME loss = NOLOSS); void Backward(XList &roots, XList &golds = NULLList, LOSS_FUNCTION_NAME loss = NOLOSS);
/* traverse the net and find the topological order by
depth-first search (Tarjan's algorithm) */
void Traverse(XTensor &root);
/* traverse the net and find the topological order by
depth-first search (Tarjan's algorithm) */
void Traverse(XList &roots);
/* depth-first search given a node (Tarjan's algorithm for topological ordering) */
void TarjanVisit(XTensor * node, XList &orders, const unsigned int code);
}; };
/* we make a unique id for every tensor */
extern unsigned int netIDGlobal;
extern MUTEX_HANDLE netMutex;
extern unsigned int MakeNetID();
} }
#endif #endif
\ No newline at end of file
...@@ -53,8 +53,8 @@ int main( int argc, const char ** argv ) ...@@ -53,8 +53,8 @@ int main( int argc, const char ** argv )
if(argc > 1 && !strcmp(argv[1], "-test")) if(argc > 1 && !strcmp(argv[1], "-test"))
Test(); Test();
else if(argc > 1 && !strcmp(argv[1], "-fnnlm")) //else if(argc > 1 && !strcmp(argv[1], "-fnnlm"))
FNNLMMain(argc - 1, argv + 1); // FNNLMMain(argc - 1, argv + 1);
else{ else{
fprintf(stderr, "Thanks for using NiuTrans.Tensor! This is a library that eases the\n"); fprintf(stderr, "Thanks for using NiuTrans.Tensor! This is a library that eases the\n");
fprintf(stderr, "use of tensors. All you need is to ... \n\n"); fprintf(stderr, "use of tensors. All you need is to ... \n\n");
......
...@@ -82,7 +82,7 @@ _XINLINE_ float Float16ToFloat(unsigned short h) ...@@ -82,7 +82,7 @@ _XINLINE_ float Float16ToFloat(unsigned short h)
} }
/* /*
data conversion data type conversion
>> devID - device id >> devID - device id
>> s - source data array >> s - source data array
>> typeS - source data type >> typeS - source data type
...@@ -92,7 +92,7 @@ data conversion ...@@ -92,7 +92,7 @@ data conversion
*/ */
void ConvertDataType(int devID, void * s, TENSOR_DATA_TYPE typeS, void * t, TENSOR_DATA_TYPE typeT, int size) void ConvertDataType(int devID, void * s, TENSOR_DATA_TYPE typeS, void * t, TENSOR_DATA_TYPE typeT, int size)
{ {
CheckNTErrors((devID < 0), "This code must be run on GPUs!"); CheckNTErrors((devID < 0), "This code must be run on CPUs!");
if(typeS == typeT) if(typeS == typeT)
return; return;
......
...@@ -37,6 +37,7 @@ XLink::XLink() ...@@ -37,6 +37,7 @@ XLink::XLink()
paramNum = 0; paramNum = 0;
type[0] = 0; type[0] = 0;
typeID = 0; typeID = 0;
caculator = NULL;
} }
/* deconstructor */ /* deconstructor */
...@@ -59,6 +60,8 @@ void XLink::Reset() ...@@ -59,6 +60,8 @@ void XLink::Reset()
tailNum = 0; tailNum = 0;
paramNum = 0; paramNum = 0;
type[0] = 0; type[0] = 0;
typeID = 0;
caculator = NULL;
} }
/* clear it */ /* clear it */
...@@ -68,6 +71,8 @@ void XLink::Clear() ...@@ -68,6 +71,8 @@ void XLink::Clear()
tailNum = 0; tailNum = 0;
paramNum = 0; paramNum = 0;
type[0] = 0; type[0] = 0;
typeID = 0;
caculator = NULL;
} }
/* reset tails */ /* reset tails */
...@@ -224,6 +229,7 @@ void XLink::AddParam(void * param, int size) ...@@ -224,6 +229,7 @@ void XLink::AddParam(void * param, int size)
paramNum++; paramNum++;
delete[] (char*)ps; delete[] (char*)ps;
} }
/* /*
create a hyperedge with two input tensors and a output tensor create a hyperedge with two input tensors and a output tensor
>> t1 - a tail tensor >> t1 - a tail tensor
...@@ -249,7 +255,7 @@ create a hyper edge with a list of tensors and a output tensor ...@@ -249,7 +255,7 @@ create a hyper edge with a list of tensors and a output tensor
>> h - head tensor >> h - head tensor
>> id - id of the edge type >> id - id of the edge type
*/ */
void XLink::MakeLink(XList * list, XTensor * h, int id) void XLink::MakeLink(const XList * list, XTensor * h, int id)
{ {
/* forward */ /* forward */
XLink &income = h->income; XLink &income = h->income;
...@@ -302,6 +308,43 @@ void XLink::AddParamToHeadInt(XTensor * h, int param) ...@@ -302,6 +308,43 @@ void XLink::AddParamToHeadInt(XTensor * h, int param)
} }
/*
add a MATRIX_TRANS_TYPE parameter to the incoming edge of the head tensor
>> h - head tensor (the call is a no-op when NULL)
>> param - the parameter we want to introduce
*/
void XLink::AddParamToHeadTrans(XTensor * h, MATRIX_TRANS_TYPE param)
{
    /* bug fix: the guard was inverted (h != NULL), which returned for
       every valid head and dereferenced NULL otherwise */
    if(h == NULL)
        return;
    h->income.AddParam(&param, sizeof(MATRIX_TRANS_TYPE));
}
/*
add a boolean parameter to the incoming edge of the head tensor
>> h - head tensor (the call is a no-op when NULL)
>> param - the parameter we want to introduce
*/
void XLink::AddParamToHeadBool(XTensor * h, bool param)
{
    /* bug fix: the guard was inverted (h != NULL), which returned for
       every valid head and dereferenced NULL otherwise */
    if(h == NULL)
        return;
    h->income.AddParam(&param, sizeof(bool));
}
/*
add a pointer parameter to the incoming edge of the head tensor
(the pointer value itself is stored, not what it points to)
>> h - head tensor (the call is a no-op when NULL)
>> param - the parameter we want to introduce
*/
void XLink::AddParamToHeadPointer(XTensor * h, void * param)
{
    /* bug fix: the guard was inverted (h != NULL), which returned for
       every valid head and dereferenced NULL otherwise */
    if(h == NULL)
        return;
    h->income.AddParam(&param, sizeof(param));
}
/*
replace a node with another, i.e., we redirect the links to the new node replace a node with another, i.e., we redirect the links to the new node
>> oldOne - the node to be replaced >> oldOne - the node to be replaced
>> newOne - the new node >> newOne - the new node
......
...@@ -77,6 +77,9 @@ struct XLink ...@@ -77,6 +77,9 @@ struct XLink
/* type id */ /* type id */
int typeID; int typeID;
/* caculator (pointer to the class for computation) */
void * caculator;
/* constuctor */ /* constuctor */
XLink(); XLink();
...@@ -124,7 +127,7 @@ struct XLink ...@@ -124,7 +127,7 @@ struct XLink
/* create a hyper edge with a list of input tensors and a output tensor */ /* create a hyper edge with a list of input tensors and a output tensor */
static static
void MakeLink(XList * list, XTensor * h, int id); void MakeLink(const XList * list, XTensor * h, int id);
/* add a parameter */ /* add a parameter */
static static
...@@ -134,6 +137,18 @@ struct XLink ...@@ -134,6 +137,18 @@ struct XLink
static static
void AddParamToHeadInt(XTensor * h, int param); void AddParamToHeadInt(XTensor * h, int param);
/* add a MATRIX_TRANS_TYPE parameter */
static
void AddParamToHeadTrans(XTensor * h, MATRIX_TRANS_TYPE param);
/* add a boolean parameter */
static
void AddParamToHeadBool(XTensor * h, bool param);
/* add a pointer parameter */
static
void AddParamToHeadPointer(XTensor * h, void * param);
/* replace a node with another, i.e., we redirect the links to the new node */ /* replace a node with another, i.e., we redirect the links to the new node */
static static
void Replace(const XTensor * oldOne, XTensor * newOne); void Replace(const XTensor * oldOne, XTensor * newOne);
......
...@@ -206,7 +206,7 @@ void XList::Insert(int pos, void * item) ...@@ -206,7 +206,7 @@ void XList::Insert(int pos, void * item)
} }
/* get the item at position i */ /* get the item at position i */
void * XList::GetItem(int i) void * XList::GetItem(int i) const
{ {
if( i >= 0 && i < count ) if( i >= 0 && i < count )
return items[i]; return items[i];
......
...@@ -74,7 +74,7 @@ public: ...@@ -74,7 +74,7 @@ public:
void AddList(XList * l); void AddList(XList * l);
void AddInt(int i); void AddInt(int i);
void Insert(int pos, void * item); void Insert(int pos, void * item);
void * GetItem(int i); void * GetItem(int i) const;
int GetItemInt(int i); int GetItemInt(int i);
void SetItem(int i, void * item); void SetItem(int i, void * item);
void SetItemInt(int i, int item); void SetItemInt(int i, int item);
......
...@@ -27,12 +27,56 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -27,12 +27,56 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
const char * GetOPName(int type) const char * GetOPName(int type)
{ {
if((type & MATH_ARITHMETIC) != 0){ if((type & MATH_ARITHMETIC) != 0){
if(type == MATH_SUM) if(type == MATH_ABSOLUTE)
return "M_SUM"; return "M_ABSOLUTE";
else if(type == MATH_MATRIXMUL)
return "M_MATRIXMUL";
else if(type == MATH_MATRIXMULBATCHED)
return "M_MATRIXMULBATCHED";
else if(type == MATH_MULTIPLY) else if(type == MATH_MULTIPLY)
return "M_MULTIPLY"; return "M_MULTIPLY";
else if(type == MATH_NEGATE)
return "M_NEGATE";
else if(type == MATH_SIGN)
return "M_SIGN";
else if(type == MATH_SUM)
return "M_SUM";
else if(type == MATH_LOG)
return "M_NORMALIZE";
else if(type == MATH_NORMALIZE)
return "M_LOG";
else if(type == MATH_POWER)
return "M_POWER";
else if(type == MATH_SCALEANDSHIFT) else if(type == MATH_SCALEANDSHIFT)
return "M_SCALEANDSHIFT"; return "M_SCALEANDSHIFT";
else if(type == GETANDSET_SELECT)
return "G_SELECT";
else if(type == MOVEMENT_COPYINDEXED)
return "M_COPYINDEXED";
else if(type == MOVEMENT_COPYVALUES)
return "M_COPYVALUES";
else if(type == REDUCE_REDUCEMAX)
return "R_REDUCEMAX";
else if(type == REDUCE_REDUCEMEAN)
return "R_REDUCEMEAN";
else if(type == REDUCE_REDUCESUM)
return "R_REDUCESUM";
else if(type == REDUCE_REDUCESUMSQUARED)
return "R_REDUCESUMSQUARED";
else if(type == REDUCE_REDUCEVARIANCE)
return "R_REDUCEVARIANCE";
else if(type == SHAPE_CONCATENATE)
return "S_CONCATENATE";
else if(type == SHAPE_MERGE)
return "S_MERGE";
else if(type == SHAPE_PERMUTE)
return "S_PERMUTE";
else if(type == SHAPE_SPLIT)
return "S_SPLIT";
else if(type == SHAPE_TRANSPOSE)
return "S_TRANSPOSE";
else if(type == SHAPE_UNSQUEEZE)
return "S_UNSQUEEZE";
} }
return "NULL"; return "NULL";
......
...@@ -29,9 +29,40 @@ ...@@ -29,9 +29,40 @@
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
#define MATH_ARITHMETIC 0x00001000 #define MATH_ARITHMETIC 0x00001000
#define MATH_SUM MATH_ARITHMETIC + 1 #define MATH_ABSOLUTE MATH_ARITHMETIC + 1
#define MATH_MULTIPLY MATH_SUM + 1 #define MATH_MATRIXMUL MATH_ABSOLUTE + 1
#define MATH_SCALEANDSHIFT MATH_MULTIPLY + 1 #define MATH_MATRIXMULBATCHED MATH_MATRIXMUL + 1
#define MATH_MULTIPLY MATH_MATRIXMULBATCHED + 1
#define MATH_NEGATE MATH_MULTIPLY + 1
#define MATH_SIGN MATH_NEGATE + 1
#define MATH_SUM MATH_SIGN + 1
#define MATH_LOG MATH_SUM + 1
#define MATH_NORMALIZE MATH_LOG + 1
#define MATH_POWER MATH_NORMALIZE + 1
#define MATH_SCALEANDSHIFT MATH_POWER + 1
#define GETANDSET MATH_SCALEANDSHIFT + 1
#define GETANDSET_SELECT GETANDSET + 1
#define MOVEMENT GETANDSET_SELECT + 1
#define MOVEMENT_COPYINDEXED MOVEMENT + 1
#define MOVEMENT_COPYVALUES MOVEMENT_COPYINDEXED + 1
#define REDUCE MOVEMENT_COPYVALUES + 1
#define REDUCE_REDUCEMAX REDUCE + 1
#define REDUCE_REDUCEMEAN REDUCE_REDUCEMAX + 1
#define REDUCE_REDUCESUM REDUCE_REDUCEMEAN + 1
#define REDUCE_REDUCESUMSQUARED REDUCE_REDUCESUM + 1
#define REDUCE_REDUCEVARIANCE REDUCE_REDUCESUMSQUARED + 1
#define SHAPE REDUCE_REDUCEVARIANCE + 1
#define SHAPE_CONCATENATE SHAPE + 1
#define SHAPE_MERGE SHAPE_CONCATENATE + 1
#define SHAPE_PERMUTE SHAPE_MERGE + 1
#define SHAPE_SPLIT SHAPE_PERMUTE + 1
#define SHAPE_TRANSPOSE SHAPE_SPLIT + 1
#define SHAPE_UNSQUEEZE SHAPE_TRANSPOSE + 1
/* get operator name */ /* get operator name */
const char * GetOPName(int type); const char * GetOPName(int type);
......
...@@ -173,7 +173,7 @@ XTensor::XTensor(const XTensor &reference) ...@@ -173,7 +173,7 @@ XTensor::XTensor(const XTensor &reference)
devID = reference.devID; devID = reference.devID;
mem = reference.mem; mem = reference.mem;
InitTensor(this, &reference); InitTensor(this, &reference);
CopyValues(&reference, this); _CopyValues(&reference, this);
} }
if(reference.isTmp) if(reference.isTmp)
...@@ -237,6 +237,7 @@ void XTensor::Init() ...@@ -237,6 +237,7 @@ void XTensor::Init()
memset(isAllValued, 0, sizeof(bool) * MAX_TENSOR_DIM_NUM); memset(isAllValued, 0, sizeof(bool) * MAX_TENSOR_DIM_NUM);
isInit = false; isInit = false;
isTmp = false; isTmp = false;
visitMark = 0;
} }
/* delete data arrays */ /* delete data arrays */
...@@ -299,7 +300,7 @@ XTensor& XTensor::operator= (const XTensor& tensor) ...@@ -299,7 +300,7 @@ XTensor& XTensor::operator= (const XTensor& tensor)
} }
Resize(tensor.order, tensor.dimSize, tensor.dataType, tensor.denseRatio); Resize(tensor.order, tensor.dimSize, tensor.dataType, tensor.denseRatio);
CopyValues(&tensor, this); _CopyValues(&tensor, this);
} }
/* copy member variables */ /* copy member variables */
...@@ -344,7 +345,7 @@ judge whether the two matrices are in the same type and size ...@@ -344,7 +345,7 @@ judge whether the two matrices are in the same type and size
>> b - anther tensor to compare with >> b - anther tensor to compare with
<< return - whether the two input tensors are identical << return - whether the two input tensors are identical
*/ */
bool XTensor::IsIdentical(XTensor * a, XTensor * b) bool XTensor::IsIdentical(const XTensor * a, const XTensor * b)
{ {
if(a->order != b->order) if(a->order != b->order)
return false; return false;
...@@ -426,7 +427,7 @@ void XTensor::Reshape(const int myOrder, const int * myDimSize) ...@@ -426,7 +427,7 @@ void XTensor::Reshape(const int myOrder, const int * myDimSize)
} }
/* get the number of items in the data array */ /* get the number of items in the data array */
int XTensor::GetSize() int XTensor::GetSize() const
{ {
if(isSparse) if(isSparse)
return unitNumNonZero; return unitNumNonZero;
...@@ -742,7 +743,7 @@ get the pointer to a cell ...@@ -742,7 +743,7 @@ get the pointer to a cell
>> size - size of index >> size - size of index
<< return - pointer to the cell << return - pointer to the cell
*/ */
void * XTensor::GetCell(int index[], int size) void * XTensor::GetCell(int index[], int size) const
{ {
CheckNTErrors((size == order), "Illegal index!"); CheckNTErrors((size == order), "Illegal index!");
...@@ -794,7 +795,7 @@ get the value of a cell in a 2d tensor in default type ...@@ -794,7 +795,7 @@ get the value of a cell in a 2d tensor in default type
>> mi - column index >> mi - column index
<< return - value of cell(ni, mi) in float << return - value of cell(ni, mi) in float
*/ */
DTYPE XTensor::Get2D(int ni, int mi) DTYPE XTensor::Get2D(int ni, int mi) const
{ {
CheckNTErrors((order == 2), "Cannot get a 2d cell for a tensor whose order is not 2!"); CheckNTErrors((order == 2), "Cannot get a 2d cell for a tensor whose order is not 2!");
CheckNTErrors((ni >= 0 && ni < dimSize[0]), "dimension 0 is out of range!"); CheckNTErrors((ni >= 0 && ni < dimSize[0]), "dimension 0 is out of range!");
...@@ -1242,7 +1243,7 @@ binary search to find an element in a sparse tensor ...@@ -1242,7 +1243,7 @@ binary search to find an element in a sparse tensor
it is the previous one if there is no hit it is the previous one if there is no hit
<< return - find it or not? << return - find it or not?
*/ */
bool XTensor::BinarySearch(int key, DTYPE &value, void * &position) bool XTensor::BinarySearch(int key, DTYPE &value, void * &position) const
{ {
CheckNTErrors((isSparse), "A sparse tensor is required!"); CheckNTErrors((isSparse), "A sparse tensor is required!");
CheckNTErrors((dataType == DEFAULT_DTYPE), "The tensor is not in the default type."); CheckNTErrors((dataType == DEFAULT_DTYPE), "The tensor is not in the default type.");
......
...@@ -139,6 +139,9 @@ public: ...@@ -139,6 +139,9 @@ public:
/* indicates whether the tensor is created temporarily */ /* indicates whether the tensor is created temporarily */
bool isTmp; bool isTmp;
/* mark for traversing the gragh */
unsigned int visitMark;
/* /*
the link used to form networks. Note that when we compute on tensors, we actually create a the link used to form networks. Note that when we compute on tensors, we actually create a
network where nodes are tensors and edges the connections among them. Each connection is network where nodes are tensors and edges the connections among them. Each connection is
...@@ -198,7 +201,7 @@ public: ...@@ -198,7 +201,7 @@ public:
/* judge whether the two matrices are in the same type and size */ /* judge whether the two matrices are in the same type and size */
static static
bool IsIdentical(XTensor * a, XTensor * b); bool IsIdentical(const XTensor * a, const XTensor * b);
/* judge whether the three matrices are in the same type and size */ /* judge whether the three matrices are in the same type and size */
static static
...@@ -214,7 +217,7 @@ public: ...@@ -214,7 +217,7 @@ public:
void Reshape(const int order, const int * myDimSize); void Reshape(const int order, const int * myDimSize);
/* get the number of items in the data array */ /* get the number of items in the data array */
int GetSize(); int GetSize() const;
/* get size of the memory used */ /* get size of the memory used */
int GetDataSizeInChar(); int GetDataSizeInChar();
...@@ -250,13 +253,13 @@ public: ...@@ -250,13 +253,13 @@ public:
DTYPE Get(int index[], int size = -1); DTYPE Get(int index[], int size = -1);
/* get the pointer to a cell */ /* get the pointer to a cell */
void * GetCell(int index[], int size = -1); void * GetCell(int index[], int size = -1) const;
/* get the default type value of a cell in a 1d tensor */ /* get the default type value of a cell in a 1d tensor */
DTYPE Get1D(int i); DTYPE Get1D(int i);
/* get the default type value of a cell in a 2d tensor */ /* get the default type value of a cell in a 2d tensor */
DTYPE Get2D(int ni, int mi); DTYPE Get2D(int ni, int mi) const;
/* get the default type value of a cell in a 3d tensor */ /* get the default type value of a cell in a 3d tensor */
DTYPE Get3D(int d0, int d1, int d2); DTYPE Get3D(int d0, int d1, int d2);
...@@ -311,7 +314,7 @@ public: ...@@ -311,7 +314,7 @@ public:
bool Resize(const XTensor * myTensor); bool Resize(const XTensor * myTensor);
/* binary search to find an element in a sparse matrix*/ /* binary search to find an element in a sparse matrix*/
bool BinarySearch(int key, DTYPE &value, void * &position); bool BinarySearch(int key, DTYPE &value, void * &position) const;
/* dump data to a file */ /* dump data to a file */
void Dump(FILE * file, const char * label = NULL, const int n = -1, const int verbose = 0); void Dump(FILE * file, const char * label = NULL, const int n = -1, const int verbose = 0);
......
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
* $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-7-11 * $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-7-11
*/ */
#include <math.h>
#include "../../XTensor.h" #include "../../XTensor.h"
#include "Absolute.h" #include "Absolute.h"
#include "Absolute.cuh" #include "Absolute.cuh"
...@@ -29,12 +30,12 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -29,12 +30,12 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
set every entry to its absolute value set every entry to its absolute value
>> a - the tensor we are processing >> a - the tensor we are processing
*/ */
void Absolute(XTensor * a) void _Absolute(XTensor * a)
{ {
#ifdef USE_CUDA #ifdef USE_CUDA
/* run it on GPUs */ /* run it on GPUs */
if (a->devID >= 0) { if (a->devID >= 0) {
CudaAbsolute(a); _CudaAbsolute(a);
return; return;
} }
#endif #endif
......
...@@ -58,7 +58,7 @@ set each entry to its with float16 data type value ...@@ -58,7 +58,7 @@ set each entry to its with float16 data type value
>> a - the tensor >> a - the tensor
*/ */
extern "C" extern "C"
void CudaAbsolute(XTensor * a) void _CudaAbsolute(XTensor * a)
{ {
CheckNTErrors((a->isSparse == false), "TODO!"); CheckNTErrors((a->isSparse == false), "TODO!");
......
...@@ -35,7 +35,7 @@ void KernelAbsolute(__half * d, int size); ...@@ -35,7 +35,7 @@ void KernelAbsolute(__half * d, int size);
/* set each entry to its absolute value */ /* set each entry to its absolute value */
extern "C" extern "C"
void CudaAbsolute(XTensor * a); void _CudaAbsolute(XTensor * a);
#endif // USE_CUDA #endif // USE_CUDA
......
...@@ -28,7 +28,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -28,7 +28,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* set every entry to its absolute value */ /* set every entry to its absolute value */
extern "C" extern "C"
void Absolute(XTensor * a); void _Absolute(XTensor * a);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -37,8 +37,8 @@ c_i = trans(a_i) * trans(b_i) * \alpha + c_i * \beta for each i in [0,count-1] ...@@ -37,8 +37,8 @@ c_i = trans(a_i) * trans(b_i) * \alpha + c_i * \beta for each i in [0,count-1]
>> alpha - scalar >> alpha - scalar
>> beta - scalar >> beta - scalar
*/ */
void MatrixMULBatchedCPU(XList * a, MATRIX_TRANS_TYPE transposedA, void _MatrixMULBatchedCPU(const XList * a, MATRIX_TRANS_TYPE transposedA,
XList * b, MATRIX_TRANS_TYPE transposedB, const XList * b, MATRIX_TRANS_TYPE transposedB,
XList * c, DTYPE alpha, DTYPE beta) XList * c, DTYPE alpha, DTYPE beta)
{ {
CheckNTErrors((a && b && c), "Empty input lists!"); CheckNTErrors((a && b && c), "Empty input lists!");
...@@ -73,11 +73,11 @@ void MatrixMULBatchedCPU(XList * a, MATRIX_TRANS_TYPE transposedA, ...@@ -73,11 +73,11 @@ void MatrixMULBatchedCPU(XList * a, MATRIX_TRANS_TYPE transposedA,
CheckNTErrors((ci->order == 2), "2d tensor (i.e., matrix) is required!"); CheckNTErrors((ci->order == 2), "2d tensor (i.e., matrix) is required!");
#ifdef USE_BLAS #ifdef USE_BLAS
if (useBLAS) if (useBLAS)
MatrixMULCPU(ai, transposedA, bi, transposedB, ci, alpha, beta); _MatrixMULCPU(ai, transposedA, bi, transposedB, ci, alpha, beta);
else else
MatrixMul2D(ai, transposedA, bi, transposedB, ci, alpha, beta); _MatrixMul2D(ai, transposedA, bi, transposedB, ci, alpha, beta);
#else #else
MatrixMul2D(ai, transposedA, bi, transposedB, ci, alpha, beta); _MatrixMul2D(ai, transposedA, bi, transposedB, ci, alpha, beta);
#endif #endif
} }
//} //}
......
...@@ -28,7 +28,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -28,7 +28,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* matrix multiplication in batch mode (CPU code) */ /* matrix multiplication in batch mode (CPU code) */
extern "C" extern "C"
void MatrixMULBatchedCPU(XList * a, MATRIX_TRANS_TYPE transposedA, XList * b, MATRIX_TRANS_TYPE transposedB, XList * c, void _MatrixMULBatchedCPU(const XList * a, MATRIX_TRANS_TYPE transposedA, const XList * b, MATRIX_TRANS_TYPE transposedB, XList * c,
DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0); DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -30,34 +30,34 @@ ...@@ -30,34 +30,34 @@
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* /*
matrix multiplication. For the input tensors a and b, we perform matrix multiplication matrix multiplication
on the first two dimentsions. E.g., let A be a tensor of size y * z * m and B be
a tensor of size x * y * n. For A * B, we go over each order-2 tensor of A (of size x * y) For the input tensors a and b, we perform matrix multiplication on the first two dimentsions.
and each order-2 tensor B (of size z * x), like this E.g., let A be a tensor of size y * z * m and B be a tensor of size x * y * n.
c_{i,j} = trans(ai) * trans(bj) * alpha + c_{i,j} * beta For A * B, we go over each order-2 tensor of A (of size x * y) and each order-2 tensor B (of size z * x),
where trans() returns the transposed matrix if the flag is fired, ai is the i-th like this c_{i,j} = trans(ai) * trans(bj) * alpha + c_{i,j} * beta
element tensor of A, bj is the j-th element tensor of B, and c_{i,j} is the (i,j) element where trans() returns the transposed matrix if the flag is fired, ai is the i-th element tensor of A,
tensor of the result C. C should be a tensor of z * x * n * m. Obviously C = A * B performs bj is the j-th element tensor of B, and c_{i,j} is the (i,j) element tensor of the result C.
normal matrix multiplication if A = y * z and B = x * y. C should be a tensor of z * x * n * m.
Obviously C = A * B performs normal matrix multiplication if A = y * z and B = x * y.
>> a - tensor a >> a - tensor a
>> transposedA - indicates whether the matrices in a are transposed >> transposedA - indicates whether the matrices in a are transposed
>> b - tensor b >> b - tensor b
>> transposedB - indicates whether teh matrices in b are transposed >> transposedB - indicates whether teh matrices in b are transposed
>> c - where we keep a*b
>> alpha - a coefficient >> alpha - a coefficient
>> beta - another coefficient >> beta - another coefficient
>> parallelRunner - parallel processing module >> parallelRunner - parallel processing module
*/ */
void MatrixMul(XTensor * a, MATRIX_TRANS_TYPE transposedA, void _MatrixMul(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
XTensor * b, MATRIX_TRANS_TYPE transposedB, const XTensor * b, MATRIX_TRANS_TYPE transposedB,
XTensor * c, DTYPE alpha, DTYPE beta, XTensor * c, DTYPE alpha, DTYPE beta, XPRunner * parallelRunner)
XPRunner * parallelRunner)
{ {
CheckNTErrors((a && b && c), "Empty input tensors!"); CheckNTErrors((a && b && c), "Empty input tensors!");
CheckNTErrors((a->dataType == b->dataType && a->dataType == c->dataType), CheckNTErrors((a->dataType == b->dataType && a->dataType == c->dataType),
"Input tensors should have the same data type!"); "Input tensors should have the same data type!");
CheckNTErrors((a->order >= 2 && b->order >= 2 && c->order >= 2), CheckNTErrors((a->order >= 2 && b->order >= 2 && c->order >= 2),
"Input tensors must have a order > 2!"); "Input tensors must have a order >= 2!");
int an = transposedA == X_TRANS ? a->dimSizeRDI[0] : a->dimSizeRDI[1]; int an = transposedA == X_TRANS ? a->dimSizeRDI[0] : a->dimSizeRDI[1];
int am = transposedA == X_TRANS ? a->dimSizeRDI[1] : a->dimSizeRDI[0]; int am = transposedA == X_TRANS ? a->dimSizeRDI[1] : a->dimSizeRDI[0];
...@@ -132,7 +132,7 @@ void MatrixMul(XTensor * a, MATRIX_TRANS_TYPE transposedA, ...@@ -132,7 +132,7 @@ void MatrixMul(XTensor * a, MATRIX_TRANS_TYPE transposedA,
XTensor * ai = (XTensor*)aList->GetItem(i); XTensor * ai = (XTensor*)aList->GetItem(i);
XTensor * bi = (XTensor*)bList->GetItem(i); XTensor * bi = (XTensor*)bList->GetItem(i);
XTensor * ci = (XTensor*)cList->GetItem(i); XTensor * ci = (XTensor*)cList->GetItem(i);
MatrixMul2D(ai, transposedA, bi, transposedB, ci, alpha, beta, parallelRunner); _MatrixMul2D(ai, transposedA, bi, transposedB, ci, alpha, beta, parallelRunner);
} }
} }
else if (a->devID >= 0 && b->devID >= 0 && c->devID >= 0) { else if (a->devID >= 0 && b->devID >= 0 && c->devID >= 0) {
...@@ -144,7 +144,7 @@ void MatrixMul(XTensor * a, MATRIX_TRANS_TYPE transposedA, ...@@ -144,7 +144,7 @@ void MatrixMul(XTensor * a, MATRIX_TRANS_TYPE transposedA,
ProtectCudaDev(a->devID, devIDBackup); ProtectCudaDev(a->devID, devIDBackup);
cublasHandle_t * handle = a->mem != NULL ? a->mem->GetCublasHandle() : GDevs.GetCudaHandle(a->devID); cublasHandle_t * handle = a->mem != NULL ? a->mem->GetCublasHandle() : GDevs.GetCudaHandle(a->devID);
CudaBLASMatrixMULList(handle, _CudaBLASMatrixMULList(handle,
aList, transposedA, aList, transposedA,
bList, transposedB, bList, transposedB,
cList, aList->count, cList, aList->count,
...@@ -157,7 +157,7 @@ void MatrixMul(XTensor * a, MATRIX_TRANS_TYPE transposedA, ...@@ -157,7 +157,7 @@ void MatrixMul(XTensor * a, MATRIX_TRANS_TYPE transposedA,
} }
else { else {
CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!"); CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!");
MatrixMULBatchedCPU(aList, transposedA, _MatrixMULBatchedCPU(aList, transposedA,
bList, transposedB, bList, transposedB,
cList, alpha, beta); cList, alpha, beta);
} }
...@@ -184,4 +184,74 @@ void MatrixMul(XTensor * a, MATRIX_TRANS_TYPE transposedA, ...@@ -184,4 +184,74 @@ void MatrixMul(XTensor * a, MATRIX_TRANS_TYPE transposedA,
delete bList; delete bList;
delete cList; delete cList;
} }
/*
matrix multiplication (return an XTensor structure)
make a new tensor to keep the result and return it

For the input tensors a and b, we perform matrix multiplication on the first two dimentsions.
E.g., let A be a tensor of size y * z * m and B be a tensor of size x * y * n.
For A * B, we go over each order-2 tensor of A (of size x * y) and each order-2 tensor B (of size z * x),
like this c_{i,j} = trans(ai) * trans(bj) * alpha + c_{i,j} * beta
where trans() returns the transposed matrix if the flag is fired, ai is the i-th element tensor of A,
bj is the j-th element tensor of B, and c_{i,j} is the (i,j) element tensor of the result C.
The result C should be a tensor of z * x * n * m.
Obviously C = A * B performs normal matrix multiplication if A = y * z and B = x * y.

>> a - tensor a
>> transposedA - indicates whether the matrices in a are transposed
>> b - tensor b
>> transposedB - indicates whether the matrices in b are transposed
>> alpha - a coefficient
>> beta - another coefficient
>> parallelRunner - parallel processing module
<< return - the result of matrix multiplication
*/
XTensor MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA, const XTensor &b, MATRIX_TRANS_TYPE transposedB,
                  DTYPE alpha, DTYPE beta, XPRunner * parallelRunner)
{
    /* NOTE(review): &a/&b are addresses of references and can never be null,
       so this check is a tautology — kept only for consistency with the
       pointer-based _MatrixMul; consider removing it. */
    CheckNTErrors((&a && &b), "Empty input tensors!");
    CheckNTErrors((a.dataType == b.dataType), "Input tensors should have the same data type!");
    CheckNTErrors((a.order >= 2 && b.order >= 2), "Input tensors must have a order >= 2!");

    /* matrix sizes as seen by the multiplication; dimSizeRDI stores the
       dimensions in reversed order (index 0 is the innermost dimension) */
    int an = transposedA == X_TRANS ? a.dimSizeRDI[0] : a.dimSizeRDI[1];
    int am = transposedA == X_TRANS ? a.dimSizeRDI[1] : a.dimSizeRDI[0];
    int bn = transposedB == X_TRANS ? b.dimSizeRDI[0] : b.dimSizeRDI[1];
    int bm = transposedB == X_TRANS ? b.dimSizeRDI[1] : b.dimSizeRDI[0];

    CheckNTErrors(am == bn, "Unmatched tensors in multiplication!");

    /* the result keeps all "batch" dimensions of a and b, followed by the
       two matrix dimensions (an x bm) */
    int order = a.order + b.order - 2;
    int sub = 0;
    int * dimSize = new int[order];
    for (int i = 2; i < a.order; i++)
        dimSize[sub++] = a.dimSizeRDI[i];
    for (int i = 2; i < b.order; i++)
        dimSize[sub++] = b.dimSizeRDI[i];
    dimSize[sub++] = an;
    dimSize[sub++] = bm;

    /* the result is marked temporary (SetTMP) so the graph machinery may
       recycle it after use */
    XTensor c = NewTensor(order, dimSize, a.dataType, a.denseRatio, a.devID, a.mem);
    c.SetZeroAll();
    c.SetTMP();

    /* call _MatrixMul function to do the real work */
    _MatrixMul(&a, transposedA, &b, transposedB, &c, alpha, beta, parallelRunner);

    /* tensor connections: record the operation and its parameters in the
       computation network for later traversal */
    XLink::MakeLink(&a, &b, &c, MATH_MATRIXMUL);
    XLink::AddParamToHeadTrans(&c, transposedA);
    XLink::AddParamToHeadTrans(&c, transposedB);
    XLink::AddParamToHead(&c, alpha);
    XLink::AddParamToHead(&c, beta);

    /* destroy variables: dimSize was allocated with new[], so it must be
       released with delete[] (the original `delete dimSize` is undefined
       behavior for an array allocation) */
    delete[] dimSize;

    return c;
}
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
...@@ -27,18 +27,34 @@ ...@@ -27,18 +27,34 @@
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* /*
matrix multiplication. For the input tensors a and b, we perform matrix multiplication matrix multiplication
on the first two dimentsions. E.g., let A be a tensor of size y * z * m and B be
a tensor of size x * y * n. For A * B, we go over each order-2 tensor of A (of size x * y) For the input tensors a and b, we perform matrix multiplicationon the first two dimentsions.
and each order-2 tensor B (of size z * x), like this E.g., let A be a tensor of size y * z * m and B bea tensor of size x * y * n.
c_{i,j} = trans(ai) * trans(bj) * alpha + c_{i,j} * beta For A * B, we go over each order-2 tensor of A (of size x * y) and each order-2 tensor B (of size z * x),
where trans() returns the transposed matrix if the flag is fired, ai is the i-th like this c_{i,j} = trans(ai) * trans(bj) * alpha + c_{i,j} * beta
element tensor of A, bj is the j-th element tensor of B, and c_{i,j} is the (i,j) element where trans() returns the transposed matrix if the flag is fired, ai is the i-th element tensor of A,
tensor of the result C. C should be a tensor of z * x * n * m. Obviously C = A * B performs bj is the j-th element tensor of B, and c_{i,j} is the (i,j) elementtensor of the result C.
normal matrix multiplication if A = y * z and B = x * y. C should be a tensor of z * x * n * m.
Obviously C = A * B performs normal matrix multiplication if A = y * z and B = x * y.
*/
void _MatrixMul(const XTensor * a, MATRIX_TRANS_TYPE transposedA, const XTensor * b, MATRIX_TRANS_TYPE transposedB, XTensor * c,
DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0, XPRunner * parallelRunner = NULL);
/*
matrix multiplication (return a XTensor structure)
make a new tensor c to keep the result and return it
For the input tensors a and b, we perform matrix multiplicationon the first two dimentsions.
E.g., let A be a tensor of size y * z * m and B bea tensor of size x * y * n.
For A * B, we go over each order-2 tensor of A (of size x * y) and each order-2 tensor B (of size z * x),
like this c_{i,j} = trans(ai) * trans(bj) * alpha + c_{i,j} * beta
where trans() returns the transposed matrix if the flag is fired, ai is the i-th element tensor of A,
bj is the j-th element tensor of B, and c_{i,j} is the (i,j) elementtensor of the result C.
C should be a tensor of z * x * n * m.
Obviously C = A * B performs normal matrix multiplication if A = y * z and B = x * y.
*/ */
extern "C" XTensor MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA, const XTensor &b, MATRIX_TRANS_TYPE transposedB,
void MatrixMul(XTensor * a, MATRIX_TRANS_TYPE transposedA, XTensor * b, MATRIX_TRANS_TYPE transposedB, XTensor * c,
DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0, XPRunner * parallelRunner = NULL); DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0, XPRunner * parallelRunner = NULL);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -30,8 +30,10 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -30,8 +30,10 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* /*
matrix multiplication (for 2d tensors) matrix multiplication (for 2d tensors)
c = trans(a) * trans(b) * alpha + c * beta c = trans(a) * trans(b) * alpha + c * beta
where trans() return the transposed matrix if the flag is fired where trans() return the transposed matrix if the flag is fired
>> a - tensor a >> a - tensor a
>> transposedA - indicates whether the matrices in a are transposed >> transposedA - indicates whether the matrices in a are transposed
>> b - tensor b >> b - tensor b
...@@ -42,8 +44,8 @@ where trans() return the transposed matrix if the flag is fired ...@@ -42,8 +44,8 @@ where trans() return the transposed matrix if the flag is fired
>> parallelRunner - parallel processing module >> parallelRunner - parallel processing module
>> stream - the string for creating the job pipeline >> stream - the string for creating the job pipeline
*/ */
void MatrixMul2D(XTensor * a, MATRIX_TRANS_TYPE transposedA, void _MatrixMul2D(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
XTensor * b, MATRIX_TRANS_TYPE transposedB, const XTensor * b, MATRIX_TRANS_TYPE transposedB,
XTensor * c, DTYPE alpha, DTYPE beta, XTensor * c, DTYPE alpha, DTYPE beta,
XPRunner * parallelRunner, XStream * stream) XPRunner * parallelRunner, XStream * stream)
{ {
...@@ -67,7 +69,7 @@ void MatrixMul2D(XTensor * a, MATRIX_TRANS_TYPE transposedA, ...@@ -67,7 +69,7 @@ void MatrixMul2D(XTensor * a, MATRIX_TRANS_TYPE transposedA,
#ifdef USE_CUDA #ifdef USE_CUDA
if (a->devID >= 0 || b->devID >= 0 || c->devID >= 0) { if (a->devID >= 0 || b->devID >= 0 || c->devID >= 0) {
CudaMatrixMul2D(a, transposedA, b, transposedB, c, alpha, beta, stream); _CudaMatrixMul2D(a, transposedA, b, transposedB, c, alpha, beta, stream);
return; return;
} }
#endif #endif
...@@ -81,9 +83,9 @@ void MatrixMul2D(XTensor * a, MATRIX_TRANS_TYPE transposedA, ...@@ -81,9 +83,9 @@ void MatrixMul2D(XTensor * a, MATRIX_TRANS_TYPE transposedA,
c->dataType == DEFAULT_DTYPE) c->dataType == DEFAULT_DTYPE)
{ {
if (useBLAS) if (useBLAS)
MatrixMULCPU(a, transposedA, b, transposedB, c, alpha, beta); _MatrixMULCPU(a, transposedA, b, transposedB, c, alpha, beta);
else else
MatrixMul2DParallel(a, transposedA, b, transposedB, c, alpha, beta, parallelRunner); _MatrixMul2DParallel(a, transposedA, b, transposedB, c, alpha, beta, parallelRunner);
} }
else { else {
// TODO!! // TODO!!
......
...@@ -108,8 +108,10 @@ void KernelMatrixMulDenseMSparseMV2(DTYPE * a, MATRIX_TRANS_TYPE transposedA, in ...@@ -108,8 +108,10 @@ void KernelMatrixMulDenseMSparseMV2(DTYPE * a, MATRIX_TRANS_TYPE transposedA, in
/* /*
matrix multiplication (for 2d tensors) (cuda version) matrix multiplication (for 2d tensors) (cuda version)
c = trans(a) * trans(b) * alpha + c * beta c = trans(a) * trans(b) * alpha + c * beta
where trans() return the transposed matrix if the flag is fired where trans() return the transposed matrix if the flag is fired
>> a - tensor a >> a - tensor a
>> transposedA - indicates whether the matrices in a are transposed >> transposedA - indicates whether the matrices in a are transposed
>> b - tensor b >> b - tensor b
...@@ -119,8 +121,8 @@ where trans() return the transposed matrix if the flag is fired ...@@ -119,8 +121,8 @@ where trans() return the transposed matrix if the flag is fired
>> beta - another coefficient >> beta - another coefficient
>> stream - the string for creating the job pipeline >> stream - the string for creating the job pipeline
*/ */
void CudaMatrixMul2D(XTensor * a, MATRIX_TRANS_TYPE transposedA, void _CudaMatrixMul2D(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
XTensor * b, MATRIX_TRANS_TYPE transposedB, const XTensor * b, MATRIX_TRANS_TYPE transposedB,
XTensor * c, XTensor * c,
DTYPE alpha, DTYPE beta, XStream * stream) DTYPE alpha, DTYPE beta, XStream * stream)
{ {
...@@ -156,7 +158,7 @@ void CudaMatrixMul2D(XTensor * a, MATRIX_TRANS_TYPE transposedA, ...@@ -156,7 +158,7 @@ void CudaMatrixMul2D(XTensor * a, MATRIX_TRANS_TYPE transposedA,
cublasSetStream(*handle, stream->stream); cublasSetStream(*handle, stream->stream);
if (a->dataType == X_FLOAT && b->dataType == X_FLOAT && c->dataType == X_FLOAT) { if (a->dataType == X_FLOAT && b->dataType == X_FLOAT && c->dataType == X_FLOAT) {
CudaBLASMatrixMUL(handle, a->data, transposedA, a->dataType, b->data, transposedB, a->dataType, c->data, c->dataType, _CudaBLASMatrixMUL(handle, a->data, transposedA, a->dataType, b->data, transposedB, a->dataType, c->data, c->dataType,
a->dimSize[0], a->dimSize[1], b->dimSize[0], b->dimSize[1], c->dimSize[0], c->dimSize[1], a->dimSize[0], a->dimSize[1], b->dimSize[0], b->dimSize[1], c->dimSize[0], c->dimSize[1],
alpha, beta); alpha, beta);
} }
......
...@@ -43,7 +43,7 @@ c = trans(a) * trans(b) * alpha + c * beta ...@@ -43,7 +43,7 @@ c = trans(a) * trans(b) * alpha + c * beta
where trans() return the transposed matrix if the flag is fired where trans() return the transposed matrix if the flag is fired
*/ */
extern "C" extern "C"
void CudaMatrixMul2D(XTensor * a, MATRIX_TRANS_TYPE transposedA, XTensor * b, MATRIX_TRANS_TYPE transposedB, XTensor * c, void _CudaMatrixMul2D(const XTensor * a, MATRIX_TRANS_TYPE transposedA, const XTensor * b, MATRIX_TRANS_TYPE transposedB, XTensor * c,
DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0, XStream * stream = NULL); DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0, XStream * stream = NULL);
#endif // USE_CUDA #endif // USE_CUDA
......
...@@ -31,8 +31,7 @@ matrix multiplication (for 2d tensors) ...@@ -31,8 +31,7 @@ matrix multiplication (for 2d tensors)
c = trans(a) * trans(b) * alpha + c * beta c = trans(a) * trans(b) * alpha + c * beta
where trans() return the transposed matrix if the flag is fired where trans() return the transposed matrix if the flag is fired
*/ */
extern "C" void _MatrixMul2D(const XTensor * a, MATRIX_TRANS_TYPE transposedA, const XTensor * b, MATRIX_TRANS_TYPE transposedB, XTensor * c,
void MatrixMul2D(XTensor * a, MATRIX_TRANS_TYPE transposedA, XTensor * b, MATRIX_TRANS_TYPE transposedB, XTensor * c,
DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0, XPRunner * parallelRunner = NULL, XStream * stream = NULL); DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0, XPRunner * parallelRunner = NULL, XStream * stream = NULL);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -38,7 +38,7 @@ argument5: matrix a ...@@ -38,7 +38,7 @@ argument5: matrix a
argument6: matrix b argument6: matrix b
argument7: matrix c (c=a*b*\alpha + c*beta) argument7: matrix c (c=a*b*\alpha + c*beta)
*/ */
void MatrixMul2DMultiTheading(XList * args) void _MatrixMul2DMultiTheading(XList * args)
{ {
int x1 = *(int*)args->GetItem(0); int x1 = *(int*)args->GetItem(0);
int y1 = *(int*)args->GetItem(1); int y1 = *(int*)args->GetItem(1);
......
...@@ -31,7 +31,7 @@ matrix multiplication for a block (x1,y1) - (x2,y2) ...@@ -31,7 +31,7 @@ matrix multiplication for a block (x1,y1) - (x2,y2)
where (x1,y1) is the upper-left corner and (x2,y2) is the bottom-right corner where (x1,y1) is the upper-left corner and (x2,y2) is the bottom-right corner
*/ */
extern "C" extern "C"
void MatrixMul2DMultiTheading(XList * args); void _MatrixMul2DMultiTheading(XList * args);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -30,6 +30,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -30,6 +30,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
matrix multiplication (for 2d tensors) with multi-threading matrix multiplication (for 2d tensors) with multi-threading
c = trans(a) * trans(b) * alpha + c * beta c = trans(a) * trans(b) * alpha + c * beta
where trans() return the transposed matrix if the flag is fired where trans() return the transposed matrix if the flag is fired
>> a - tensor a >> a - tensor a
>> transposedA - indicates whether the matrices in a are transposed >> transposedA - indicates whether the matrices in a are transposed
>> b - tensor b >> b - tensor b
...@@ -39,10 +40,9 @@ where trans() return the transposed matrix if the flag is fired ...@@ -39,10 +40,9 @@ where trans() return the transposed matrix if the flag is fired
>> beta - another coefficient >> beta - another coefficient
>> parallelRunner - parallel processing module >> parallelRunner - parallel processing module
*/ */
void MatrixMul2DParallel(XTensor * a, MATRIX_TRANS_TYPE transposedA, void _MatrixMul2DParallel(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
XTensor * b, MATRIX_TRANS_TYPE transposedB, const XTensor * b, MATRIX_TRANS_TYPE transposedB,
XTensor * c, DTYPE alpha, DTYPE beta, XTensor * c, DTYPE alpha, DTYPE beta, XPRunner * parallelRunner)
XPRunner * parallelRunner)
{ {
CheckNTErrors((a && b && c), "Empty input tensors!"); CheckNTErrors((a && b && c), "Empty input tensors!");
CheckNTErrors((a->order == 2 && b->order == 2 && c->order == 2), CheckNTErrors((a->order == 2 && b->order == 2 && c->order == 2),
...@@ -56,7 +56,7 @@ void MatrixMul2DParallel(XTensor * a, MATRIX_TRANS_TYPE transposedA, ...@@ -56,7 +56,7 @@ void MatrixMul2DParallel(XTensor * a, MATRIX_TRANS_TYPE transposedA,
/* a * b */ /* a * b */
if (transposedA == X_NOTRANS && transposedB == X_NOTRANS) { if (transposedA == X_NOTRANS && transposedB == X_NOTRANS) {
RunParallel2D(parallelRunner, (void*)MatrixMul2DMultiTheading, an * am * bm, RunParallel2D(parallelRunner, (void*)_MatrixMul2DMultiTheading, an * am * bm,
cn, cm, 5, cn, cm, 5,
a, b, c, &alpha, &beta); a, b, c, &alpha, &beta);
} }
......
...@@ -27,12 +27,12 @@ ...@@ -27,12 +27,12 @@
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* /*
matrix multiplication (for 2d tensors) with multi-threading matrix multiplication (for 2d tensors) with multi-threading.
c = trans(a) * trans(b) * alpha + c * beta c = trans(a) * trans(b) * alpha + c * beta
where trans() return the transposed matrix if the flag is fired where trans() return the transposed matrix if the flag is fired.
*/ */
extern "C" extern "C"
void MatrixMul2DParallel(XTensor * a, MATRIX_TRANS_TYPE transposedA, XTensor * b, MATRIX_TRANS_TYPE transposedB, XTensor * c, void _MatrixMul2DParallel(const XTensor * a, MATRIX_TRANS_TYPE transposedA, const XTensor * b, MATRIX_TRANS_TYPE transposedB, XTensor * c,
DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0, XPRunner * parallelRunner = NULL); DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0, XPRunner * parallelRunner = NULL);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -30,10 +30,12 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -30,10 +30,12 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* /*
matrix multiplication of the two tensors matrix multiplication of the two tensors
for each 2-dimensional data array in a (denoted as ai) and for each 2-dimensional data array in a (denoted as ai) and
each 2-dimensional data array in b (denoted as bi), we have each 2-dimensional data array in b (denoted as bi), we have
ci = trans(ai) * trans(bi) * alpha + cm * beta ci = trans(ai) * trans(bi) * alpha + cm * beta
where trans() returns the transposed matrix if the flag is fired where trans() returns the transposed matrix if the flag is fired
>> a - tensor a >> a - tensor a
>> transposedA - indicates whether the matrices in a are transposed >> transposedA - indicates whether the matrices in a are transposed
>> b - tensor b >> b - tensor b
...@@ -43,8 +45,8 @@ where trans() returns the transposed matrix if the flag is fired ...@@ -43,8 +45,8 @@ where trans() returns the transposed matrix if the flag is fired
>> beta - another coefficient >> beta - another coefficient
>> parallelRunner - parallel processing module >> parallelRunner - parallel processing module
*/ */
void MatrixMulBatched(XTensor * a, MATRIX_TRANS_TYPE transposedA, void _MatrixMulBatched(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
XTensor * b, MATRIX_TRANS_TYPE transposedB, const XTensor * b, MATRIX_TRANS_TYPE transposedB,
XTensor * c, DTYPE alpha, DTYPE beta, XTensor * c, DTYPE alpha, DTYPE beta,
XPRunner * parallelRunner) XPRunner * parallelRunner)
{ {
...@@ -52,7 +54,9 @@ void MatrixMulBatched(XTensor * a, MATRIX_TRANS_TYPE transposedA, ...@@ -52,7 +54,9 @@ void MatrixMulBatched(XTensor * a, MATRIX_TRANS_TYPE transposedA,
CheckNTErrors((a->dataType == b->dataType && a->dataType == c->dataType), CheckNTErrors((a->dataType == b->dataType && a->dataType == c->dataType),
"Input tensors should have the same data type!"); "Input tensors should have the same data type!");
CheckNTErrors((a->order >= 2 && b->order >= 2 && c->order >= 2), CheckNTErrors((a->order >= 2 && b->order >= 2 && c->order >= 2),
"Input tensors must have a order > 2!"); "Input tensors must have a order >= 2!");
CheckNTErrors((a->order == b->order && a->order == c->order),
"Input tensor and output tensor must have same order!");
int an = transposedA == X_TRANS ? a->dimSizeRDI[0] : a->dimSizeRDI[1]; int an = transposedA == X_TRANS ? a->dimSizeRDI[0] : a->dimSizeRDI[1];
int am = transposedA == X_TRANS ? a->dimSizeRDI[1] : a->dimSizeRDI[0]; int am = transposedA == X_TRANS ? a->dimSizeRDI[1] : a->dimSizeRDI[0];
...@@ -109,7 +113,7 @@ void MatrixMulBatched(XTensor * a, MATRIX_TRANS_TYPE transposedA, ...@@ -109,7 +113,7 @@ void MatrixMulBatched(XTensor * a, MATRIX_TRANS_TYPE transposedA,
ProtectCudaDev(a->devID, devIDBackup); ProtectCudaDev(a->devID, devIDBackup);
cublasHandle_t * handle = a->mem != NULL ? a->mem->GetCublasHandle() : GDevs.GetCudaHandle(a->devID); cublasHandle_t * handle = a->mem != NULL ? a->mem->GetCublasHandle() : GDevs.GetCudaHandle(a->devID);
CudaBLASMatrixMULList(handle, _CudaBLASMatrixMULList(handle,
aList, transposedA, aList, transposedA,
bList, transposedB, bList, transposedB,
cList, aList->count, cList, aList->count,
...@@ -122,7 +126,7 @@ void MatrixMulBatched(XTensor * a, MATRIX_TRANS_TYPE transposedA, ...@@ -122,7 +126,7 @@ void MatrixMulBatched(XTensor * a, MATRIX_TRANS_TYPE transposedA,
} }
else { else {
CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!"); CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!");
MatrixMULBatchedCPU(aList, transposedA, _MatrixMULBatchedCPU(aList, transposedA,
bList, transposedB, bList, transposedB,
cList, alpha, beta); cList, alpha, beta);
} }
...@@ -150,4 +154,65 @@ void MatrixMulBatched(XTensor * a, MATRIX_TRANS_TYPE transposedA, ...@@ -150,4 +154,65 @@ void MatrixMulBatched(XTensor * a, MATRIX_TRANS_TYPE transposedA,
delete cList; delete cList;
} }
/*
matrix multiplication of the two tensors (return a XTensor structure)
make a new tensor c to keep the result and return it
for each 2-dimensional data array in a (denoted as ai) and
each 2-dimensional data array in b (denoted as bi), we have
ci = trans(ai) * trans(bi) * alpha + cm * beta
where trans() returns the transposed matrix if the flag is fired.
>> a - tensor a
>> transposedA - indicates whether the matrices in a are transposed
>> b - tensor b
>> transposedB - indicates whether the matrices in b are transposed
>> alpha - a coefficient
>> beta - another coefficient
>> parallelRunner - parallel processing module
<< return - the result of matrix multiplication of the two tensors
*/
XTensor MatrixMulBatched(const XTensor &a, MATRIX_TRANS_TYPE transposedA, const XTensor &b, MATRIX_TRANS_TYPE transposedB,
                         DTYPE alpha, DTYPE beta, XPRunner * parallelRunner)
{
    /* NOTE: the old (&a && &b) check was removed - the address of a reference
       can never be null, so it could never fire */
    CheckNTErrors(a.dataType == b.dataType, "Input tensors should have the same data type!");
    CheckNTErrors((a.order >= 2 && b.order >= 2), "Input tensors must have a order >= 2!");
    CheckNTErrors(a.order == b.order, "Input tensors must have the same order!");

    /* sizes of the 2D matrices to be multiplied
       (dimSizeRDI stores dimensions in reversed index order) */
    int an = transposedA == X_TRANS ? a.dimSizeRDI[0] : a.dimSizeRDI[1];
    int am = transposedA == X_TRANS ? a.dimSizeRDI[1] : a.dimSizeRDI[0];
    int bn = transposedB == X_TRANS ? b.dimSizeRDI[0] : b.dimSizeRDI[1];
    int bm = transposedB == X_TRANS ? b.dimSizeRDI[1] : b.dimSizeRDI[0];

    CheckNTErrors(am == bn, "Unmatched tensors in multiplication!");

    int order = a.order;
    int sub = 0;
    int * dimSize = new int[order];

    /* keep the batch dimensions of a; the trailing two dimensions
       of the result become an x bm */
    for (int i = 2; i < a.order; i++)
        dimSize[sub++] = a.dimSizeRDI[i];
    dimSize[sub++] = an;
    dimSize[sub++] = bm;

    XTensor c = NewTensor(order, dimSize, a.dataType, a.denseRatio, a.devID, a.mem);
    c.SetZeroAll();
    c.SetTMP();

    /* call _MatrixMulBatched function */
    _MatrixMulBatched(&a, transposedA, &b, transposedB, &c, alpha, beta, parallelRunner);

    /* tensor connections */
    XLink::MakeLink(&a, &b, &c, MATH_MATRIXMULBATCHED);
    XLink::AddParamToHeadTrans(&c, transposedA);
    XLink::AddParamToHeadTrans(&c, transposedB);
    XLink::AddParamToHead(&c, alpha);
    XLink::AddParamToHead(&c, beta);

    /* destroy variables: allocated with new[], so delete[] is required
       (plain delete here is undefined behavior) */
    delete[] dimSize;

    return c;
}
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
...@@ -28,13 +28,25 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -28,13 +28,25 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* /*
matrix multiplication of the two tensors matrix multiplication of the two tensors
for each 2-dimensional data array in a (denoted as ai) and
each 2-dimensional data array in b (denoted as bi), we have
ci = trans(ai) * trans(bi) * alpha + cm * beta
where trans() returns the transposed matrix if the flag is fired
*/
void _MatrixMulBatched(const XTensor * a, MATRIX_TRANS_TYPE transposedA, const XTensor * b, MATRIX_TRANS_TYPE transposedB,
XTensor * c, DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0, XPRunner * parallelRunner = NULL);
/*
matrix multiplication of the two tensors (return a XTensor structure)
make a new tensor to keep the result and return it
for each 2-dimensional data array in a (denoted as ai) and for each 2-dimensional data array in a (denoted as ai) and
each 2-dimensional data array in b (denoted as bi), we have each 2-dimensional data array in b (denoted as bi), we have
ci = trans(ai) * trans(bi) * alpha + cm * beta ci = trans(ai) * trans(bi) * alpha + cm * beta
where trans() returns the transposed matrix if the flag is fired where trans() returns the transposed matrix if the flag is fired
*/ */
extern "C" XTensor MatrixMulBatched(const XTensor &a, MATRIX_TRANS_TYPE transposedA, const XTensor &b, MATRIX_TRANS_TYPE transposedB,
void MatrixMulBatched(XTensor * a, MATRIX_TRANS_TYPE transposedA, XTensor * b, MATRIX_TRANS_TYPE transposedB, XTensor * c,
DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0, XPRunner * parallelRunner = NULL); DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0, XPRunner * parallelRunner = NULL);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -28,14 +28,15 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -28,14 +28,15 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* /*
element-wise product of two tensors element-wise product of two tensors
c(i) = a(i)*b(i) + \alpha * c(i) c(i) = a(i)*b(i) + \alpha * c(i)
where i is the index of the item where i is the index of the item
>> a - matrix a >> a - matrix a
>> b - matrix b >> b - matrix b
>> c - result matrix >> c - result matrix
>> alpha - the coefficient >> alpha - the coefficient
>> leadingDim - the dimension along which we perform broadcasting >> leadingDim - the dimension along which we perform broadcasting
>>
*/ */
void _Multiply(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, int leadingDim) void _Multiply(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, int leadingDim)
{ {
...@@ -121,9 +122,12 @@ void _Multiply(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, i ...@@ -121,9 +122,12 @@ void _Multiply(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, i
} }
/* /*
element-wise product of two tensors and keep the result in the input element-wise product of two tensors (do it on site)
keep the result in the input tensor a and return nothing
a(i) = a(i)*b(i) + \alpha * a(i) a(i) = a(i)*b(i) + \alpha * a(i)
where i is the index of the item where i is the index of the item
>> a - tensor a (where keep the result) >> a - tensor a (where keep the result)
>> b - tensor b >> b - tensor b
>> alpha - the coefficient >> alpha - the coefficient
...@@ -135,9 +139,12 @@ void _MultiplyMe(XTensor * a, const XTensor * b, DTYPE alpha, int leadingDim) ...@@ -135,9 +139,12 @@ void _MultiplyMe(XTensor * a, const XTensor * b, DTYPE alpha, int leadingDim)
} }
/* /*
make a tensor of the element-wise product for two input tensors: element-wise product of two tensors (return a XTensor structure)
make a new tensor c to keep the result and return it
c(i) = a(i)*b(i) + \alpha * c(i) c(i) = a(i)*b(i) + \alpha * c(i)
where i is the index of the item where i is the index of the item
>> a - tensor a >> a - tensor a
>> b - tensor b >> b - tensor b
>> alpha - the coefficient >> alpha - the coefficient
...@@ -151,7 +158,7 @@ XTensor Multiply(const XTensor &a, const XTensor &b, DTYPE alpha, int leadingDim ...@@ -151,7 +158,7 @@ XTensor Multiply(const XTensor &a, const XTensor &b, DTYPE alpha, int leadingDim
XTensor c(&a); XTensor c(&a);
c.SetTMP(); c.SetTMP();
/* computation */ /* call _Multiply function */
_Multiply(&a, &b, &c, alpha, leadingDim); _Multiply(&a, &b, &c, alpha, leadingDim);
/* tensor connections */ /* tensor connections */
......
...@@ -26,19 +26,27 @@ ...@@ -26,19 +26,27 @@
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* element-wise product of two tensors: /*
c(i) = a(i)*b(i) + \alpha * c(i) element-wise product of two tensors:
where i is the index of the element */ c(i) = a(i)*b(i) + \alpha * c(i)
where i is the index of the element
*/
void _Multiply(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha = 0, int leadingDim = 0); void _Multiply(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha = 0, int leadingDim = 0);
/* element-wise product of two tensors and keep the result in the input tensor: /*
a(i) = a(i)*b(i) + \alpha * a(i) element-wise product of two tensors (do it on site)
where i is the index of the element */ keep the result in the input tensor a and return nothing
a(i) = a(i)*b(i) + \alpha * a(i)
where i is the index of the element
*/
void _MultiplyMe(XTensor * a, const XTensor * b, DTYPE alpha = 0, int leadingDim = 0); void _MultiplyMe(XTensor * a, const XTensor * b, DTYPE alpha = 0, int leadingDim = 0);
/* make a tensor of the element-wise product for two input tensors: /*
c(i) = a(i)*b(i) + \alpha * c(i) element-wise product of two tensors (return a XTensor structure)
where i is the index of the element */ make a new tensor to keep the result and return it
c(i) = a(i)*b(i) + \alpha * c(i)
where i is the index of the element
*/
XTensor Multiply(const XTensor &a, const XTensor &b, DTYPE alpha = 0, int leadingDim = 0); XTensor Multiply(const XTensor &a, const XTensor &b, DTYPE alpha = 0, int leadingDim = 0);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -29,12 +29,12 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -29,12 +29,12 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
set every entry to its minus value set every entry to its minus value
>> a - the tensor we are processing >> a - the tensor we are processing
*/ */
void Negate(XTensor * a) void _Negate(XTensor * a)
{ {
#ifdef USE_CUDA #ifdef USE_CUDA
/* run it on GPUs */ /* run it on GPUs */
if (a->devID >= 0) { if (a->devID >= 0) {
CudaNegate(a); _CudaNegate(a);
return; return;
} }
#endif #endif
......
...@@ -66,7 +66,7 @@ set each entry to its negtive value ...@@ -66,7 +66,7 @@ set each entry to its negtive value
>> a - the tensor >> a - the tensor
*/ */
extern "C" extern "C"
void CudaNegate(XTensor * a) void _CudaNegate(XTensor * a)
{ {
CheckNTErrors((a->isSparse == false), "TODO!"); CheckNTErrors((a->isSparse == false), "TODO!");
......
...@@ -19,6 +19,9 @@ ...@@ -19,6 +19,9 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#ifndef __NEGATE_CUH__
#define __NEGATE_CUH__
#include "Negate.h" #include "Negate.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
...@@ -35,8 +38,10 @@ void KernelNegate(__half * d, int size); ...@@ -35,8 +38,10 @@ void KernelNegate(__half * d, int size);
/* set each entry to its negtive value */ /* set each entry to its negtive value */
extern "C" extern "C"
void CudaNegate(XTensor * a); void _CudaNegate(XTensor * a);
#endif // USE_CUDA #endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
#endif // __NEGATE_CUH__
\ No newline at end of file
...@@ -28,7 +28,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -28,7 +28,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* set every entry to its minus value */ /* set every entry to its minus value */
extern "C" extern "C"
void Negate(XTensor * a); void _Negate(XTensor * a);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -29,12 +29,12 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -29,12 +29,12 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
set every entry to its sign value set every entry to its sign value
>> a - the tensor we are processing >> a - the tensor we are processing
*/ */
void Sign(XTensor * a) void _Sign(XTensor * a)
{ {
#ifdef USE_CUDA #ifdef USE_CUDA
/* run it on GPUs */ /* run it on GPUs */
if (a->devID >= 0) { if (a->devID >= 0) {
CudaSign(a); _CudaSign(a);
return; return;
} }
#endif #endif
......
...@@ -64,7 +64,7 @@ set each entry to its with float16 data type value ...@@ -64,7 +64,7 @@ set each entry to its with float16 data type value
>> a - the tensor >> a - the tensor
*/ */
extern "C" extern "C"
void CudaSign(XTensor * a) void _CudaSign(XTensor * a)
{ {
CheckNTErrors((a->isSparse == false), "TODO!"); CheckNTErrors((a->isSparse == false), "TODO!");
......
...@@ -19,6 +19,9 @@ ...@@ -19,6 +19,9 @@
* $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-7-11 * $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-7-11
*/ */
#ifndef __SIGN_CUH__
#define __SIGN_CUH__
#include "Sign.h" #include "Sign.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
...@@ -35,8 +38,10 @@ void KernelSign(__half * d, int size); ...@@ -35,8 +38,10 @@ void KernelSign(__half * d, int size);
/* set each entry to its sign value */ /* set each entry to its sign value */
extern "C" extern "C"
void CudaSign(XTensor * a); void _CudaSign(XTensor * a);
#endif // USE_CUDA #endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
#endif // __SIGN_CUH__
\ No newline at end of file
...@@ -28,7 +28,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -28,7 +28,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* set every entry to its sign value */ /* set every entry to its sign value */
extern "C" extern "C"
void Sign(XTensor * a); void _Sign(XTensor * a);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -29,7 +29,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -29,7 +29,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* /*
tensor summation c = a + b * \beta tensor summation c = a + b * \beta
return a pointer
>> a - a tensor >> a - a tensor
>> b - another tensor >> b - another tensor
>> c - where we put a+b*\beta. we save it in a if c is NULL >> c - where we put a+b*\beta. we save it in a if c is NULL
...@@ -112,8 +112,9 @@ void _Sum(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta) ...@@ -112,8 +112,9 @@ void _Sum(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta)
} }
/* /*
tensor summation a = a + b * \beta tensor summation a = a + b * \beta (do it on site)
do it on site keep the result in the tensor a and return nothing
>> a - a tensor >> a - a tensor
>> b - another tensor >> b - another tensor
>> beta - the scaling factor >> beta - the scaling factor
...@@ -124,18 +125,20 @@ void _SumMe(XTensor * a, const XTensor * b, DTYPE beta) ...@@ -124,18 +125,20 @@ void _SumMe(XTensor * a, const XTensor * b, DTYPE beta)
} }
/* /*
tensor summation a = a + b * \beta tensor summation c = a + b * \beta (return a XTensor structure)
return a XTensor structure make a new tensor c to keep the result and return it
>> a - a tensor >> a - a tensor
>> b - another tensor >> b - another tensor
>> beta - the scaling factor >> beta - the scaling factor
<< return - the result of tensor summation
*/ */
XTensor Sum(const XTensor &a, const XTensor &b, DTYPE beta) XTensor Sum(const XTensor &a, const XTensor &b, DTYPE beta)
{ {
XTensor c(&a); XTensor c(&a);
c.SetTMP(); c.SetTMP();
/* computation */ /* call _Sum function */
_Sum(&a, &b, &c, beta); _Sum(&a, &b, &c, beta);
/* tensor connections */ /* tensor connections */
......
...@@ -29,10 +29,16 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -29,10 +29,16 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* tensor summation c = a + b * \beta */ /* tensor summation c = a + b * \beta */
void _Sum(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta = (DTYPE)1.0); void _Sum(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta = (DTYPE)1.0);
/* tensor summation a = a + b * \beta (return a pointer) */ /*
tensor summation a = a + b * \beta
keep the result in the input tensor a and return nothing
*/
void _SumMe(XTensor * a, const XTensor * b, DTYPE beta = (DTYPE)1.0); void _SumMe(XTensor * a, const XTensor * b, DTYPE beta = (DTYPE)1.0);
/* tensor summation c = a + b * \beta (return a structure) */ /*
tensor summation c = a + b * \beta
make a new tensor c to keep the result and return it
*/
XTensor Sum(const XTensor &a, const XTensor &b, DTYPE beta = (DTYPE)1.0); XTensor Sum(const XTensor &a, const XTensor &b, DTYPE beta = (DTYPE)1.0);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -37,11 +37,8 @@ where b is a vector. ...@@ -37,11 +37,8 @@ where b is a vector.
>> c - where we put a+b. we save it in a if c is NULL >> c - where we put a+b. we save it in a if c is NULL
>> beta - the scaling factor >> beta - the scaling factor
*/ */
void SumByColumnTV(XTensor * a, XTensor * b, XTensor * c, DTYPE beta) void _SumByColumnTV(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta)
{ {
if (c == NULL)
c = a;
CheckNTErrors((a && b && c), "Empty input tensors!"); CheckNTErrors((a && b && c), "Empty input tensors!");
CheckNTErrors((XTensor::IsIdentical(a, c)), "Unmatched tensors in addition!"); CheckNTErrors((XTensor::IsIdentical(a, c)), "Unmatched tensors in addition!");
CheckNTErrors((b->order == 2 && b->dimSizeRDI[0] == 1 && b->dimSizeRDI[1] == a->dimSizeRDI[1]), CheckNTErrors((b->order == 2 && b->dimSizeRDI[0] == 1 && b->dimSizeRDI[1] == a->dimSizeRDI[1]),
...@@ -56,7 +53,7 @@ void SumByColumnTV(XTensor * a, XTensor * b, XTensor * c, DTYPE beta) ...@@ -56,7 +53,7 @@ void SumByColumnTV(XTensor * a, XTensor * b, XTensor * c, DTYPE beta)
if (a->devID >= 0 || b->devID >= 0 || c->devID >= 0) { if (a->devID >= 0 || b->devID >= 0 || c->devID >= 0) {
#ifdef USE_CUDA #ifdef USE_CUDA
CudaSumByColumnTV(a, b, c, beta); _CudaSumByColumnTV(a, b, c, beta);
#endif #endif
} }
else { else {
......
...@@ -64,11 +64,8 @@ where b is a vector. ...@@ -64,11 +64,8 @@ where b is a vector.
>> c - where we put a+b. we save it in a if c is NULL >> c - where we put a+b. we save it in a if c is NULL
>> beta - the scaling factor >> beta - the scaling factor
*/ */
void CudaSumByColumnTV(XTensor * a, XTensor * b, XTensor * c, DTYPE beta) void _CudaSumByColumnTV(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta)
{ {
if (c == NULL)
c = a;
CheckNTErrors((a && b && c), "Empty input tensors!"); CheckNTErrors((a && b && c), "Empty input tensors!");
CheckNTErrors((XTensor::IsIdentical(a, c)), "Unmatched tensors in addition!"); CheckNTErrors((XTensor::IsIdentical(a, c)), "Unmatched tensors in addition!");
CheckNTErrors((b->order == 2 && b->dimSizeRDI[0] == 1 && b->dimSizeRDI[1] == a->dimSizeRDI[1]), CheckNTErrors((b->order == 2 && b->dimSizeRDI[0] == 1 && b->dimSizeRDI[1] == a->dimSizeRDI[1]),
......
...@@ -30,7 +30,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -30,7 +30,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* summation of a tensor and a vector (column vector) */ /* summation of a tensor and a vector (column vector) */
extern "C" extern "C"
void CudaSumByColumnTV(XTensor * a, XTensor * b, XTensor * c, DTYPE beta = (DTYPE)1.0); void _CudaSumByColumnTV(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta = (DTYPE)1.0);
#endif // USE_CUDA #endif // USE_CUDA
......
...@@ -28,7 +28,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -28,7 +28,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* sum of a tensor and a (column) vector */ /* sum of a tensor and a (column) vector */
extern "C" extern "C"
void SumByColumnTV(XTensor * a, XTensor * b, XTensor * c = NULL, DTYPE beta = (DTYPE)1.0); void _SumByColumnTV(const XTensor * a, const XTensor * b, XTensor * c = NULL, DTYPE beta = (DTYPE)1.0);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -37,11 +37,8 @@ where c and a are vectors, and b_col is a column in b. ...@@ -37,11 +37,8 @@ where c and a are vectors, and b_col is a column in b.
>> c - where we put a+b. we save it in a if c is NULL >> c - where we put a+b. we save it in a if c is NULL
>> beta - the scaling factor >> beta - the scaling factor
*/ */
void SumByColumnVT(XTensor * a, XTensor * b, XTensor * c, DTYPE beta) void _SumByColumnVT(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta)
{ {
if (c == NULL)
c = a;
CheckNTErrors((a && b && c), "Empty input tensors!"); CheckNTErrors((a && b && c), "Empty input tensors!");
CheckNTErrors((XTensor::IsIdentical(a, c)), "Unmatched tensors in addition!"); CheckNTErrors((XTensor::IsIdentical(a, c)), "Unmatched tensors in addition!");
CheckNTErrors((a->order == 2 && a->dimSizeRDI[0] == 1 && b->dimSizeRDI[1] == a->dimSizeRDI[1]), CheckNTErrors((a->order == 2 && a->dimSizeRDI[0] == 1 && b->dimSizeRDI[1] == a->dimSizeRDI[1]),
...@@ -49,7 +46,7 @@ void SumByColumnVT(XTensor * a, XTensor * b, XTensor * c, DTYPE beta) ...@@ -49,7 +46,7 @@ void SumByColumnVT(XTensor * a, XTensor * b, XTensor * c, DTYPE beta)
if (a->devID >= 0 || b->devID >= 0 || c->devID >= 0) { if (a->devID >= 0 || b->devID >= 0 || c->devID >= 0) {
#ifdef USE_CUDA #ifdef USE_CUDA
CudaSumByColumnVT(a, b, c, beta); _CudaSumByColumnVT(a, b, c, beta);
#endif #endif
} }
else { else {
......
...@@ -80,11 +80,8 @@ where c and a are vectors, and b_col is a column in b. ...@@ -80,11 +80,8 @@ where c and a are vectors, and b_col is a column in b.
>> c - where we put a+b. we save it in a if c is NULL >> c - where we put a+b. we save it in a if c is NULL
>> beta - the scaling factor >> beta - the scaling factor
*/ */
void CudaSumByColumnVT(XTensor * a, XTensor * b, XTensor * c, DTYPE beta) void _CudaSumByColumnVT(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta)
{ {
if (c == NULL)
c = a;
CheckNTErrors((a && b && c), "Empty input tensors!"); CheckNTErrors((a && b && c), "Empty input tensors!");
CheckNTErrors((XTensor::IsIdentical(a, c)), "Unmatched tensors in addition!"); CheckNTErrors((XTensor::IsIdentical(a, c)), "Unmatched tensors in addition!");
CheckNTErrors((a->order == 2 && a->dimSizeRDI[0] == 1 && b->dimSizeRDI[1] == a->dimSizeRDI[1]), CheckNTErrors((a->order == 2 && a->dimSizeRDI[0] == 1 && b->dimSizeRDI[1] == a->dimSizeRDI[1]),
......
...@@ -30,7 +30,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -30,7 +30,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* summation of a vector (column vector) and a tensor */ /* summation of a vector (column vector) and a tensor */
extern "C" extern "C"
void CudaSumByColumnVT(XTensor * a, XTensor * b, XTensor * c, DTYPE beta = (DTYPE)1.0); void _CudaSumByColumnVT(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta = (DTYPE)1.0);
#endif // USE_CUDA #endif // USE_CUDA
......
...@@ -28,7 +28,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -28,7 +28,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* sum of a (column) vector and a tensor */ /* sum of a (column) vector and a tensor */
extern "C" extern "C"
void SumByColumnVT(XTensor * a, XTensor * b, XTensor * c = NULL, DTYPE beta = (DTYPE)1.0); void _SumByColumnVT(const XTensor * a, const XTensor * b, XTensor * c = NULL, DTYPE beta = (DTYPE)1.0);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -36,8 +36,8 @@ c = trans(a) * trans(b) * \alpha + c * \beta ...@@ -36,8 +36,8 @@ c = trans(a) * trans(b) * \alpha + c * \beta
>> beta - scalar >> beta - scalar
>> c - output matrix (2d tensor) >> c - output matrix (2d tensor)
*/ */
void MatrixMULCPU(XTensor * a, MATRIX_TRANS_TYPE transposedA, void _MatrixMULCPU(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
XTensor * b, MATRIX_TRANS_TYPE transposedB, const XTensor * b, MATRIX_TRANS_TYPE transposedB,
XTensor * c, DTYPE alpha, DTYPE beta) XTensor * c, DTYPE alpha, DTYPE beta)
{ {
CheckNTErrors((a && b && c), "Empty input tensors!"); CheckNTErrors((a && b && c), "Empty input tensors!");
......
...@@ -31,9 +31,9 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -31,9 +31,9 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* /*
matrix multiplication via cuda version BLAS matrix multiplication via cuda version BLAS
*/ */
void CudaBLASMatrixMUL(cublasHandle_t * handle, void _CudaBLASMatrixMUL(cublasHandle_t * handle,
void * a, MATRIX_TRANS_TYPE transposedA, TENSOR_DATA_TYPE dataTypeA, const void * a, MATRIX_TRANS_TYPE transposedA, TENSOR_DATA_TYPE dataTypeA,
void * b, MATRIX_TRANS_TYPE transposedB, TENSOR_DATA_TYPE dataTypeB, const void * b, MATRIX_TRANS_TYPE transposedB, TENSOR_DATA_TYPE dataTypeB,
void * c, TENSOR_DATA_TYPE dataTypeC, void * c, TENSOR_DATA_TYPE dataTypeC,
int na, int ma, int nb, int mb, int nc, int mc, int na, int ma, int nb, int mb, int nc, int mc,
DTYPE alpha, DTYPE beta) DTYPE alpha, DTYPE beta)
...@@ -88,7 +88,7 @@ void CudaBLASMatrixMUL(cublasHandle_t * handle, ...@@ -88,7 +88,7 @@ void CudaBLASMatrixMUL(cublasHandle_t * handle,
/* /*
matrix multiplication via cuda version BLAS matrix multiplication via cuda version BLAS
*/ */
void CudaBLASMatrixMULBatched(cublasHandle_t * handle, void _CudaBLASMatrixMULBatched(cublasHandle_t * handle,
const void ** a, MATRIX_TRANS_TYPE transposedA, TENSOR_DATA_TYPE dataTypeA, const void ** a, MATRIX_TRANS_TYPE transposedA, TENSOR_DATA_TYPE dataTypeA,
const void ** b, MATRIX_TRANS_TYPE transposedB, TENSOR_DATA_TYPE dataTypeB, const void ** b, MATRIX_TRANS_TYPE transposedB, TENSOR_DATA_TYPE dataTypeB,
void ** c, TENSOR_DATA_TYPE dataTypeC, void ** c, TENSOR_DATA_TYPE dataTypeC,
...@@ -144,7 +144,7 @@ void CudaBLASMatrixMULBatched(cublasHandle_t * handle, ...@@ -144,7 +144,7 @@ void CudaBLASMatrixMULBatched(cublasHandle_t * handle,
/* matrix multiplication in batch and strided mode via cuda version BLAS */ /* matrix multiplication in batch and strided mode via cuda version BLAS */
extern "C" extern "C"
void CudaBLASMatrixMULBatchedStrided(cublasHandle_t * handle, void _CudaBLASMatrixMULBatchedStrided(cublasHandle_t * handle,
const void * a, MATRIX_TRANS_TYPE transposedA, TENSOR_DATA_TYPE dataTypeA, long long int strideA, const void * a, MATRIX_TRANS_TYPE transposedA, TENSOR_DATA_TYPE dataTypeA, long long int strideA,
const void * b, MATRIX_TRANS_TYPE transposedB, TENSOR_DATA_TYPE dataTypeB, long long int strideB, const void * b, MATRIX_TRANS_TYPE transposedB, TENSOR_DATA_TYPE dataTypeB, long long int strideB,
void * c, TENSOR_DATA_TYPE dataTypeC, long long int strideC, void * c, TENSOR_DATA_TYPE dataTypeC, long long int strideC,
...@@ -201,9 +201,9 @@ void CudaBLASMatrixMULBatchedStrided(cublasHandle_t * handle, ...@@ -201,9 +201,9 @@ void CudaBLASMatrixMULBatchedStrided(cublasHandle_t * handle,
/* /*
matrix multiplication via cuda version BLAS matrix multiplication via cuda version BLAS
*/ */
void CudaBLASMatrixMULList(cublasHandle_t * handle, void _CudaBLASMatrixMULList(cublasHandle_t * handle,
XList * a, MATRIX_TRANS_TYPE transposedA, const XList * a, MATRIX_TRANS_TYPE transposedA,
XList * b, MATRIX_TRANS_TYPE transposedB, const XList * b, MATRIX_TRANS_TYPE transposedB,
XList * c, XList * c,
int count, DTYPE alpha, DTYPE beta) int count, DTYPE alpha, DTYPE beta)
{ {
...@@ -255,7 +255,7 @@ void CudaBLASMatrixMULList(cublasHandle_t * handle, ...@@ -255,7 +255,7 @@ void CudaBLASMatrixMULList(cublasHandle_t * handle,
if (isUniform) { if (isUniform) {
XMem * mem = a0->mem; XMem * mem = a0->mem;
if (isStrided && a->count > 1) { if (isStrided && a->count > 1) {
CudaBLASMatrixMULBatchedStrided(handle, _CudaBLASMatrixMULBatchedStrided(handle,
a0->data, transposedA, a0->dataType, strideA / a0->unitSize, a0->data, transposedA, a0->dataType, strideA / a0->unitSize,
b0->data, transposedB, b0->dataType, strideB / b0->unitSize, b0->data, transposedB, b0->dataType, strideB / b0->unitSize,
c0->data, c0->dataType, strideC / c0->unitSize, a->count, c0->data, c0->dataType, strideC / c0->unitSize, a->count,
...@@ -297,7 +297,7 @@ void CudaBLASMatrixMULList(cublasHandle_t * handle, ...@@ -297,7 +297,7 @@ void CudaBLASMatrixMULList(cublasHandle_t * handle,
cudaMemcpy(bpGPU, bp, sizeof(DTYPE*) * b->count, cudaMemcpyHostToDevice); cudaMemcpy(bpGPU, bp, sizeof(DTYPE*) * b->count, cudaMemcpyHostToDevice);
cudaMemcpy(cpGPU, cp, sizeof(DTYPE*) * c->count, cudaMemcpyHostToDevice); cudaMemcpy(cpGPU, cp, sizeof(DTYPE*) * c->count, cudaMemcpyHostToDevice);
CudaBLASMatrixMULBatched(handle, _CudaBLASMatrixMULBatched(handle,
(const void**)apGPU, transposedA, a0->dataType, (const void**)apGPU, transposedA, a0->dataType,
(const void**)bpGPU, transposedB, b0->dataType, (const void**)bpGPU, transposedB, b0->dataType,
(void**)cpGPU, c0->dataType, a->count, (void**)cpGPU, c0->dataType, a->count,
...@@ -324,7 +324,7 @@ void CudaBLASMatrixMULList(cublasHandle_t * handle, ...@@ -324,7 +324,7 @@ void CudaBLASMatrixMULList(cublasHandle_t * handle,
XTensor * bi = (XTensor*)b->GetItem(i); XTensor * bi = (XTensor*)b->GetItem(i);
XTensor * ci = (XTensor*)c->GetItem(i); XTensor * ci = (XTensor*)c->GetItem(i);
CudaBLASMatrixMUL(handle, _CudaBLASMatrixMUL(handle,
ai->data, transposedA, ai->dataType, ai->data, transposedA, ai->dataType,
bi->data, transposedB, bi->dataType, bi->data, transposedB, bi->dataType,
ci->data, ci->dataType, ci->data, ci->dataType,
......
...@@ -28,21 +28,21 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -28,21 +28,21 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* matrix multiplication (BLAS) */ /* matrix multiplication (BLAS) */
extern "C" extern "C"
void MatrixMULCPU(XTensor * a, MATRIX_TRANS_TYPE transposedA, XTensor * b, MATRIX_TRANS_TYPE transposedB, XTensor * c, DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0); void _MatrixMULCPU(const XTensor * a, MATRIX_TRANS_TYPE transposedA, const XTensor * b, MATRIX_TRANS_TYPE transposedB, XTensor * c, DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0);
#ifdef USE_CUDA #ifdef USE_CUDA
/* matrix multiplication via cuda version BLAS */ /* matrix multiplication via cuda version BLAS */
extern "C" extern "C"
void CudaBLASMatrixMUL(cublasHandle_t * handle, void _CudaBLASMatrixMUL(cublasHandle_t * handle,
void * a, MATRIX_TRANS_TYPE transposedA, TENSOR_DATA_TYPE dataTypeA, const void * a, MATRIX_TRANS_TYPE transposedA, TENSOR_DATA_TYPE dataTypeA,
void * b, MATRIX_TRANS_TYPE transposedB, TENSOR_DATA_TYPE dataTypeB, const void * b, MATRIX_TRANS_TYPE transposedB, TENSOR_DATA_TYPE dataTypeB,
void * c, TENSOR_DATA_TYPE dataTypeC, void * c, TENSOR_DATA_TYPE dataTypeC,
int na, int ma, int nb, int mb, int nc, int mc, DTYPE alpha = (DTYPE)1.0, DTYPE beta = 1.0); int na, int ma, int nb, int mb, int nc, int mc, DTYPE alpha = (DTYPE)1.0, DTYPE beta = 1.0);
/* matrix multiplication in batch mode via cuda version BLAS */ /* matrix multiplication in batch mode via cuda version BLAS */
extern "C" extern "C"
void CudaBLASMatrixMULBatched(cublasHandle_t * handle, void _CudaBLASMatrixMULBatched(cublasHandle_t * handle,
const void ** a, MATRIX_TRANS_TYPE transposedA, TENSOR_DATA_TYPE dataTypeA, const void ** a, MATRIX_TRANS_TYPE transposedA, TENSOR_DATA_TYPE dataTypeA,
const void ** b, MATRIX_TRANS_TYPE transposedB, TENSOR_DATA_TYPE dataTypeB, const void ** b, MATRIX_TRANS_TYPE transposedB, TENSOR_DATA_TYPE dataTypeB,
void ** c, TENSOR_DATA_TYPE dataTypeC, void ** c, TENSOR_DATA_TYPE dataTypeC,
...@@ -50,7 +50,7 @@ void CudaBLASMatrixMULBatched(cublasHandle_t * handle, ...@@ -50,7 +50,7 @@ void CudaBLASMatrixMULBatched(cublasHandle_t * handle,
/* matrix multiplication in batch and strided mode via cuda version BLAS */ /* matrix multiplication in batch and strided mode via cuda version BLAS */
extern "C" extern "C"
void CudaBLASMatrixMULBatchedStrided(cublasHandle_t * handle, void _CudaBLASMatrixMULBatchedStrided(cublasHandle_t * handle,
const void * a, MATRIX_TRANS_TYPE transposedA, TENSOR_DATA_TYPE dataTypeA, long long int strideA, const void * a, MATRIX_TRANS_TYPE transposedA, TENSOR_DATA_TYPE dataTypeA, long long int strideA,
const void * b, MATRIX_TRANS_TYPE transposedB, TENSOR_DATA_TYPE dataTypeB, long long int strideB, const void * b, MATRIX_TRANS_TYPE transposedB, TENSOR_DATA_TYPE dataTypeB, long long int strideB,
void * c, TENSOR_DATA_TYPE dataTypeC, long long int strideC, void * c, TENSOR_DATA_TYPE dataTypeC, long long int strideC,
...@@ -58,7 +58,7 @@ void CudaBLASMatrixMULBatchedStrided(cublasHandle_t * handle, ...@@ -58,7 +58,7 @@ void CudaBLASMatrixMULBatchedStrided(cublasHandle_t * handle,
/* matrix multiplication in batch mode via cuda version BLAS */ /* matrix multiplication in batch mode via cuda version BLAS */
extern "C" extern "C"
void CudaBLASMatrixMULList(cublasHandle_t * handle, XList * a, MATRIX_TRANS_TYPE transposedA, XList * b, MATRIX_TRANS_TYPE transposedB, XList * c, void _CudaBLASMatrixMULList(cublasHandle_t * handle, const XList * a, MATRIX_TRANS_TYPE transposedA, const XList * b, MATRIX_TRANS_TYPE transposedB, XList * c,
int count, DTYPE alpha = (DTYPE)1.0, DTYPE beta = 1.0); int count, DTYPE alpha = (DTYPE)1.0, DTYPE beta = 1.0);
#endif #endif
......
...@@ -30,15 +30,15 @@ convert data type ...@@ -30,15 +30,15 @@ convert data type
>> input - input tensor >> input - input tensor
>> output - output tensor >> output - output tensor
*/ */
void ConvertTensorDataType(XTensor * input, XTensor * output) void _ConvertDataType(const XTensor * input, XTensor * output)
{ {
CheckNTErrors(XTensor::IsIdentical(input, output), "Input and Output are different in type or size!"); CheckNTErrors((input->unitSize == output->unitSize), "Input and Output must be same in size!");
if (input->dataType == output->dataType) if (input->dataType == output->dataType)
return; return;
#ifdef USE_CUDA #ifdef USE_CUDA
/* run it on GPUs */ /* run it on GPUs */
if (input->devID >= 0) { if (input->devID >= 0) {
CudaConvertDataType(input, output); _CudaConvertDataType(input, output);
return; return;
} }
#endif #endif
......
...@@ -78,7 +78,7 @@ data conversion (cuda code) ...@@ -78,7 +78,7 @@ data conversion (cuda code)
>> typeT - target data type >> typeT - target data type
>> size - number of the items in s (and t) >> size - number of the items in s (and t)
*/ */
void CudaConvertDataType(int devID, void * s, TENSOR_DATA_TYPE typeS, void * t, TENSOR_DATA_TYPE typeT, int size) void _CudaConvertDataType(int devID, void * s, TENSOR_DATA_TYPE typeS, void * t, TENSOR_DATA_TYPE typeT, int size)
{ {
CheckNTErrors((devID >= 0), "This code must be run on GPUs!"); CheckNTErrors((devID >= 0), "This code must be run on GPUs!");
...@@ -112,9 +112,9 @@ convert data type (cuda code) ...@@ -112,9 +112,9 @@ convert data type (cuda code)
>> input - input tensor >> input - input tensor
>> output - output tensor >> output - output tensor
*/ */
void CudaConvertDataType(XTensor * input, XTensor * output) void _CudaConvertDataType(const XTensor * input, XTensor * output)
{ {
CheckNTErrors(XTensor::IsIdentical(input, output), "Input and Output are different in type or size!"); CheckNTErrors((input->unitSize == output->unitSize), "Input and Output must be same in size!");
if (input->dataType == output->dataType) if (input->dataType == output->dataType)
return; return;
......
...@@ -19,6 +19,9 @@ ...@@ -19,6 +19,9 @@
* $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-7-11 * $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-7-11
*/ */
#ifndef __CONVERTDATATYPE_CUH__
#define __CONVERTDATATYPE_CUH__
#include "ConvertDataType.h" #include "ConvertDataType.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
...@@ -42,8 +45,10 @@ __global__ ...@@ -42,8 +45,10 @@ __global__
void KernelIntToFloat(int * inputData, float * outputData, int size); void KernelIntToFloat(int * inputData, float * outputData, int size);
/* convert data type */ /* convert data type */
void CudaConvertDataType(XTensor * input, XTensor * output); void _CudaConvertDataType(const XTensor * input, XTensor * output);
#endif // USE_CUDA #endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
#endif // __CONVERTDATATYPE_H__
\ No newline at end of file
...@@ -27,7 +27,7 @@ ...@@ -27,7 +27,7 @@
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* convert data type */ /* convert data type */
void ConvertDataType(XTensor * input, XTensor * output); void _ConvertDataType(const XTensor * input, XTensor * output);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -26,8 +26,10 @@ ...@@ -26,8 +26,10 @@
namespace nts{ // namespace nts(NiuTrans.Tensor) namespace nts{ // namespace nts(NiuTrans.Tensor)
/* /*
generate a tensor with seleccted data in range[low,high] along the given dimension generate a tensor with selected data in range[low,high] along the given dimension
c = select(a) c = select(a)
>> a - input tensor >> a - input tensor
>> c - result tensor >> c - result tensor
>> dim - the dimension along with which we do the job >> dim - the dimension along with which we do the job
...@@ -35,7 +37,7 @@ c = select(a) ...@@ -35,7 +37,7 @@ c = select(a)
>> high - higher bound. >> high - higher bound.
Note that range [1,3] means that we select 1 and 2. Note that range [1,3] means that we select 1 and 2.
*/ */
void SelectRange(XTensor * a, XTensor * c, int dim, int low, int high) void _SelectRange(const XTensor * a, XTensor * c, int dim, int low, int high)
{ {
CheckNTErrors(a != NULL && c != NULL, "empty tensors!"); CheckNTErrors(a != NULL && c != NULL, "empty tensors!");
CheckNTErrors(a->order == c->order, "The input and output tensors must in the same order!"); CheckNTErrors(a->order == c->order, "The input and output tensors must in the same order!");
...@@ -76,4 +78,55 @@ void SelectRange(XTensor * a, XTensor * c, int dim, int low, int high) ...@@ -76,4 +78,55 @@ void SelectRange(XTensor * a, XTensor * c, int dim, int low, int high)
} }
} }
/*
generate a tensor with selected data in range [low,high) along the given dimension (return a XTensor structure)
make a new tensor to keep the result and return it
c = select(a)
>> a - input tensor
>> dim - the dimension along which we do the job
>> low - lower bound (inclusive; 0 selects from the first index)
>> high - higher bound (exclusive).
          Note that range [1,3] means that we select 1 and 2.
<< return - the result of the generated tensor with selected data
*/
XTensor SelectRange(const XTensor &a, int dim, int low, int high)
{
    CheckNTErrors(dim >= 0 && dim < a.order, "The input dimension is out of bounds!");
    CheckNTErrors(low < high, "Illegal range specified!");

    int order = a.order;
    int * dimSize = new int[order];

    for(int i = 0; i < order; i++){
        if(i == dim){
            /* low may legally be 0 (select from the first index);
               the old check "low > 0" wrongly rejected that case */
            CheckNTErrors(low >= 0 && low < a.dimSize[dim], "Illegal range specified!");
            CheckNTErrors(high > 0 && high <= a.dimSize[dim], "Illegal range specified!");
            dimSize[i] = high - low;
        }
        else
            dimSize[i] = a.dimSize[i];
    }

    XTensor c = NewTensor(order, dimSize, a.dataType, a.denseRatio, a.devID, a.mem);
    c.SetZeroAll();
    c.SetTMP();

    /* call _SelectRange function to do the actual copy */
    _SelectRange(&a, &c, dim, low, high);

    /* tensor connection */
    XLink::MakeLink(&a, NULL, &c, GETANDSET_SELECT);
    /* NOTE(review): low/high are ints; AddParamToHeadInt may be the
       intended overload (as used by Normalize for dim) — confirm */
    XLink::AddParamToHead(&c, low);
    XLink::AddParamToHead(&c, high);

    /* destroy variables: allocated with new[], so delete[] is required
       (plain delete on a new[] array is undefined behavior) */
    delete[] dimSize;

    return c;
}
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-07-04
*/
#ifndef __SELECT_CUH__
#define __SELECT_CUH__
#include "Select.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
/* generate a tensor with selected data c = select(a) */
extern "C"
void _CudaSelect(const XTensor * a, XTensor * c, XTensor * indexCPU);
/*
generate a tensor with selected data in range[low,high] along the given dimension
c = select(a)
*/
extern "C"
void _CudaSelectRange(const XTensor * a, XTensor * c, int dim, int low, int high);
} // namespace nts(NiuTrans.Tensor)
#endif // __SELECT_CUH__
\ No newline at end of file
...@@ -26,14 +26,29 @@ ...@@ -26,14 +26,29 @@
namespace nts{ // namespace nts(NiuTrans.Tensor) namespace nts{ // namespace nts(NiuTrans.Tensor)
/* generate a tensor with seleccted data c = select(a) */ /* generate a tensor with selected data c = select(a) */
extern "C" extern "C"
void Select(XTensor * a, XTensor * c, XTensor * indexCPU); void _Select(const XTensor * a, XTensor * c, XTensor * indexCPU);
/* generate a tensor with seleccted data in range[low,high] along the given dimension /*
c = select(a) */ generate a tensor with selected data c = select(a) (return a XTensor structure)
make a new tensor to keep the result and return it
*/
XTensor Select(const XTensor &a, XTensor &indexCPU);
/*
generate a tensor with selected data in range[low,high] along the given dimension
c = select(a)
*/
extern "C" extern "C"
void SelectRange(XTensor * a, XTensor * c, int dim, int low, int high); void _SelectRange(const XTensor * a, XTensor * c, int dim, int low, int high);
/*
generate a tensor with selected data in range[low,high] along the given dimension (return a XTensor structure)
make a new tensor to keep the result and return it
c = select(a)
*/
XTensor SelectRange(const XTensor &a, int dim, int low, int high);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -77,7 +77,7 @@ void SetDataRand(XTensor * tensor, DTYPE low, DTYPE high) ...@@ -77,7 +77,7 @@ void SetDataRand(XTensor * tensor, DTYPE low, DTYPE high)
else{ else{
XTensor * t2 = NewTensor(tensor->order, tensor->dimSize, tensor->dataType, tensor->denseRatio, -1); XTensor * t2 = NewTensor(tensor->order, tensor->dimSize, tensor->dataType, tensor->denseRatio, -1);
SetDataRand(t2, low, high); SetDataRand(t2, low, high);
CopyValues(t2, tensor); _CopyValues(t2, tensor);
delete t2; delete t2;
} }
} }
......
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include "../../XTensor.h" #include "../../XTensor.h"
#include "Log.h" #include "Log.h"
#include "Log.cuh" #include "Log.cuh"
#include <math.h>
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
...@@ -29,12 +30,12 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -29,12 +30,12 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
set every entry to its log value set every entry to its log value
>> a - the tensor we are processing >> a - the tensor we are processing
*/ */
void Log(XTensor * a) void _Log(XTensor * a)
{ {
#ifdef USE_CUDA #ifdef USE_CUDA
/* run it on GPUs */ /* run it on GPUs */
if (a->devID >= 0) { if (a->devID >= 0) {
CudaLog(a); _CudaLog(a);
return; return;
} }
#endif #endif
......
...@@ -58,7 +58,7 @@ set each entry to its log value ...@@ -58,7 +58,7 @@ set each entry to its log value
>> a - the tensor >> a - the tensor
*/ */
extern "C" extern "C"
void CudaLog(XTensor * a) void _CudaLog(XTensor * a)
{ {
CheckNTErrors((a->isSparse == false), "TODO!"); CheckNTErrors((a->isSparse == false), "TODO!");
......
...@@ -19,6 +19,9 @@ ...@@ -19,6 +19,9 @@
* $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-7-11 * $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-7-11
*/ */
#ifndef __LOG_CUH__
#define __LOG_CUH__
#include "Log.h" #include "Log.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
...@@ -35,8 +38,10 @@ void KernelLog(__half * d, int size); ...@@ -35,8 +38,10 @@ void KernelLog(__half * d, int size);
/* set each entry to its log value */ /* set each entry to its log value */
extern "C" extern "C"
void CudaLog(XTensor * a); void _CudaLog(XTensor * a);
#endif // USE_CUDA #endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
#endif // __LOG_CUH__
\ No newline at end of file
...@@ -28,7 +28,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -28,7 +28,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* set every entry to its log value */ /* set every entry to its log value */
extern "C" extern "C"
void Log(XTensor * a); void _Log(XTensor * a);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -21,15 +21,18 @@ ...@@ -21,15 +21,18 @@
#include <math.h> #include <math.h>
#include "../../XTensor.h" #include "../../XTensor.h"
#include "../../XName.h"
#include "Normalize.h" #include "Normalize.h"
#include "Normalize.cuh" #include "Normalize.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* /*
normalized the data with normal distribution. For an input x, normalized the data with normal distribution
y = a * (x-mean)/sqrt(variance+\epsilon) + b
For an input x, y = a * (x-mean)/sqrt(variance+\epsilon) + b
where a and b are the scalar and bias respectively, and \epsilon is the adjustment parameter. where a and b are the scalar and bias respectively, and \epsilon is the adjustment parameter.
>> input - the input tensor >> input - the input tensor
>> output - the output tensor >> output - the output tensor
>> dim - dimension alone which we generate the mean and variance >> dim - dimension alone which we generate the mean and variance
...@@ -39,7 +42,7 @@ where a and b are the scalar and bias respectively, and \epsilon is the adjustme ...@@ -39,7 +42,7 @@ where a and b are the scalar and bias respectively, and \epsilon is the adjustme
>> b - the bias >> b - the bias
>> epsilon - a parameter >> epsilon - a parameter
*/ */
void Normalize(XTensor * input, XTensor * output, int dim, XTensor * mean, XTensor * var, XTensor * a, XTensor * b, DTYPE epsilon) void _Normalize(const XTensor * input, XTensor * output, int dim, const XTensor * mean, const XTensor * var, const XTensor * a, const XTensor * b, DTYPE epsilon)
{ {
int dimRDI = input->order - dim - 1; int dimRDI = input->order - dim - 1;
CheckNTErrors((XTensor::IsIdentical(input, output)), "Unmatched input tensors!"); CheckNTErrors((XTensor::IsIdentical(input, output)), "Unmatched input tensors!");
...@@ -68,7 +71,7 @@ void Normalize(XTensor * input, XTensor * output, int dim, XTensor * mean, XTens ...@@ -68,7 +71,7 @@ void Normalize(XTensor * input, XTensor * output, int dim, XTensor * mean, XTens
if (input->devID >= 0 || output->devID >= 0) { if (input->devID >= 0 || output->devID >= 0) {
#ifdef USE_CUDA #ifdef USE_CUDA
CudaNormalize(input, output, dim, mean, var, a, b, epsilon); _CudaNormalize(input, output, dim, mean, var, a, b, epsilon);
#else #else
ShowNTErrors("Please specify USE_CUDA and recompile the code!"); ShowNTErrors("Please specify USE_CUDA and recompile the code!");
#endif #endif
...@@ -91,4 +94,61 @@ void Normalize(XTensor * input, XTensor * output, int dim, XTensor * mean, XTens ...@@ -91,4 +94,61 @@ void Normalize(XTensor * input, XTensor * output, int dim, XTensor * mean, XTens
} }
} }
} }
/*
normalize the data with normal distribution (do it on site, i.e. in place)
keep the result in the input tensor and return nothing
For an input x, x = a * (x-mean)/sqrt(variance+\epsilon) + b
where a and b are the scalar and bias respectively, and \epsilon is the adjustment parameter.
This is a thin wrapper: it forwards to _Normalize with input used as both
source and destination.
>> input - the input tensor (overwritten with the normalized result)
>> dim - dimension along which we generate the mean and variance
>> mean - the mean of the input
>> var - the variance of the input
>> a - the scalar
>> b - the bias
>> epsilon - a parameter (variance smoothing term)
*/
void _NormalizeMe(XTensor * input, int dim, const XTensor * mean, const XTensor * var, const XTensor * a, const XTensor * b, DTYPE epsilon)
{
    _Normalize(input, input, dim, mean, var, a, b, epsilon);
}
/*
normalize the data with normal distribution (return a XTensor structure)
make a new tensor to keep the result and return it
For an input x, y = a * (x-mean)/sqrt(variance+\epsilon) + b
where a and b are the scalar and bias respectively, and \epsilon is the adjustment parameter.
>> input - the input tensor
>> dim - dimension along which we generate the mean and variance
>> mean - the mean of the input
>> var - the variance of the input
>> a - the scalar
>> b - the bias
>> epsilon - a parameter
<< return - the normalized tensor
*/
XTensor Normalize(const XTensor &input, int dim, const XTensor &mean, const XTensor &var, const XTensor &a, const XTensor &b, DTYPE epsilon)
{
    /* the result tensor mirrors the shape and type of the input */
    XTensor y(&input);
    y.SetTMP();

    /* delegate the actual computation to the pointer-based routine */
    _Normalize(&input, &y, dim, &mean, &var, &a, &b, epsilon);

    /* tensor connections; the operand order (input, mean, var, a, b)
       must be preserved */
    XList operands(5);
    operands.Add(&input);
    operands.Add(&mean);
    operands.Add(&var);
    operands.Add(&a);
    operands.Add(&b);
    XLink::MakeLink(&operands, &y, MATH_NORMALIZE);
    XLink::AddParamToHeadInt(&y, dim);
    XLink::AddParamToHead(&y, epsilon);

    return y;
}
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
...@@ -89,9 +89,9 @@ where a and b are the scalar and bias respectively, and \epsilon is the adjustme ...@@ -89,9 +89,9 @@ where a and b are the scalar and bias respectively, and \epsilon is the adjustme
>> epsilon - a parameter >> epsilon - a parameter
*/ */
extern "C" extern "C"
void CudaNormalize(XTensor * input, XTensor * output, int dim, void _CudaNormalize(const XTensor * input, XTensor * output, int dim,
XTensor * mean, XTensor * var, const XTensor * mean, const XTensor * var,
XTensor * a, XTensor * b, const XTensor * a, const XTensor * b,
DTYPE epsilon) DTYPE epsilon)
{ {
CheckNTErrors((input->dataType == DEFAULT_DTYPE), "TODO!"); CheckNTErrors((input->dataType == DEFAULT_DTYPE), "TODO!");
......
...@@ -44,9 +44,9 @@ y = a * (x-mean)/sqrt(variance+\epsilon) + b ...@@ -44,9 +44,9 @@ y = a * (x-mean)/sqrt(variance+\epsilon) + b
where a and b are the scalar and bias respectively, and \epsilon is the adjustment parameter where a and b are the scalar and bias respectively, and \epsilon is the adjustment parameter
*/ */
extern "C" extern "C"
void CudaNormalize(XTensor * input, XTensor * output, int dim, void _CudaNormalize(const XTensor * input, XTensor * output, int dim,
XTensor * mean, XTensor * var, const XTensor * mean, const XTensor * var,
XTensor * a, XTensor * b, DTYPE epsilon); const XTensor * a, const XTensor * b, DTYPE epsilon);
#endif // USE_CUDA #endif // USE_CUDA
......
...@@ -27,12 +27,29 @@ ...@@ -27,12 +27,29 @@
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* /*
normalized the data with normal distribution. For an input x, normalized the data with normal distribution.
y = a * (x-mean)/sqrt(variance+\epsilon) + b For an input x, y = a * (x-mean)/sqrt(variance+\epsilon) + b
where a and b are the scalar and bias respectively, and \epsilon is the adjustment parameter. where a and b are the scalar and bias respectively, and \epsilon is the adjustment parameter.
*/ */
extern "C" extern "C"
void Normalize(XTensor * input, XTensor * output, int dim, XTensor * mean, XTensor * var, XTensor * a, XTensor * b, DTYPE epsilon); void _Normalize(const XTensor * input, XTensor * output, int dim, const XTensor * mean, const XTensor * var, const XTensor * a, const XTensor * b, DTYPE epsilon);
/*
normalized the data with normal distribution (do it on site)
keep the result in the input tensor and return nothing
For an input x, x = a * (x-mean)/sqrt(variance+\epsilon) + b
where a and b are the scalar and bias respectively, and \epsilon is the adjustment parameter.
*/
extern "C"
void _NormalizeMe(XTensor * input, int dim, const XTensor * mean, const XTensor * var, const XTensor * a, const XTensor * b, DTYPE epsilon);
/*
normalized the data with normal distribution (return a XTensor structure)
make a new tensor to keep the result and return it
For an input x, y = a * (x-mean)/sqrt(variance+\epsilon) + b
where a and b are the scalar and bias respectively, and \epsilon is the adjustment parameter.
*/
XTensor Normalize(const XTensor &input, int dim, const XTensor &mean, const XTensor &var, const XTensor &a, const XTensor &b, DTYPE epsilon);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -31,12 +31,12 @@ get the power(a, p) ...@@ -31,12 +31,12 @@ get the power(a, p)
>> a - the tensor >> a - the tensor
>> p - as it is >> p - as it is
*/ */
void Power(XTensor * a, DTYPE p) void _Power(XTensor * a, DTYPE p)
{ {
#ifdef USE_CUDA #ifdef USE_CUDA
/* run it on GPUs */ /* run it on GPUs */
if (a->devID >= 0) { if (a->devID >= 0) {
CudaPower(a, p); _CudaPower(a, p);
return; return;
} }
#endif #endif
......
...@@ -96,7 +96,7 @@ void KernelPower(__half * d, __half p, int size) ...@@ -96,7 +96,7 @@ void KernelPower(__half * d, __half p, int size)
/* get the power of the entries */ /* get the power of the entries */
extern "C" extern "C"
void CudaPower(XTensor * a, DTYPE p) void _CudaPower(XTensor * a, DTYPE p)
{ {
int gridSize[3]; int gridSize[3];
int blockSize[3]; int blockSize[3];
......
...@@ -38,7 +38,7 @@ void KernelSqrtV2(__half * d, int size); ...@@ -38,7 +38,7 @@ void KernelSqrtV2(__half * d, int size);
/* get the power of the entries */ /* get the power of the entries */
extern "C" extern "C"
void CudaPower(XTensor * a, DTYPE p); void _CudaPower(XTensor * a, DTYPE p);
#endif // USE_CUDA #endif // USE_CUDA
......
...@@ -28,7 +28,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -28,7 +28,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* get the power(x, y) */ /* get the power(x, y) */
extern "C" extern "C"
void Power(XTensor * a, DTYPE p); void _Power(XTensor * a, DTYPE p);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -28,8 +28,10 @@ ...@@ -28,8 +28,10 @@
namespace nts{ // namespace nts(NiuTrans.Tensor) namespace nts{ // namespace nts(NiuTrans.Tensor)
/* /*
scale and shift all tensor entires b = a * scale + shift scale and shift all tensor entires
b = a * scale + shift b = a * scale + shift
>> a - the input tensor >> a - the input tensor
>> b - the output tensor >> b - the output tensor
>> scale - the scaler factor >> scale - the scaler factor
...@@ -76,8 +78,11 @@ void _ScaleAndShift(const XTensor * a, XTensor * b, DTYPE scale, DTYPE shift) ...@@ -76,8 +78,11 @@ void _ScaleAndShift(const XTensor * a, XTensor * b, DTYPE scale, DTYPE shift)
} }
/* /*
scale and shift all tensor entires on site b = a * scale + shift scale and shift all tensor entires (do it on site)
b = a * scale + shift keep the result in the input tensor a and return nothing
a = a * scale + shift
>> a - the input/output tensor >> a - the input/output tensor
>> scale - the scaler factor >> scale - the scaler factor
>> shift - the shift factor >> shift - the shift factor
...@@ -88,19 +93,22 @@ void _ScaleAndShiftMe(XTensor * a, DTYPE scale, DTYPE shift) ...@@ -88,19 +93,22 @@ void _ScaleAndShiftMe(XTensor * a, DTYPE scale, DTYPE shift)
} }
/* /*
scale and shift all tensor entires b = a * scale + shift scale and shift all tensor entires (return a XTensor structure)
make a new tensor to keep the result and return it
b = a * scale + shift b = a * scale + shift
>> a - the input tensor >> a - the input tensor
>> b - the output tensor
>> scale - the scaler factor >> scale - the scaler factor
>> shift - the shift factor >> shift - the shift factor
<< return - the result of scaling and shifting all tensor entires
*/ */
XTensor ScaleAndShift(const XTensor &a, DTYPE scale, DTYPE shift) XTensor ScaleAndShift(const XTensor &a, DTYPE scale, DTYPE shift)
{ {
XTensor b(&a); XTensor b(&a);
b.SetTMP(); b.SetTMP();
/* computation */ /* call _ScaleAndShift function */
_ScaleAndShift(&a, &b, scale, shift); _ScaleAndShift(&a, &b, scale, shift);
/* tensor connections */ /* tensor connections */
......
...@@ -30,13 +30,24 @@ namespace nts{ // namespace nts(NiuTrans.Tensor) ...@@ -30,13 +30,24 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
#define _LinearMe _ScaleAndShiftMe #define _LinearMe _ScaleAndShiftMe
#define Linear ScaleAndShift #define Linear ScaleAndShift
/* scale and shift all tensor entires b = a * scale + shift */ /*
scale and shift all tensor entires
b = a * scale + shift
*/
void _ScaleAndShift(const XTensor * a, XTensor * b, DTYPE scale, DTYPE shift = 0); void _ScaleAndShift(const XTensor * a, XTensor * b, DTYPE scale, DTYPE shift = 0);
/* scale and shift all tensor entires on site a = a * scale + shift */ /*
scale and shift all tensor entires
keep the result in the input tensor a and return nothing
a = a * scale + shift
*/
void _ScaleAndShiftMe(XTensor * a, DTYPE scale, DTYPE shift = 0); void _ScaleAndShiftMe(XTensor * a, DTYPE scale, DTYPE shift = 0);
/* scale and shift all tensor entires b = a * scale + shift, and return the result tensor b */ /*
scale and shift all tensor entires
make a new tensor to keep the result and return it
b = a * scale + shift
*/
XTensor ScaleAndShift(const XTensor &a, DTYPE scale, DTYPE shift = 0); XTensor ScaleAndShift(const XTensor &a, DTYPE scale, DTYPE shift = 0);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -36,7 +36,7 @@ copy a number of blocks to target positions ...@@ -36,7 +36,7 @@ copy a number of blocks to target positions
>> targetBlocks - target positions of the copy >> targetBlocks - target positions of the copy
>> myMem - the memory pool >> myMem - the memory pool
*/ */
void CopyBlocks(void * source, int blockSize, int blockNum, void * target, int * targetBlocks, XMem * myMem) void _CopyBlocks(void * source, int blockSize, int blockNum, void * target, int * targetBlocks, XMem * myMem)
{ {
if (myMem != NULL && myMem->devID >= 0) { if (myMem != NULL && myMem->devID >= 0) {
#ifdef USE_CUDA #ifdef USE_CUDA
...@@ -44,7 +44,7 @@ void CopyBlocks(void * source, int blockSize, int blockNum, void * target, int * ...@@ -44,7 +44,7 @@ void CopyBlocks(void * source, int blockSize, int blockNum, void * target, int *
int * targetBlocksTMP = (int*)myMem->AllocBuf(myMem->devID, blockNum * sizeof(int)); int * targetBlocksTMP = (int*)myMem->AllocBuf(myMem->devID, blockNum * sizeof(int));
XMemCopy(targetBlocksTMP, myMem->devID, targetBlocks, -1, blockNum * sizeof(int)); XMemCopy(targetBlocksTMP, myMem->devID, targetBlocks, -1, blockNum * sizeof(int));
CopyBlocksOnSite(source, blockSize, blockNum, target, targetBlocksTMP, myMem); _CopyBlocksOnSite(source, blockSize, blockNum, target, targetBlocksTMP, myMem);
myMem->ReleaseBuf(myMem->devID, blockNum * sizeof(int)); myMem->ReleaseBuf(myMem->devID, blockNum * sizeof(int));
#else #else
...@@ -52,7 +52,7 @@ void CopyBlocks(void * source, int blockSize, int blockNum, void * target, int * ...@@ -52,7 +52,7 @@ void CopyBlocks(void * source, int blockSize, int blockNum, void * target, int *
#endif #endif
} }
else { else {
CopyBlocksOnSite(source, blockSize, blockNum, target, targetBlocks, myMem); _CopyBlocksOnSite(source, blockSize, blockNum, target, targetBlocks, myMem);
} }
} }
...@@ -66,14 +66,14 @@ copy a number of blocks source source positions to target positions ...@@ -66,14 +66,14 @@ copy a number of blocks source source positions to target positions
>> targetBlocks - target positions of the copy >> targetBlocks - target positions of the copy
>> myMem - the memory pool >> myMem - the memory pool
*/ */
void CopyBlocks(void * source, int blockSize, int * sourceBlocks, int blockNum, void * target, int * targetBlocks, XMem * myMem, int devID) void _CopyBlocks(void * source, int blockSize, int * sourceBlocks, int blockNum, void * target, int * targetBlocks, XMem * myMem, int devID)
{ {
if (myMem != NULL) if (myMem != NULL)
CheckNTErrors((myMem->devID == devID), "DevIDs are different between memory pool and input devID!"); CheckNTErrors((myMem->devID == devID), "DevIDs are different between memory pool and input devID!");
if (devID >= 0) { if (devID >= 0) {
#ifdef USE_CUDA #ifdef USE_CUDA
CudaCopyBlocksSelected(source, blockSize, sourceBlocks, blockNum, target, targetBlocks, myMem, devID); _CudaCopyBlocksSelected(source, blockSize, sourceBlocks, blockNum, target, targetBlocks, myMem, devID);
#else #else
ShowNTErrors("Plesae specify USE_CUDA and recompile the code!"); ShowNTErrors("Plesae specify USE_CUDA and recompile the code!");
#endif #endif
......
...@@ -27,10 +27,10 @@ ...@@ -27,10 +27,10 @@
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* copy a number of blocks to target positions */ /* copy a number of blocks to target positions */
void CopyBlocks(void * source, int blockSize, int blockNum, void * target, int * targetBlocks, XMem * myMem); void _CopyBlocks(void * source, int blockSize, int blockNum, void * target, int * targetBlocks, XMem * myMem);
/* copy a number of blocks from source positions to target positions */ /* copy a number of blocks from source positions to target positions */
void CopyBlocks(void * source, int blockSize, int * sourceBlocks, int blockNum, void * target, int * targetBlocks, XMem * myMem, int devID); void _CopyBlocks(void * source, int blockSize, int * sourceBlocks, int blockNum, void * target, int * targetBlocks, XMem * myMem, int devID);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -38,7 +38,7 @@ Note that a grid may have a number of blocks ...@@ -38,7 +38,7 @@ Note that a grid may have a number of blocks
>> myMem - the memory pool >> myMem - the memory pool
>> isIndexOnDev - indicates whether the index is on the device already >> isIndexOnDev - indicates whether the index is on the device already
*/ */
void CopyBlocksInGrid(void * source, int blockSize, int blockNum, int gridNum, void * target, void _CopyBlocksInGrid(void * source, int blockSize, int blockNum, int gridNum, void * target,
int * index, int unitSize, bool isIndexOnDev, XMem * myMem) int * index, int unitSize, bool isIndexOnDev, XMem * myMem)
{ {
CheckNTErrors((unitSize == sizeof(int)), "TODO!"); CheckNTErrors((unitSize == sizeof(int)), "TODO!");
...@@ -51,7 +51,7 @@ void CopyBlocksInGrid(void * source, int blockSize, int blockNum, int gridNum, v ...@@ -51,7 +51,7 @@ void CopyBlocksInGrid(void * source, int blockSize, int blockNum, int gridNum, v
XMemCopy(indexGPU, myMem->devID, index, -1, blockNum * gridNum * sizeof(int)); XMemCopy(indexGPU, myMem->devID, index, -1, blockNum * gridNum * sizeof(int));
} }
CudaCopyBlocksInGrid(source, blockSize, blockNum, gridNum, target, indexGPU, unitSize, myMem); _CudaCopyBlocksInGrid(source, blockSize, blockNum, gridNum, target, indexGPU, unitSize, myMem);
if (!isIndexOnDev) if (!isIndexOnDev)
myMem->ReleaseBuf(myMem->devID, blockNum * gridNum * sizeof(int)); myMem->ReleaseBuf(myMem->devID, blockNum * gridNum * sizeof(int));
......
...@@ -216,7 +216,7 @@ Note that a grid may have a number of blocks ...@@ -216,7 +216,7 @@ Note that a grid may have a number of blocks
>> itemSize - size of each data item >> itemSize - size of each data item
>> myMem - the memory pool >> myMem - the memory pool
*/ */
void CudaCopyBlocksInGrid(void * source, int blockSize, int blockNum, int gridNum, void * target, int * index, int itemSize, XMem * myMem) void _CudaCopyBlocksInGrid(void * source, int blockSize, int blockNum, int gridNum, void * target, int * index, int itemSize, XMem * myMem)
{ {
CheckNTErrors((myMem != NULL && myMem->devID >= 0), "This code must be run on GPUs!"); CheckNTErrors((myMem != NULL && myMem->devID >= 0), "This code must be run on GPUs!");
CheckNTErrors((itemSize == sizeof(int)), "TODO!"); CheckNTErrors((itemSize == sizeof(int)), "TODO!");
......
...@@ -30,7 +30,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -30,7 +30,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* copy data by index */ /* copy data by index */
extern "C" extern "C"
void CudaCopyBlocksInGrid(void * source, int blockSize, int blockNum, int gridNum, void * target, int * index, int unitSize, XMem * myMem); void _CudaCopyBlocksInGrid(void * source, int blockSize, int blockNum, int gridNum, void * target, int * index, int unitSize, XMem * myMem);
#endif // USE_CUDA #endif // USE_CUDA
......
...@@ -28,7 +28,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -28,7 +28,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* copy a number of blocks in grid */ /* copy a number of blocks in grid */
extern "C" extern "C"
void CopyBlocksInGrid(void * source, int blockSize, int blockNum, int gridNum, void * target, int * index, int unitSize, bool isIndexOnDev, XMem * myMem); void _CopyBlocksInGrid(void * source, int blockSize, int blockNum, int gridNum, void * target, int * index, int unitSize, bool isIndexOnDev, XMem * myMem);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -36,11 +36,11 @@ all the data has been on the device (CPU/GPU) already. ...@@ -36,11 +36,11 @@ all the data has been on the device (CPU/GPU) already.
>> targetBlocks - target positions of the copy >> targetBlocks - target positions of the copy
>> myMem - the memory pool >> myMem - the memory pool
*/ */
void CopyBlocksOnSite(void * source, int blockSize, int blockNum, void * target, int * targetBlocks, XMem * myMem) void _CopyBlocksOnSite(void * source, int blockSize, int blockNum, void * target, int * targetBlocks, XMem * myMem)
{ {
if (myMem != NULL && myMem->devID >= 0) { if (myMem != NULL && myMem->devID >= 0) {
#ifdef USE_CUDA #ifdef USE_CUDA
CudaCopyBlocks(source, blockSize, blockNum, target, targetBlocks, myMem); _CudaCopyBlocks(source, blockSize, blockNum, target, targetBlocks, myMem);
#else #else
ShowNTErrors("Plesae specify USE_CUDA and recompile the code!"); ShowNTErrors("Plesae specify USE_CUDA and recompile the code!");
#endif #endif
......
...@@ -80,7 +80,7 @@ copy a number of blocks to target positions (cuda version) ...@@ -80,7 +80,7 @@ copy a number of blocks to target positions (cuda version)
>> targetBlocks - target positions of the copy (on the device) >> targetBlocks - target positions of the copy (on the device)
>> myMem - memory pool >> myMem - memory pool
*/ */
void CudaCopyBlocks(void * source, int blockSize, int blockNum, void * target, int * targetBlocks, XMem * myMem) void _CudaCopyBlocks(void * source, int blockSize, int blockNum, void * target, int * targetBlocks, XMem * myMem)
{ {
CheckNTErrors((myMem != NULL), "No memory pool!"); CheckNTErrors((myMem != NULL), "No memory pool!");
CheckNTErrors((myMem->devID >= 0), "Wrong device to run!"); CheckNTErrors((myMem->devID >= 0), "Wrong device to run!");
......
...@@ -34,7 +34,7 @@ void KernelCopyBlocks(DTYPE * source, int blockSize, int blockNum, DTYPE * targe ...@@ -34,7 +34,7 @@ void KernelCopyBlocks(DTYPE * source, int blockSize, int blockNum, DTYPE * targe
/* copy a number of blocks to target positions (cuda version) */ /* copy a number of blocks to target positions (cuda version) */
extern "C" extern "C"
void CudaCopyBlocks(void * source, int blockSize, int blockNum, void * target, int * targetBlocks, XMem * myMem); void _CudaCopyBlocks(void * source, int blockSize, int blockNum, void * target, int * targetBlocks, XMem * myMem);
#endif // USE_CUDA #endif // USE_CUDA
......
...@@ -28,7 +28,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -28,7 +28,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* copy a number of blocks to target positions (on site) */ /* copy a number of blocks to target positions (on site) */
extern "C" extern "C"
void CopyBlocksOnSite(void * source, int blockSize, int blockNum, void * target, int * targetBlocks, XMem * myMem); void _CopyBlocksOnSite(void * source, int blockSize, int blockNum, void * target, int * targetBlocks, XMem * myMem);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -70,7 +70,7 @@ copy a number of blocks from source positions to target positions (cuda version) ...@@ -70,7 +70,7 @@ copy a number of blocks from source positions to target positions (cuda version)
>> targetBlocks - target positions of the copy >> targetBlocks - target positions of the copy
>> myMem - memory pool >> myMem - memory pool
*/ */
void CudaCopyBlocksSelected(void * source, int blockSize, int * sourceBlocks, int blockNum, void * target, int * targetBlocks, XMem * myMem, int devID) void _CudaCopyBlocksSelected(void * source, int blockSize, int * sourceBlocks, int blockNum, void * target, int * targetBlocks, XMem * myMem, int devID)
{ {
CheckNTErrors((devID >= 0), "Wrong device to run!"); CheckNTErrors((devID >= 0), "Wrong device to run!");
CheckNTErrors((blockSize % sizeof(DTYPE) == 0), "Unsupported block size!"); CheckNTErrors((blockSize % sizeof(DTYPE) == 0), "Unsupported block size!");
......
...@@ -34,7 +34,7 @@ void KernelCopyBlocksSelected(DTYPE * source, int blockSize, int * sourceBlocks, ...@@ -34,7 +34,7 @@ void KernelCopyBlocksSelected(DTYPE * source, int blockSize, int * sourceBlocks,
/* copy a number of blocks form source positions to target positions (cuda version) */ /* copy a number of blocks form source positions to target positions (cuda version) */
extern "C" extern "C"
void CudaCopyBlocksSelected(void * source, int blockSize, int * sourceBlocks, int blockNum, void * target, int * targetBlocks, XMem * myMem, int devID); void _CudaCopyBlocksSelected(void * source, int blockSize, int * sourceBlocks, int blockNum, void * target, int * targetBlocks, XMem * myMem, int devID);
#endif // USE_CUDA #endif // USE_CUDA
......
...@@ -36,7 +36,7 @@ copy data blocks by 2d layout ...@@ -36,7 +36,7 @@ copy data blocks by 2d layout
>> n - height of each block >> n - height of each block
>> myMem - the memory pool >> myMem - the memory pool
*/ */
void CopyData2D(void ** s, int sPitch, void ** t, int tPitch, int blockNum, int mSize, int n, XMem * myMem) void _CopyData2D(void ** s, int sPitch, void ** t, int tPitch, int blockNum, int mSize, int n, XMem * myMem)
{ {
int devID = myMem != NULL ? myMem->devID : -1; int devID = myMem != NULL ? myMem->devID : -1;
......
...@@ -28,7 +28,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -28,7 +28,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* copy data blocks by 2d layout */ /* copy data blocks by 2d layout */
extern "C" extern "C"
void CopyData2D(void ** s, int sPitch, void ** t, int tPitch, int count, int mSize, int n, XMem * myMem); void _CopyData2D(void ** s, int sPitch, void ** t, int tPitch, int count, int mSize, int n, XMem * myMem);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -36,7 +36,7 @@ in the k-th grid ...@@ -36,7 +36,7 @@ in the k-th grid
>> blockNumInGrid - number of blocks in each grid >> blockNumInGrid - number of blocks in each grid
>> isIndexOnDev - indicates whether the index is on the device already >> isIndexOnDev - indicates whether the index is on the device already
*/ */
void CopyInGrid(XTensor * s, XTensor * t, int * index, int blockDim, int blockNumInGrid, bool isIndexOnDev) void _CopyInGrid(const XTensor * s, XTensor * t, int * index, int blockDim, int blockNumInGrid, bool isIndexOnDev)
{ {
CheckNTErrors((XTensor::IsIdentical(s, t)), "Unmatched tensors!"); CheckNTErrors((XTensor::IsIdentical(s, t)), "Unmatched tensors!");
...@@ -50,7 +50,7 @@ void CopyInGrid(XTensor * s, XTensor * t, int * index, int blockDim, int blockNu ...@@ -50,7 +50,7 @@ void CopyInGrid(XTensor * s, XTensor * t, int * index, int blockDim, int blockNu
CheckNTErrors((s->unitNum % (blockSize * blockNum) == 0), "Illegal block number!"); CheckNTErrors((s->unitNum % (blockSize * blockNum) == 0), "Illegal block number!");
gridNum = s->unitNum / (blockSize * blockNum); gridNum = s->unitNum / (blockSize * blockNum);
CopyBlocksInGrid(s->data, blockSize, blockNum, gridNum, t->data, index, s->unitSize, isIndexOnDev, s->mem); _CopyBlocksInGrid(s->data, blockSize, blockNum, gridNum, t->data, index, s->unitSize, isIndexOnDev, s->mem);
} }
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
...@@ -28,7 +28,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -28,7 +28,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* copy a number of blocks in grid. i.e., reorder the data blocks in the same memory piece*/ /* copy a number of blocks in grid. i.e., reorder the data blocks in the same memory piece*/
extern "C" extern "C"
void CopyInGrid(XTensor * s, XTensor * t, int * index, int blockDim, int blockNumInGrid, bool isIndexOnDev = false); void _CopyInGrid(const XTensor * s, XTensor * t, int * index, int blockDim, int blockNumInGrid, bool isIndexOnDev = false);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -21,11 +21,13 @@ ...@@ -21,11 +21,13 @@
#include "CopyIndexed.h" #include "CopyIndexed.h"
#include "CopyBlocks.h" #include "CopyBlocks.h"
#include "../../XName.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* /*
copy indexed sub-tensors copy indexed sub-tensors
>> s - the source tensor >> s - the source tensor
>> t - the target tensor >> t - the target tensor
>> dim - the leading dimension to define "sub-tensors" >> dim - the leading dimension to define "sub-tensors"
...@@ -34,11 +36,11 @@ copy indexed sub-tensors ...@@ -34,11 +36,11 @@ copy indexed sub-tensors
>> srcIndex - index of the source sub-tensors >> srcIndex - index of the source sub-tensors
>> indexSize - length of srcIndex (and tgtIndex) >> indexSize - length of srcIndex (and tgtIndex)
>> tgtIndex - index of the target sub-tensors >> tgtIndex - index of the target sub-tensors
>> copyNum - number of the sub-tensors we copy for each source index, e.g., >> copyNum - number of the sub-tensors we copy for each source index,
for srcIndex = [1,4] and copyNum = 2, we actually copy the source sub-tensors 1, 2, 4, 5 e.g., for srcIndex = [1,4] and copyNum = 2,
<< return - whether copy indexed operation was successful we actually copy the source sub-tensors 1, 2, 4, 5
*/ */
bool CopyIndexed(XTensor * s, XTensor * t, int dim, int * srcIndex, int indexSize, int * tgtIndex, int copyNum) void _CopyIndexed(const XTensor * s, XTensor * t, int dim, int * srcIndex, int indexSize, int * tgtIndex, int copyNum)
{ {
CheckNTErrors((s && t), "Invalid tensors!"); CheckNTErrors((s && t), "Invalid tensors!");
CheckNTErrors((s->devID == t->devID || (s->devID < 0 && t->devID < 0)), CheckNTErrors((s->devID == t->devID || (s->devID < 0 && t->devID < 0)),
...@@ -84,12 +86,62 @@ bool CopyIndexed(XTensor * s, XTensor * t, int dim, int * srcIndex, int indexSiz ...@@ -84,12 +86,62 @@ bool CopyIndexed(XTensor * s, XTensor * t, int dim, int * srcIndex, int indexSiz
CheckNTErrors((tgtIndex[i] < blockNumTgt), "Index is out of range!"); CheckNTErrors((tgtIndex[i] < blockNumTgt), "Index is out of range!");
} }
CopyBlocks(s->data, blockSizeSrc * s->unitSize, realSrcIndex, realIndexSize, t->data, realTgtIndex, s->mem, s->devID); _CopyBlocks(s->data, blockSizeSrc * s->unitSize, realSrcIndex, realIndexSize, t->data, realTgtIndex, s->mem, s->devID);
delete[] realSrcIndex; delete[] realSrcIndex;
delete[] realTgtIndex; delete[] realTgtIndex;
}
/*
copy indexed sub-tensors (return a XTensor structure)
make a new tensor to keep the result and return it
>> s - the source tensor
>> dim - the leading dimension to define "sub-tensors"
e.g., for a tensor of size (3, 2, 4) and dim = 2,
we have 4 sub-tensors of size (3,2)
>> srcIndex - index of the source sub-tensors
>> indexSize - length of srcIndex (and tgtIndex)
>> tgtIndex - index of the target sub-tensors
>> copyNum - number of the sub-tensors we copy for each source index,
e.g., for srcIndex = [1,4] and copyNum = 2,
we actually copy the source sub-tensors 1, 2, 4, 5
<< return - the result of copying indexed sub-tensors
*/
XTensor CopyIndexed(const XTensor &s, int dim, int * srcIndex, int indexSize, int * tgtIndex, int copyNum)
{
CheckNTErrors(&s, "Empty input tensor!");
CheckNTErrors((dim >= 0 && dim < s.order), "A too larget dimension specified!");
int order = s.order;
int * dimSize = new int[order];
for (int i = 0; i < s.order; i++) {
if (i == dim)
dimSize[i] = indexSize * copyNum;
else
dimSize[i] = s.dimSize[i];
}
XTensor t = NewTensor(order, dimSize, s.dataType, s.denseRatio, s.devID, s.mem);
t.SetZeroAll();
t.SetTMP();
/* call _CopyIndexed function */
_CopyIndexed(&s, &t, dim, srcIndex, indexSize, tgtIndex, copyNum);
/* destroy variables */
delete dimSize;
/* tensor connection */
XLink::MakeLink(&s, NULL, &t, MOVEMENT_COPYINDEXED);
XLink::AddParamToHead(&t, dim);
XLink::AddParamToHeadPointer(&t, srcIndex);
XLink::AddParamToHead(&t, indexSize);
XLink::AddParamToHeadPointer(&t, tgtIndex);
XLink::AddParamToHead(&t, copyNum);
return true; return t;
} }
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
...@@ -28,7 +28,13 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -28,7 +28,13 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* copy selected sub-tensors */ /* copy selected sub-tensors */
extern "C" extern "C"
bool CopyIndexed(XTensor * s, XTensor * t, int dim, int * srcIndex, int indexSize, int * tgtIndex, int copyNum); void _CopyIndexed(const XTensor * s, XTensor * t, int dim, int * srcIndex, int indexSize, int * tgtIndex, int copyNum);
/*
copy selected sub-tensors (return a XTensor structure)
make a new tensor to keep the result and return it
*/
XTensor CopyIndexed(const XTensor &s, int dim, int * srcIndex, int indexSize, int * tgtIndex, int copyNum);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -27,18 +27,15 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -27,18 +27,15 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* /*
copy s to t copy s to t
>> s - source >> s - source
>> t - target >> t - target
>> stream - the stream for creating the job pipeline >> stream - the stream for creating the job pipeline
<< return - succeeded or not
*/ */
bool CopyValues(const XTensor * s, XTensor * t, XStream * stream) void _CopyValues(const XTensor * s, XTensor * t, XStream * stream)
{ {
if (s == NULL || t == NULL) CheckNTErrors((s != NULL && t != NULL), "The input tensor and output tensor must be nonempty!");
return false; CheckNTErrors((s->data != NULL), "Cannot copy from an empty data array!");
if (s->data == NULL || t->data == NULL)
return false;
CheckNTErrors((t->data != NULL), "Cannot copy to an empty data array!"); CheckNTErrors((t->data != NULL), "Cannot copy to an empty data array!");
CheckNTErrors((s->unitNum == t->unitNum), "Unmatched data item number!"); CheckNTErrors((s->unitNum == t->unitNum), "Unmatched data item number!");
...@@ -48,12 +45,13 @@ bool CopyValues(const XTensor * s, XTensor * t, XStream * stream) ...@@ -48,12 +45,13 @@ bool CopyValues(const XTensor * s, XTensor * t, XStream * stream)
"The code must be run on the same device!"); "The code must be run on the same device!");
CheckNTErrors((s->isSparse || t->isSparse), "TODO!"); CheckNTErrors((s->isSparse || t->isSparse), "TODO!");
ConvertDataType(s->devID, s->data, s->dataType, t->data, t->dataType, s->unitNum); ConvertDataType(s->devID, s->data, s->dataType, t->data, t->dataType, s->unitNum);
return true;
} }
#ifdef USE_CUDA #ifdef USE_CUDA
if (s->devID >= 0 || t->devID >= 0) if (s->devID >= 0 || t->devID >= 0) {
return CudaCopyValues(s, t, stream); _CudaCopyValues(s, t, stream);
return;
}
#endif #endif
if (!s->isSparse && !t->isSparse) { if (!s->isSparse && !t->isSparse) {
...@@ -68,8 +66,28 @@ bool CopyValues(const XTensor * s, XTensor * t, XStream * stream) ...@@ -68,8 +66,28 @@ bool CopyValues(const XTensor * s, XTensor * t, XStream * stream)
else { else {
ShowNTErrors("TODO!"); ShowNTErrors("TODO!");
} }
}
/*
copy s to t (return a XTensor structure)
make a new tensor to keep the result and return it
>> s - source
>> stream - the stream for creating the job pipeline
<< return - the copyed tensor t
*/
XTensor CopyValues(const XTensor &s, XStream * stream)
{
XTensor t(&s);
t.SetTMP();
/* call _CopyValues function */
_CopyValues(&s, &t, stream);
/* tensor connection */
XLink::MakeLink(&s, NULL, &t, MOVEMENT_COPYVALUES);
return true; return t;
} }
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
...@@ -35,11 +35,9 @@ copy a range of elements from a source vector to a target vector ...@@ -35,11 +35,9 @@ copy a range of elements from a source vector to a target vector
>> stream - the stream for creating the job pipeline >> stream - the stream for creating the job pipeline
<< return - succeed or not << return - succeed or not
*/ */
bool CudaCopyValues(const XTensor * s, XTensor * t, XStream * stream) void _CudaCopyValues(const XTensor * s, XTensor * t, XStream * stream)
{ {
if (s == NULL || t == NULL) CheckNTErrors((s != NULL && t != NULL), "The input tensor and output tensor must be nonempty!");
return false;
CheckNTErrors(s->dataType == t->dataType, "Unmatched data type!"); CheckNTErrors(s->dataType == t->dataType, "Unmatched data type!");
CheckNTErrors((s->unitSize == t->unitSize), "Incompatible vectors in value copy."); CheckNTErrors((s->unitSize == t->unitSize), "Incompatible vectors in value copy.");
CheckNTErrors((s->denseRatio <= s->denseRatio), "Incompatible vectors in value copy."); CheckNTErrors((s->denseRatio <= s->denseRatio), "Incompatible vectors in value copy.");
...@@ -83,8 +81,6 @@ bool CudaCopyValues(const XTensor * s, XTensor * t, XStream * stream) ...@@ -83,8 +81,6 @@ bool CudaCopyValues(const XTensor * s, XTensor * t, XStream * stream)
else { else {
ShowNTErrors("TODO!"); ShowNTErrors("TODO!");
} }
return true;
} }
......
...@@ -30,7 +30,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -30,7 +30,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* copy all elements from a source matrix to a target matrix */ /* copy all elements from a source matrix to a target matrix */
extern "C" extern "C"
bool CudaCopyValues(const XTensor * s, XTensor * t, XStream * stream = NULL); void _CudaCopyValues(const XTensor * s, XTensor * t, XStream * stream = NULL);
#endif // USE_CUDA #endif // USE_CUDA
......
...@@ -28,7 +28,13 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -28,7 +28,13 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* copy s to t */ /* copy s to t */
extern "C" extern "C"
bool CopyValues(const XTensor * s, XTensor * t, XStream * stream = NULL); void _CopyValues(const XTensor * s, XTensor * t, XStream * stream = NULL);
/*
copy s to t (return a XTensor structure)
make a new tensor to keep the result and return it
*/
XTensor CopyValues(const XTensor &s, XStream * stream = NULL);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -27,12 +27,13 @@ ...@@ -27,12 +27,13 @@
namespace nts{ // namespace nts(NiuTrans.Tensor) namespace nts{ // namespace nts(NiuTrans.Tensor)
/* /*
get the max value of the items along a dimension of the tensor. get the max value of the items along a dimension of the tensor
>> input - the input tensor >> input - the input tensor
>> output - the output tensor >> output - the output tensor
>> dim - the dimension where the reduction is performed on >> dim - the dimension where the reduction is performed on
*/ */
void ReduceMax(XTensor * input, XTensor * output, int dim) void _ReduceMax(const XTensor * input, XTensor * output, int dim)
{ {
CheckNTErrors((input->devID == output->devID || (input->devID < 0 && output->devID < 0)), CheckNTErrors((input->devID == output->devID || (input->devID < 0 && output->devID < 0)),
"This code must be run on the same device!"); "This code must be run on the same device!");
...@@ -55,7 +56,7 @@ void ReduceMax(XTensor * input, XTensor * output, int dim) ...@@ -55,7 +56,7 @@ void ReduceMax(XTensor * input, XTensor * output, int dim)
if(input->devID >= 0){ if(input->devID >= 0){
#ifdef USE_CUDA #ifdef USE_CUDA
CudaReduceMax(input, output, dim); _CudaReduceMax(input, output, dim);
#endif #endif
} }
else{ else{
...@@ -90,4 +91,43 @@ void ReduceMax(XTensor * input, XTensor * output, int dim) ...@@ -90,4 +91,43 @@ void ReduceMax(XTensor * input, XTensor * output, int dim)
} }
} }
/*
get the max value of the items along a dimension of the tensor (return a XTensor structure).
make a new tensor to keep the result and return it
>> input - the input tensor
>> dim - the dimension where the reduction is performed on
<< return - the max value of the items along a dimension of the tensor
*/
XTensor ReduceMax(const XTensor &input, int dim)
{
CheckNTErrors(&input, "Empty input or output tensors!");
CheckNTErrors((dim >= 0 && dim < input.order), "Illegal dimension to reduce!");
int order = input.order - 1;
int * dimSize = new int[order];
for(int i = 0; i < input.order; i++){
if(i < dim)
dimSize[i] = input.dimSize[i];
else if(i > dim)
dimSize[i] = input.dimSize[i + 1];
}
XTensor output = NewTensor(order, dimSize, input.dataType, input.denseRatio, input.devID, input.mem);
output.SetZeroAll();
output.SetTMP();
/* call _ReduceMax function */
_ReduceMax(&input, &output, dim);
/* destroy variables */
delete dimSize;
/* tensor connection */
XLink::MakeLink(&input, NULL, &output, REDUCE_REDUCEMAX);
XLink::AddParamToHead(&output, dim);
return output;
}
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
...@@ -334,7 +334,7 @@ sum_i = max_{0<=j<strideNum} input_{i,j} ...@@ -334,7 +334,7 @@ sum_i = max_{0<=j<strideNum} input_{i,j}
>> output - the output tensor >> output - the output tensor
>> dim - which dimension to reduce >> dim - which dimension to reduce
*/ */
void CudaReduceMax(XTensor * input, XTensor * output, int dim) void _CudaReduceMax(const XTensor * input, XTensor * output, int dim)
{ {
CheckNTErrors((input && output), "Empty input or output tensors!"); CheckNTErrors((input && output), "Empty input or output tensors!");
CheckNTErrors((input->order == output->order + 1), "Incorrect tensor sizes!"); CheckNTErrors((input->order == output->order + 1), "Incorrect tensor sizes!");
......
...@@ -30,7 +30,7 @@ namespace nts{ // namespace nts(NiuTrans.Tensor) ...@@ -30,7 +30,7 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
/* get the max-valued items along a dimension of the tensor (cuda version) */ /* get the max-valued items along a dimension of the tensor (cuda version) */
extern "C" extern "C"
void CudaReduceMax(XTensor * input, XTensor * output, int dim); void _CudaReduceMax(const XTensor * input, XTensor * output, int dim);
#endif // USE_CUDA #endif // USE_CUDA
......
...@@ -28,7 +28,13 @@ namespace nts{ // namespace nts(NiuTrans.Tensor) ...@@ -28,7 +28,13 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
/* get the max value of the items along a dimension of the tensor. */ /* get the max value of the items along a dimension of the tensor. */
extern "C" extern "C"
void ReduceMax(XTensor * input, XTensor * output, int dim); void _ReduceMax(const XTensor * input, XTensor * output, int dim);
/*
get the max value of the items along a dimension of the tensor (return a XTensor structure)
make a new tensor to keep the result and return it
*/
XTensor ReduceMax(const XTensor &input, int dim);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -20,27 +20,71 @@ ...@@ -20,27 +20,71 @@
*/ */
#include "../math/ScaleAndShift.h" #include "../math/ScaleAndShift.h"
#include "../../XName.h"
#include "ReduceSum.h" #include "ReduceSum.h"
#include "ReduceMean.h" #include "ReduceMean.h"
namespace nts{ // namespace nts(NiuTrans.Tensor) namespace nts{ // namespace nts(NiuTrans.Tensor)
/* /*
get the mean value along a dimension of the tensor. For a 1-dimensional data array a, get the mean value along a dimension of the tensor
mean = (1/n) * sum_i input_i
For a 1-dimensional data array a, mean = (1/n) * sum_i input_i
>> input - the input tensor >> input - the input tensor
>> output - the output tensor >> output - the output tensor
>> dim - the dimension where the reduction is performed on >> dim - the dimension where the reduction is performed on
*/ */
void ReduceMean(XTensor * input, XTensor * output, int dim) void _ReduceMean(const XTensor * input, XTensor * output, int dim)
{ {
CheckNTErrors((input->order > dim), "Illegal dimension specified!"); CheckNTErrors((input->order > dim), "Illegal dimension specified!");
int dimRDI = input->order - dim - 1; int dimRDI = input->order - dim - 1;
int num = input->dimSizeRDI[dimRDI]; int num = input->dimSizeRDI[dimRDI];
ReduceSum(input, output, dim); _ReduceSum(input, output, dim);
_ScaleAndShiftMe(output, (DTYPE)1/num, 0); _ScaleAndShiftMe(output, (DTYPE)1/num, 0);
} }
/*
get the mean value along a dimension of the tensor (return a XTensor structure)
make a new tenosr to keep the result and return it
For a 1-dimensional data array a, mean = (1/n) * sum_i input_i
>> input - the input tensor
>> dim - the dimension where the reduction is performed on
<< return - the mean value along a dimension of the tensor
*/
XTensor ReduceMean(const XTensor &input, int dim)
{
CheckNTErrors(&input, "Empty input or output tensors!");
CheckNTErrors((dim >= 0 && dim < input.order), "Illegal dimension to reduce!");
int order = input.order - 1;
int * dimSize = new int[order];
for(int i = 0; i < input.order; i++){
if(i < dim)
dimSize[i] = input.dimSize[i];
else if(i > dim)
dimSize[i] = input.dimSize[i + 1];
}
XTensor output = NewTensor(order, dimSize, input.dataType, input.denseRatio, input.devID, input.mem);
output.SetZeroAll();
output.SetTMP();
/* call _ReduceMean function */
_ReduceMean(&input, &output, dim);
/* tensor connection */
XLink::MakeLink(&input, NULL, &output, REDUCE_REDUCEMEAN);
XLink::AddParamToHead(&output, dim);
/* destroy variables */
delete dimSize;
return output;
}
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
...@@ -27,11 +27,18 @@ ...@@ -27,11 +27,18 @@
namespace nts{ // namespace nts(NiuTrans.Tensor) namespace nts{ // namespace nts(NiuTrans.Tensor)
/* /*
get the mean value along a dimension of the tensor. For a 1-dimensional data array a, get the mean value along a dimension of the tensor
mean = (1/n) * sum_i input_i For a 1-dimensional data array a, mean = (1/n) * sum_i input_i
*/ */
extern "C" extern "C"
void ReduceMean(XTensor * input, XTensor * output, int dim); void _ReduceMean(const XTensor * input, XTensor * output, int dim);
/*
get the mean value along a dimension of the tensor (return a XTensor structure)
make a new tenosr to keep the result and return it
For a 1-dimensional data array a, mean = (1/n) * sum_i input_i
*/
XTensor ReduceMean(const XTensor &input, int dim);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -27,11 +27,12 @@ ...@@ -27,11 +27,12 @@
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* /*
standard variance of the items along a dimension of the tensor. For a 1-dimensional data array a, standard variance of the items along a dimension of the tensor
For a 1-dimensional data array a,
variance = (1/n * \sum_i (a_i - mean)^2)^0.5 variance = (1/n * \sum_i (a_i - mean)^2)^0.5
*/ */
extern "C" extern "C"
void ReduceStandardVariance(XTensor * input, XTensor * output, int dim, XTensor * mean); void _ReduceStandardVariance(XTensor * input, XTensor * output, int dim, XTensor * mean);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -27,9 +27,12 @@ ...@@ -27,9 +27,12 @@
namespace nts{ // namespace nts(NiuTrans.Tensor) namespace nts{ // namespace nts(NiuTrans.Tensor)
/* /*
sum the items along a dimension of the tensor. For a 1-dimensional data array a, sum the items along a dimension of the tensor
For a 1-dimensional data array a,
sum = \sum_i (a_i - shift)^power if isExp == false sum = \sum_i (a_i - shift)^power if isExp == false
sum = \sum_i exp((a_i - shift)^power) if isExp == true sum = \sum_i exp((a_i - shift)^power) if isExp == true
>> input - the input tensor >> input - the input tensor
>> output - the output tensor >> output - the output tensor
>> dim - the dimension where the reduction is performed on >> dim - the dimension where the reduction is performed on
...@@ -37,7 +40,7 @@ sum = \sum_i exp((a_i - shift)^power) if isExp == true ...@@ -37,7 +40,7 @@ sum = \sum_i exp((a_i - shift)^power) if isExp == true
>> ieExp - specify if the exp() is performed >> ieExp - specify if the exp() is performed
>> power - we perform pow(item_i, power) on each item in the array >> power - we perform pow(item_i, power) on each item in the array
*/ */
void ReduceSum(XTensor * input, XTensor * output, int dim, XTensor * shift, DTYPE power, bool isExp) void _ReduceSum(const XTensor * input, XTensor * output, int dim, const XTensor * shift, DTYPE power, bool isExp)
{ {
CheckNTErrors((input->devID == output->devID || (input->devID < 0 && output->devID < 0)), CheckNTErrors((input->devID == output->devID || (input->devID < 0 && output->devID < 0)),
"This code must be run on the same device!"); "This code must be run on the same device!");
...@@ -61,7 +64,7 @@ void ReduceSum(XTensor * input, XTensor * output, int dim, XTensor * shift, DTYP ...@@ -61,7 +64,7 @@ void ReduceSum(XTensor * input, XTensor * output, int dim, XTensor * shift, DTYP
if(input->devID >= 0){ if(input->devID >= 0){
#ifdef USE_CUDA #ifdef USE_CUDA
CudaReduceSum(input, output, dim, shift, power, isExp); _CudaReduceSum(input, output, dim, shift, power, isExp);
#endif #endif
} }
else{ else{
...@@ -194,4 +197,51 @@ void ReduceSum(XTensor * input, XTensor * output, int dim, XTensor * shift, DTYP ...@@ -194,4 +197,51 @@ void ReduceSum(XTensor * input, XTensor * output, int dim, XTensor * shift, DTYP
} }
} }
/*
sum the items along a dimension of the tensor (return a XTensor structure)
make a new tensor to keep the result and return it
For a 1-dimensional data array a,
sum = \sum_i (a_i - shift)^power if isExp == false
sum = \sum_i exp((a_i - shift)^power) if isExp == true
>> input - the input tensor
>> dim - the dimension where the reduction is performed on
>> shift - shift the input
>> ieExp - specify if the exp() is performed
>> power - we perform pow(item_i, power) on each item in the array
<< return - the sum along a dimension of the tensor
*/
XTensor ReduceSum(const XTensor &input, int dim, const XTensor &shift, DTYPE power, bool isExp)
{
CheckNTErrors(&input, "Empty input or output tensors!");
CheckNTErrors((dim >= 0 && dim < input.order), "Illegal dimension to reduce!");
int order = input.order - 1;
int * dimSize = new int[order];
for(int i = 0; i < input.order; i++){
if(i < dim)
dimSize[i] = input.dimSize[i];
else if(i > dim)
dimSize[i] = input.dimSize[i + 1];
}
XTensor output = NewTensor(order, dimSize, input.dataType, input.denseRatio, input.devID, input.mem);
output.SetZeroAll();
output.SetTMP();
/* call _ReduceSum function */
_ReduceSum(&input, &output, dim, &shift, power, isExp);
/* tensor connection */
XLink::MakeLink(&input, &shift, &output, REDUCE_REDUCESUM);
XLink::AddParamToHead(&output, dim);
XLink::AddParamToHead(&output, power);
/* destroy variables */
delete dimSize;
return output;
}
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
...@@ -442,7 +442,7 @@ sum = \sum_i exp((a_i - shift)^power) if isExp == true ...@@ -442,7 +442,7 @@ sum = \sum_i exp((a_i - shift)^power) if isExp == true
>> power - we perform pow(item_i, power) on each item >> power - we perform pow(item_i, power) on each item
>> ieExp - specify if the exp() is performed >> ieExp - specify if the exp() is performed
*/ */
void CudaReduceSum(XTensor * input, XTensor * output, int dim, XTensor * shift, DTYPE power, bool isExp) void _CudaReduceSum(const XTensor * input, XTensor * output, int dim, const XTensor * shift, DTYPE power, bool isExp)
{ {
CheckNTErrors((input && output), "Empty input or output tensors!"); CheckNTErrors((input && output), "Empty input or output tensors!");
CheckNTErrors((input->order == output->order + 1), "Incorrect tensor sizes!"); CheckNTErrors((input->order == output->order + 1), "Incorrect tensor sizes!");
......
...@@ -29,13 +29,13 @@ namespace nts{ // namespace nts(NiuTrans.Tensor) ...@@ -29,13 +29,13 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA #ifdef USE_CUDA
/* /*
sum the items along a dimension of the tensor (cuda version) sum the items along a dimension of the tensor (cuda version).
For a 1-dimensional data array a, For a 1-dimensional data array a,
sum = \sum_i ((a_i + shift)^power) if isExp == false sum = \sum_i ((a_i + shift)^power) if isExp == false
sum = \sum_i exp((a_i + shift)^power) if isExp == true sum = \sum_i exp((a_i + shift)^power) if isExp == true
*/ */
extern "C" extern "C"
void CudaReduceSum(XTensor * input, XTensor * output, int dim, XTensor * shift, DTYPE power, bool isExp); void _CudaReduceSum(const XTensor * input, XTensor * output, int dim, const XTensor * shift, DTYPE power, bool isExp);
#endif // USE_CUDA #endif // USE_CUDA
......
...@@ -27,12 +27,23 @@ ...@@ -27,12 +27,23 @@
namespace nts{ // namespace nts(NiuTrans.Tensor) namespace nts{ // namespace nts(NiuTrans.Tensor)
/* /*
sum the items along a dimension of the tensor. For a 1-dimensional data array a, sum the items along a dimension of the tensor
For a 1-dimensional data array a,
sum = \sum_i (a_i - shift) if isExp == false sum = \sum_i (a_i - shift) if isExp == false
sum = \sum_i exp(a_i - shift) if isExp == true sum = \sum_i exp(a_i - shift) if isExp == true
*/ */
extern "C" extern "C"
void ReduceSum(XTensor * input, XTensor * output, int dim, XTensor * shift = NULL, DTYPE power = (DTYPE)1.0F, bool isExp = false); void _ReduceSum(const XTensor * input, XTensor * output, int dim, const XTensor * shift = NULL,
DTYPE power = (DTYPE)1.0F, bool isExp = false);
/*
sum the items along a dimension of the tensor (return a XTensor structure)
make a new tensor to keep the result and return it
For a 1-dimensional data array a,
sum = \sum_i (a_i - shift) if isExp == false
sum = \sum_i exp(a_i - shift) if isExp == true
*/
XTensor ReduceSum(const XTensor &input, int dim, const XTensor &shift = NULL, DTYPE power = (DTYPE)1.0F, bool isExp = false);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -19,23 +19,68 @@ ...@@ -19,23 +19,68 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../../XName.h"
#include "ReduceSum.h" #include "ReduceSum.h"
#include "ReduceSumSquared.h" #include "ReduceSumSquared.h"
namespace nts{ // namespace nts(NiuTrans.Tensor) namespace nts{ // namespace nts(NiuTrans.Tensor)
/* /*
squared sum of the items along a dimension of the tensor. squared sum of the items along a dimension of the tensor
For a 1-dimensional data array a,
sum = \sum_i (a_i - shift)^2 For a 1-dimensional data array a, sum = \sum_i (a_i - shift)^2
>> input - the input tensor >> input - the input tensor
>> output - the output tensor >> output - the output tensor
>> dim - the dimension where the reduction is performed on >> dim - the dimension where the reduction is performed on
>> shift - bias on the input >> shift - bias on the input
*/ */
void ReduceSumSquared(XTensor * input, XTensor * output, int dim, XTensor * shift) void _ReduceSumSquared(const XTensor * input, XTensor * output, int dim, const XTensor * shift)
{ {
ReduceSum(input, output, dim, shift, 2.0F); _ReduceSum(input, output, dim, shift, 2.0F);
}
/*
squared sum of the items along a dimension of the tensor (return a XTensor structure)
make a new tensor to keep the result and return it
For a 1-dimensional data array a, sum = \sum_i (a_i - shift)^2
>> input - the input tensor
>> dim - the dimension where the reduction is performed on
>> shift - bias on the input
<< return - the squared sum of the items along a dimension of the tensor
*/
/*
squared sum of the items along a dimension of the tensor (return a XTensor structure)
make a new tensor to keep the result and return it
For a 1-dimensional data array a, sum = \sum_i (a_i - shift)^2
>> input - the input tensor
>> dim - the dimension where the reduction is performed on
>> shift - bias on the input
<< return - the squared sum of the items along a dimension of the tensor
*/
XTensor ReduceSumSquared(const XTensor &input, int dim, const XTensor &shift)
{
    CheckNTErrors(&input, "Empty input or output tensors!");
    CheckNTErrors((dim >= 0 && dim < input.order), "Illegal dimension to reduce!");

    /* the output keeps every dimension of the input except "dim" */
    int order = input.order - 1;
    int * dimSize = new int[order];
    /* FIX: loop over the OUTPUT order so every slot of dimSize is filled.
       The previous loop ran i up to input.order, which left dimSize[dim]
       uninitialized and wrote past the end of dimSize. */
    for(int i = 0; i < order; i++){
        if(i < dim)
            dimSize[i] = input.dimSize[i];
        else
            dimSize[i] = input.dimSize[i + 1];
    }

    XTensor output = NewTensor(order, dimSize, input.dataType, input.denseRatio, input.devID, input.mem);
    output.SetZeroAll();
    output.SetTMP();

    /* call _ReduceSumSquared function to do the real work */
    _ReduceSumSquared(&input, &output, dim, &shift);

    /* tensor connection (for the computation graph) */
    XLink::MakeLink(&input, &shift, &output, REDUCE_REDUCESUMSQUARED);
    XLink::AddParamToHead(&output, dim);

    /* destroy variables; allocated with new[], so delete[] is required */
    delete[] dimSize;

    return output;
}
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
\ No newline at end of file \ No newline at end of file
...@@ -27,11 +27,19 @@ ...@@ -27,11 +27,19 @@
namespace nts{ // namespace nts(NiuTrans.Tensor) namespace nts{ // namespace nts(NiuTrans.Tensor)
/* /*
squared sum of the items along a dimension of the tensor. For a 1-dimensional data array a, squared sum of the items along a dimension of the tensor
For a 1-dimensional data array a,
sum = \sum_i (a_i - shift)^2 sum = \sum_i (a_i - shift)^2
*/ */
extern "C" extern "C"
void ReduceSumSquared(XTensor * input, XTensor * output, int dim, XTensor * shift); void _ReduceSumSquared(const XTensor * input, XTensor * output, int dim, const XTensor * shift);
/*
squared sum of the items along a dimension of the tensor (return a XTensor structure)
make a new tensor to keep the result and return it
For a 1-dimensional data array a, sum = \sum_i (a_i - shift)^2
*/
XTensor ReduceSumSquared(const XTensor &input, int dim, const XTensor &shift);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -26,21 +26,58 @@ ...@@ -26,21 +26,58 @@
namespace nts{ // namespace nts(NiuTrans.Tensor) namespace nts{ // namespace nts(NiuTrans.Tensor)
/* /*
variance of the items along a dimension of the tensor. variance of the items along a dimension of the tensor
For a 1-dimensional data array a,
variance = 1/n * \sum_i (a_i - mean)^2 For a 1-dimensional data array a, variance = 1/n * \sum_i (a_i - mean)^2
>> input - the input tensor >> input - the input tensor
>> output - the output tensor >> output - the output tensor
>> dim - the dimension where the reduction is performed on >> dim - the dimension where the reduction is performed on
>> mean - the mean value >> mean - the mean value
*/ */
void ReduceVariance(XTensor * input, XTensor * output, int dim, XTensor * mean) void _ReduceVariance(const XTensor * input, XTensor * output, int dim, const XTensor * mean)
{ {
int dimRDI = input->order - dim - 1; int dimRDI = input->order - dim - 1;
int num = input->dimSizeRDI[dimRDI]; int num = input->dimSizeRDI[dimRDI];
ReduceSum(input, output, dim, mean, 2.0F); _ReduceSum(input, output, dim, mean, 2.0F);
_ScaleAndShiftMe(output, (DTYPE)1 / num, 0); _ScaleAndShiftMe(output, (DTYPE)1 / num, 0);
} }
} // namespace nts(NiuTrans.Tensor) /*
variance of the items along a dimension of the tensor (return a XTensor structure)
make a new tensor to keep the result and return it
For a 1-dimensional data array a, variance = 1/n * \sum_i (a_i - mean)^2
>> input - the input tensor
>> dim - the dimension where the reduction is performed on
>> mean - the mean value
<< return - the variance of the items along a dimension of the tensor
*/
XTensor ReduceVariance(const XTensor &input, int dim, const XTensor &mean)
{
    CheckNTErrors(&input, "Empty input or output tensors!");
    CheckNTErrors((dim >= 0 && dim < input.order), "Illegal dimension to reduce!");

    /* the output keeps every dimension of the input except "dim" */
    int order = input.order - 1;
    int * dimSize = new int[order];
    /* FIX: loop over the OUTPUT order so every slot of dimSize is filled.
       The previous loop ran i up to input.order, which left dimSize[dim]
       uninitialized and wrote past the end of dimSize. */
    for(int i = 0; i < order; i++){
        if(i < dim)
            dimSize[i] = input.dimSize[i];
        else
            dimSize[i] = input.dimSize[i + 1];
    }

    XTensor output = NewTensor(order, dimSize, input.dataType, input.denseRatio, input.devID, input.mem);
    output.SetZeroAll();
    output.SetTMP();

    /* call _ReduceVariance function to do the real work */
    _ReduceVariance(&input, &output, dim, &mean);

    /* NOTE(review): unlike ReduceSum/ReduceSumSquared, no XLink connection
       is made here — confirm whether this reduction should also be recorded
       in the computation graph. */

    /* destroy variables; allocated with new[], so delete[] is required */
    delete[] dimSize;

    return output;
}
} // namespace nts(NiuTrans.Tensor)
...@@ -27,11 +27,18 @@ ...@@ -27,11 +27,18 @@
namespace nts{ // namespace nts(NiuTrans.Tensor) namespace nts{ // namespace nts(NiuTrans.Tensor)
/* /*
variance of the items along a dimension of the tensor. For a 1-dimensional data array a, variance of the items along a dimension of the tensor
variance = 1/n * \sum_i (a_i - mean)^2 For a 1-dimensional data array a, variance = 1/n * \sum_i (a_i - mean)^2
*/ */
extern "C" extern "C"
void ReduceVariance(XTensor * input, XTensor * output, int dim, XTensor * mean); void _ReduceVariance(const XTensor * input, XTensor * output, int dim, const XTensor * mean);
/*
variance of the items along a dimension of the tensor (return a XTensor structure)
make a new tensor to keep the result and return it
For a 1-dimensional data array a, variance = 1/n * \sum_i (a_i - mean)^2
*/
XTensor ReduceVariance(const XTensor &input, int dim, const XTensor &mean);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
*/ */
#include "../../XTensor.h" #include "../../XTensor.h"
#include "../../XName.h"
#include "Concatenate.h" #include "Concatenate.h"
#include "Merge.h" #include "Merge.h"
#include "ConcatenateSolely.h" #include "ConcatenateSolely.h"
...@@ -28,13 +29,15 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -28,13 +29,15 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* /*
concatenate a list of tensors along a given dimension concatenate a list of tensors along a given dimension
Note that this is actually a wrapper that selects "ConcatenateSolely" Note that this is actually a wrapper that selects "ConcatenateSolely"
or "Merge" by means of the tensor shapes or "Merge" by means of the tensor shapes
>> smalls - a list of tensors for concatenation >> smalls - a list of tensors for concatenation
>> big - the resulting tensor >> big - the resulting tensor
>> dim - which dimension we perform the concatenation >> dim - which dimension we perform the concatenation
*/ */
void Concatenate(XList * smalls, XTensor * big, int dim) void _Concatenate(const XList * smalls, XTensor * big, int dim)
{ {
bool uniform = true; bool uniform = true;
for (int i = 1; i < smalls->count; i++) { for (int i = 1; i < smalls->count; i++) {
...@@ -46,25 +49,131 @@ void Concatenate(XList * smalls, XTensor * big, int dim) ...@@ -46,25 +49,131 @@ void Concatenate(XList * smalls, XTensor * big, int dim)
} }
if (uniform) if (uniform)
Merge(smalls, big, dim); _Merge(smalls, big, dim);
else else
ConcatenateSolely(smalls, big, dim); _ConcatenateSolely(smalls, big, dim);
}
/*
concatenate a list of tensors along a given dimension (return a XTensor structure)
make a new tensor to keep the result and return it
Note that this is actually a wrapper that selects "ConcatenateSolely"
or "Merge" by means of the tensor shapes
>> smalls - a list of tensors for concatenation
>> big - the resulting tensor
>> dim - which dimension we perform the concatenation
<< return - the tensor of concatenating a list of tensors along a given dimension
*/
XTensor Concatenate(const XList &smalls, int dim)
{
    CheckNTErrors(&smalls != NULL, "Invalid list!");
    CheckNTErrors((smalls.count > 0), "Empty list!");
    CheckNTErrors(dim >= 0, "Illegal dimension to concatenate!");

    /* "uniform" means all input tensors have identical shapes, in which
       case the cheaper Merge path can be used instead of ConcatenateSolely */
    bool uniform = true;
    for (int i = 1; i < smalls.count; i++) {
        XTensor * a = (XTensor*)smalls.GetItem(i - 1);
        XTensor * b = (XTensor*)smalls.GetItem(i);
        CheckNTErrors((a && b), "Empty input tensors!");
        if (!XTensor::IsIdentical(a, b))
            uniform = false;
    }

    int * dimSize;
    if (uniform) {
        XTensor * tensor = (XTensor*)smalls.GetItem(0);
        int order = tensor->order;
        dimSize = new int[order];
        for (int i = 0; i < tensor->order; i++) {
            if (i != dim)
                dimSize[i] = tensor->dimSize[i];
            else
                dimSize[i] = tensor->dimSize[dim] * smalls.count;
        }

        /* NOTE(review): this branch constructs the result with the XTensor
           constructor while the non-uniform branch uses NewTensor — confirm
           the two are intended to allocate the same way */
        XTensor big = XTensor(order, dimSize, tensor->dataType, tensor->denseRatio, tensor->devID, tensor->mem);
        big.SetZeroAll();
        big.SetTMP();

        /* call _Merge function to do the real work */
        _Merge(&smalls, &big, dim);

        ///* tensor connection */
        //XLink::MakeLink(&smalls, &big, SHAPE_CONCATENATE);
        //XLink::AddParamToHead(&big, dim);

        /* destroy variables; allocated with new[], so delete[] is required */
        delete[] dimSize;

        return big;
    }
    else {
        XTensor * tensor = (XTensor*)smalls.GetItem(0);
        int order = tensor->order;
        dimSize = new int[order];
        for (int i = 0; i < tensor->order; i++)
            if (i != dim)
                dimSize[i] = tensor->dimSize[i];

        /* the concatenated dimension is the sum of the inputs' sizes on "dim" */
        int catDimSize = 0;
        for (int i = 0; i < smalls.count; i++) {
            XTensor * tensor = (XTensor*)smalls.GetItem(i);
            catDimSize += tensor->dimSize[dim];
        }
        dimSize[dim] = catDimSize;

        XTensor big = NewTensor(order, dimSize, tensor->dataType, tensor->denseRatio, tensor->devID, tensor->mem);
        big.SetZeroAll();
        big.SetTMP();

        /* call _ConcatenateSolely function to do the real work */
        _ConcatenateSolely(&smalls, &big, dim);

        /* destroy variables; allocated with new[], so delete[] is required */
        delete[] dimSize;

        return big;
    }
}
/* /*
concatenate two tensors along a given dimension concatenate two tensors along a given dimension
>> smallA - one tensor for concatenation >> smallA - one tensor for concatenation
>> smallB - the other tensor for concatenation >> smallB - the other tensor for concatenation
>> big - the resulting tensor >> big - the resulting tensor
>> dim - which dimension we perform the concatenation >> dim - which dimension we perform the concatenation
*/ */
void Concatenate(XTensor * smallA, XTensor * smallB, XTensor * big, int dim) void _Concatenate(const XTensor * smallA, const XTensor * smallB, XTensor * big, int dim)
{ {
XList smalls(2); XList smalls(2);
smalls.Add(smallA); smalls.Add(smallA);
smalls.Add(smallB); smalls.Add(smallB);
Concatenate(&smalls, big, dim); _Concatenate(&smalls, big, dim);
}
/*
concatenate two tensors along a given dimension (return a XTensor structure).
make a new tensor to keep the result and return it.
>> smallA - one tensor for concatenation
>> smallB - the other tensor for concatenation
>> big - the resulting tensor
>> dim - which dimension we perform the concatenation
<< return - the tensor of concatenating two tensor along a given dimension
*/
XTensor Concatenate(const XTensor &smallA, const XTensor &smallB, int dim)
{
    /* collect the two inputs into a list and delegate to the
       list-based Concatenate, which returns the result tensor */
    XList pair(2);
    pair.Add(&smallA);
    pair.Add(&smallB);

    return Concatenate(pair, dim);
}
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
...@@ -28,13 +28,27 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -28,13 +28,27 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* /*
concatenate a list of tensors along a given dimension concatenate a list of tensors along a given dimension
Note that this is actually a wrapper that selects "ConcatenateSolely" Note that this is actually a wrapper that selects
or "Merge" by means of the tensor shapes "ConcatenateSolely" or "Merge" by means of the tensor shapes
*/ */
void Concatenate(XList * smalls, XTensor * big, int dim); void _Concatenate(const XList * smalls, XTensor * big, int dim);
/*
concatenate a list of tensors along a given dimension (return a XTensor structure)
make a new tensor to keep the result and return it
Note that this is actually a wrapper that selects
"ConcatenateSolely" or "Merge" by means of the tensor shapes
*/
XTensor Concatenate(const XList &smalls, int dim);
/* concatenate two tensors along a given dimension */ /* concatenate two tensors along a given dimension */
void Concatenate(XTensor * smallA, XTensor * smallB, XTensor * big, int dim); void _Concatenate(const XTensor * smallA, const XTensor * smallB, XTensor * big, int dim);
/*
concatenate two tensors along a given dimension (return a XTensor structure)
make a new tensor to keep the result and return it
*/
XTensor Concatenate(const XTensor &smallA, const XTensor &smallB, int dim);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -29,11 +29,12 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -29,11 +29,12 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* /*
concatenate a list of tensors along a given dimension concatenate a list of tensors along a given dimension
>> smalls - a list of tensors for concatenation >> smalls - a list of tensors for concatenation
>> big - the resulting tensor >> big - the resulting tensor
>> dim - which dimension we perform the concatenation >> dim - which dimension we perform the concatenation
*/ */
void ConcatenateSolely(XList * smalls, XTensor * big, int dim) void _ConcatenateSolely(const XList * smalls, XTensor * big, int dim)
{ {
CheckNTErrors((big->order > dim && dim >= 0), "Illegal dimension to concatenate!"); CheckNTErrors((big->order > dim && dim >= 0), "Illegal dimension to concatenate!");
...@@ -92,7 +93,7 @@ void ConcatenateSolely(XList * smalls, XTensor * big, int dim) ...@@ -92,7 +93,7 @@ void ConcatenateSolely(XList * smalls, XTensor * big, int dim)
sourceArrays->Add(tensor->data); sourceArrays->Add(tensor->data);
} }
MergeBlockLists(sourceArrays, blockSizes, blockNum, big->data, big->mem); _MergeBlockLists(sourceArrays, blockSizes, blockNum, big->data, big->mem);
delete[] blockSizes; delete[] blockSizes;
delete sourceArrays; delete sourceArrays;
......
...@@ -28,7 +28,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -28,7 +28,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* concatenate a list of tensors along a given dimension */ /* concatenate a list of tensors along a given dimension */
extern "C" extern "C"
void ConcatenateSolely(XList * smalls, XTensor * big, int dim); void _ConcatenateSolely(const XList * smalls, XTensor * big, int dim);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -35,12 +35,12 @@ set target data block index for the data movement in merge ...@@ -35,12 +35,12 @@ set target data block index for the data movement in merge
>> gridNum - number of grids >> gridNum - number of grids
>> mem - the memory pool >> mem - the memory pool
*/ */
void MakeMergeBlockIndex(int * blockIndex, int blockNum, int blockNumInMerge, void _MakeMergeBlockIndex(int * blockIndex, int blockNum, int blockNumInMerge,
int splitSizeInGrid, int gridSize, int gridNum, XMem * mem) int splitSizeInGrid, int gridSize, int gridNum, XMem * mem)
{ {
if (mem != NULL && mem->devID >= 0) { if (mem != NULL && mem->devID >= 0) {
#ifdef USE_CUDA #ifdef USE_CUDA
CudaMakeMergeBlockIndex(mem->devID, blockIndex, blockNum, blockNumInMerge, splitSizeInGrid, gridSize, gridNum); _CudaMakeMergeBlockIndex(mem->devID, blockIndex, blockNum, blockNumInMerge, splitSizeInGrid, gridSize, gridNum);
#else #else
ShowNTErrors("Please specify USE_CUDA and recompile the code!"); ShowNTErrors("Please specify USE_CUDA and recompile the code!");
#endif #endif
......
...@@ -70,7 +70,7 @@ set target data block index for the data movement in split ...@@ -70,7 +70,7 @@ set target data block index for the data movement in split
>> mem - the memory pool >> mem - the memory pool
*/ */
extern "C" extern "C"
void CudaMakeMergeBlockIndex(int devID, void _CudaMakeMergeBlockIndex(int devID,
int * blockIndex, int blockNum, int blockNumInMerge, int * blockIndex, int blockNum, int blockNumInMerge,
int splitSizeInGrid, int gridSize, int gridNum) int splitSizeInGrid, int gridSize, int gridNum)
{ {
......
...@@ -30,7 +30,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -30,7 +30,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* set target data block index for the data movement in split */ /* set target data block index for the data movement in split */
extern "C" extern "C"
void CudaMakeMergeBlockIndex(int devID, void _CudaMakeMergeBlockIndex(int devID,
int * blockIndex, int blockNum, int blockNumInMerge, int * blockIndex, int blockNum, int blockNumInMerge,
int splitSizeInGrid, int gridSize, int gridNum); int splitSizeInGrid, int gridSize, int gridNum);
......
...@@ -28,7 +28,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -28,7 +28,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* set target data block index for the data movement in merge */ /* set target data block index for the data movement in merge */
extern "C" extern "C"
void MakeMergeBlockIndex(int * blockIndex, int blockNum, int blockNumInMerge, void _MakeMergeBlockIndex(int * blockIndex, int blockNum, int blockNumInMerge,
int splitSizeInGrid, int gridSize, int gridNum, XMem * mem); int splitSizeInGrid, int gridSize, int gridNum, XMem * mem);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -33,11 +33,11 @@ set target data block index for the data movement in split ...@@ -33,11 +33,11 @@ set target data block index for the data movement in split
>> blockNum - number of data blocks >> blockNum - number of data blocks
>> mem - the memory pool >> mem - the memory pool
*/ */
void MakeSplitBlockIndex(int * blockIndex, int splitNum, int blockSplitSize, int blockNum, XMem * mem) void _MakeSplitBlockIndex(int * blockIndex, int splitNum, int blockSplitSize, int blockNum, XMem * mem)
{ {
if (mem != NULL && mem->devID >= 0) { if (mem != NULL && mem->devID >= 0) {
#ifdef USE_CUDA #ifdef USE_CUDA
CudaMakeSplitBlockIndex(mem->devID, blockIndex, splitNum, blockSplitSize, blockNum); _CudaMakeSplitBlockIndex(mem->devID, blockIndex, splitNum, blockSplitSize, blockNum);
#else #else
ShowNTErrors("Please specify USE_CUDA and recompile the code!"); ShowNTErrors("Please specify USE_CUDA and recompile the code!");
#endif #endif
......
...@@ -58,7 +58,7 @@ set target data block index for the data movement in split ...@@ -58,7 +58,7 @@ set target data block index for the data movement in split
>> blockNum - number of data blocks >> blockNum - number of data blocks
*/ */
extern "C" extern "C"
void CudaMakeSplitBlockIndex(int devID, int * blockIndex, int splitNum, int blockSplitSize, int blockNum) void _CudaMakeSplitBlockIndex(int devID, int * blockIndex, int splitNum, int blockSplitSize, int blockNum)
{ {
int cudaGrids[3]; int cudaGrids[3];
int cudaBlocks[3]; int cudaBlocks[3];
......
...@@ -30,7 +30,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -30,7 +30,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* set target data block index for the data movement in split */ /* set target data block index for the data movement in split */
extern "C" extern "C"
void CudaMakeSplitBlockIndex(int devID, int * blockIndex, int splitNum, int blockSplitSize, int blockNum); void _CudaMakeSplitBlockIndex(int devID, int * blockIndex, int splitNum, int blockSplitSize, int blockNum);
#endif // USE_CUDA #endif // USE_CUDA
......
...@@ -28,7 +28,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -28,7 +28,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* set target data block index for the data movement in split */ /* set target data block index for the data movement in split */
extern "C" extern "C"
void MakeSplitBlockIndex(int * blockIndex, int splitNum, int blockSplitSize, int blockNum, XMem * mem); void _MakeSplitBlockIndex(int * blockIndex, int splitNum, int blockSplitSize, int blockNum, XMem * mem);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -29,15 +29,18 @@ ...@@ -29,15 +29,18 @@
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* /*
transform a tensor by merging it alone with a dimension, e.g., (N/3, M, 3) -> (N, M) transform a tensor by merging it along with a dimension.
e.g., (N/3, M, 3) -> (N, M)
>> s - the source tensor >> s - the source tensor
>> t - the target tensor (for return) >> t - the target tensor (for return)
>> whereToMerge - the merging operation is along with which dimension >> whereToMerge - the merging operation is along with which dimension
>> leadingDim - the leading dimension of merging, take (N/3, M, 3) -> (N, M) for example >> leadingDim - the leading dimension of merging, take (N/3, M, 3) -> (N, M)
whereToMerge = 0 (i.e., the dimension for "N/3") for example, whereToMerge = 0 (i.e., the dimension for "N/3")
leadingDim = 2 (i.e., the dimension for "3") leadingDim = 2 (i.e., the dimension for "3")
*/ */
void Merge(XTensor * s, XTensor * t, int whereToMerge, int leadingDim) void _Merge(const XTensor * s, XTensor * t, int whereToMerge, int leadingDim)
{ {
int whereToMergeRDI = s->order - whereToMerge - 1; int whereToMergeRDI = s->order - whereToMerge - 1;
int leadingDimRDI = s->order - leadingDim - 1; int leadingDimRDI = s->order - leadingDim - 1;
...@@ -120,9 +123,9 @@ void Merge(XTensor * s, XTensor * t, int whereToMerge, int leadingDim) ...@@ -120,9 +123,9 @@ void Merge(XTensor * s, XTensor * t, int whereToMerge, int leadingDim)
mem->AllocBuf(mem->devID, blockNum * gridNum * sizeof(int)) : mem->AllocBuf(mem->devID, blockNum * gridNum * sizeof(int)) :
XMemAlloc(mem->devID, blockNum * gridNum * sizeof(int))); XMemAlloc(mem->devID, blockNum * gridNum * sizeof(int)));
MakeMergeBlockIndex(blockIndex, blockNum, blockNumInMerge, splitSizeInGrid, gridSize, gridNum, mem); _MakeMergeBlockIndex(blockIndex, blockNum, blockNumInMerge, splitSizeInGrid, gridSize, gridNum, mem);
CopyBlocksOnSite(s->data, realBlockSize, blockNum, dataTMP, blockIndex, mem); _CopyBlocksOnSite(s->data, realBlockSize, blockNum, dataTMP, blockIndex, mem);
if (mem != NULL) if (mem != NULL)
mem->ReleaseBuf(mem->devID, blockNum * gridNum * sizeof(int)); mem->ReleaseBuf(mem->devID, blockNum * gridNum * sizeof(int));
...@@ -144,12 +147,60 @@ void Merge(XTensor * s, XTensor * t, int whereToMerge, int leadingDim) ...@@ -144,12 +147,60 @@ void Merge(XTensor * s, XTensor * t, int whereToMerge, int leadingDim)
} }
/* /*
transform a tensor by merging it along with a dimension (return a XTensor structure)
make a new tensor to keep the result and return it
e.g., (N/3, M, 3) -> (N, M)
>> s - the source tensor
>> whereToMerge - the merging operation is along with which dimension
>> leadingDim - the leading dimension of merging, take (N/3, M, 3) -> (N, M)
for example, whereToMerge = 0 (i.e., the dimension for "N/3")
leadingDim = 2 (i.e., the dimension for "3")
<< return - the transformed tensor by merging along with a dimension
*/
XTensor Merge(const XTensor &s, int whereToMerge, int leadingDim)
{
    CheckNTErrors(&s != NULL, "Invalid tensors!");
    CheckNTErrors((leadingDim < whereToMerge), "Invalid leading dimension!");

    /* a negative leadingDim means "use the first dimension" */
    if (leadingDim < 0)
        leadingDim = 0;

    /* the output drops the leading dimension and folds its size
       into the "whereToMerge" dimension, e.g., (N/3, M, 3) -> (N, M) */
    int order = s.order - 1;
    int * dimSize = new int[order];

    for (int i = 0; i < s.order; i++) {
        if (i < leadingDim)
            dimSize[i] = s.dimSize[i];
        else if (i > leadingDim) {
            if (i != whereToMerge)
                dimSize[i - 1] = s.dimSize[i];
            else
                dimSize[i - 1] = s.dimSize[i] * s.dimSize[leadingDim];
        }
    }

    XTensor t = NewTensor(order, dimSize, s.dataType, s.denseRatio, s.devID, s.mem);
    t.SetZeroAll();
    t.SetTMP();

    /* call _Merge function to do the real work */
    _Merge(&s, &t, whereToMerge, leadingDim);

    /* destroy variables; allocated with new[], so delete[] is required */
    delete[] dimSize;

    return t;
}
/*
merge small tensors into a big tensor merge small tensors into a big tensor
>> smalls - the list of the small tensors >> smalls - the list of the small tensors
>> big - the merged tensor (for return) >> big - the merged tensor (for return)
>> whereToMerge - the merging operation is along with which dimension >> whereToMerge - the merging operation is along with which dimension
*/ */
void Merge(XList * smalls, XTensor * big, int whereToMerge) void _Merge(const XList * smalls, XTensor * big, int whereToMerge)
{ {
CheckNTErrors((smalls != NULL), "Invalid list!"); CheckNTErrors((smalls != NULL), "Invalid list!");
CheckNTErrors((smalls->count > 0), "Empty list!"); CheckNTErrors((smalls->count > 0), "Empty list!");
...@@ -241,7 +292,7 @@ void Merge(XList * smalls, XTensor * big, int whereToMerge) ...@@ -241,7 +292,7 @@ void Merge(XList * smalls, XTensor * big, int whereToMerge)
} }
} }
Merge(tensorTMP, big, whereToMerge); _Merge(tensorTMP, big, whereToMerge);
delete[] dimSizeTMP; delete[] dimSizeTMP;
tensorTMP->data = NULL; tensorTMP->data = NULL;
...@@ -255,4 +306,38 @@ void Merge(XList * smalls, XTensor * big, int whereToMerge) ...@@ -255,4 +306,38 @@ void Merge(XList * smalls, XTensor * big, int whereToMerge)
XMemFree(mem->devID, dataTMP); XMemFree(mem->devID, dataTMP);
} }
} }
/*
merge small tensors into a big tensor (return a XTensor structure)
make a new tensor to keep the result and return it
>> smalls - the list of the small tensors
>> whereToMerge - the merging operation is along with which dimension
<< return - the big tensor merged by small tensors
*/
XTensor Merge(const XList &smalls, int whereToMerge)
{
    XTensor * tensor = (XTensor*)smalls.GetItem(0);

    /* the output keeps the shape of the first small tensor, with the
       "whereToMerge" dimension scaled by the number of tensors merged */
    int order = tensor->order;
    int * dimSize = new int[order];
    for (int i = 0; i < tensor->order; i++) {
        if (i != whereToMerge)
            dimSize[i] = tensor->dimSize[i];
        else
            dimSize[i] = tensor->dimSize[whereToMerge] * smalls.count;
    }

    XTensor big = NewTensor(order, dimSize, tensor->dataType, tensor->denseRatio, tensor->devID, tensor->mem);
    big.SetZeroAll();
    big.SetTMP();

    /* call _Merge function to do the real work */
    _Merge(&smalls, &big, whereToMerge);

    /* destroy variables; allocated with new[], so delete[] is required */
    delete[] dimSize;

    return big;
}
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
...@@ -27,10 +27,23 @@ ...@@ -27,10 +27,23 @@
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* transform a tensor by merging it alone with a dimension, e.g., (M, N/3, 3) -> (M, N) */ /* transform a tensor by merging it alone with a dimension, e.g., (M, N/3, 3) -> (M, N) */
void Merge(XTensor * s, XTensor * t, int whereToMerge, int leadingDim = -1); void _Merge(const XTensor * s, XTensor * t, int whereToMerge, int leadingDim = -1);
/*
transform a tensor by merging it alone with a dimension (return a XTensor structure).
make a new tensor to keep the result and return it.
e.g., (M, N/3, 3) -> (M, N)
*/
XTensor Merge(const XTensor &s, int whereToMerge, int leadingDim = -1);
/* merge small tensors into a big tensor */ /* merge small tensors into a big tensor */
void Merge(XList * smalls, XTensor * big, int whereToMerge); void _Merge(const XList * smalls, XTensor * big, int whereToMerge);
/*
merge small tensors into a big tensor (return a XTensor structure).
make a new tensor to keep the result and return it.
*/
XTensor Merge(const XList &smalls, int whereToMerge);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -34,11 +34,11 @@ merge data by blocks ...@@ -34,11 +34,11 @@ merge data by blocks
>> target - target data array >> target - target data array
>> myMem - memory pool >> myMem - memory pool
*/ */
void MergeBlockLists(XList * sourceList, int * blockSizes, int blockNum, void * target, XMem * myMem) void _MergeBlockLists(const XList * sourceList, int * blockSizes, int blockNum, void * target, XMem * myMem)
{ {
if (myMem != NULL && myMem->devID >= 0) { if (myMem != NULL && myMem->devID >= 0) {
#ifdef USE_CUDA #ifdef USE_CUDA
CudaMergeBlockLists(sourceList, blockSizes, blockNum, target, myMem); _CudaMergeBlockLists(sourceList, blockSizes, blockNum, target, myMem);
#else #else
ShowNTErrors("Plesae specify USE_CUDA and recompile the code!"); ShowNTErrors("Plesae specify USE_CUDA and recompile the code!");
#endif #endif
......
...@@ -72,7 +72,7 @@ merge data by blocks (cuda version) ...@@ -72,7 +72,7 @@ merge data by blocks (cuda version)
>> myMem - the memory pool >> myMem - the memory pool
*/ */
extern "C" extern "C"
void CudaMergeBlockLists(XList * sourceList, int * blockSizes, int blockNum, void * target, XMem * myMem) void _CudaMergeBlockLists(const XList * sourceList, int * blockSizes, int blockNum, void * target, XMem * myMem)
{ {
CheckNTErrors((myMem != NULL), "No memory pool!"); CheckNTErrors((myMem != NULL), "No memory pool!");
CheckNTErrors((myMem->devID >= 0), "Wrong device to run!"); CheckNTErrors((myMem->devID >= 0), "Wrong device to run!");
......
...@@ -34,7 +34,7 @@ void KernelCopyBlockLists(DTYPE ** sourceList, int * sourceBlockSizes, int sourc ...@@ -34,7 +34,7 @@ void KernelCopyBlockLists(DTYPE ** sourceList, int * sourceBlockSizes, int sourc
/* merge data by blocks (cuda version) */ /* merge data by blocks (cuda version) */
extern "C" extern "C"
void CudaMergeBlockLists(XList * sourceList, int * blockSizes, int blockNum, void * target, XMem * myMem); void _CudaMergeBlockLists(const XList * sourceList, int * blockSizes, int blockNum, void * target, XMem * myMem);
#endif // USE_CUDA #endif // USE_CUDA
......
...@@ -28,7 +28,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -28,7 +28,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* merge data by blocks */ /* merge data by blocks */
extern "C" extern "C"
void MergeBlockLists(XList * sourceList, int * blockSizes, int blockNum, void * target, XMem * myMem); void _MergeBlockLists(const XList * sourceList, int * blockSizes, int blockNum, void * target, XMem * myMem);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -28,17 +28,25 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -28,17 +28,25 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#define permute _Permute_ #define permute _Permute_
/* generate the tensor with permuted dimensions: b = permuted(a) */ /*
void Permute(XTensor * a, XTensor * b, int * dimPermute); generate the tensor with permuted dimensions.
b = permuted(a)
/* permute the tensor dimensions on site: a = permuted(a) */ */
void Permute_(XTensor * a, int * dimPermute); void _Permute(XTensor * a, XTensor * b, int * dimPermute);
/* make a tensor with permuted dimensions: b = permuted(a) and return its pointer */ /*
XTensor * _Permute(XTensor *a, int * dimPermute); permute the tensor dimensions (do it on site).
keep the result in the input tensor and return nothing.
a = permuted(a)
*/
void _PermuteMe(XTensor * a, int * dimPermute);
/* make a tensor with permuted dimensions: b = permuted(a) and return its body */ /*
XTensor& _Permute_(XTensor &a, int * dimPermute); make a tensor with permuted dimensions (return a XTensor structure).
make a new tensor to keep the result and return it.
b = permuted(a)
*/
XTensor Permute(XTensor &a, int * dimPermute);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -29,12 +29,13 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -29,12 +29,13 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* /*
transform a tensor by splitting it, e.g., (N, M) -> (N/3, M, 3) transform a tensor by splitting it, e.g., (N, M) -> (N/3, M, 3)
>> s - the source tensor >> s - the source tensor
>> t - the target tensor (for return) >> t - the target tensor (for return)
>> whereToSplit - which dimension of the tensor is to split >> whereToSplit - which dimension of the tensor is to split
>> splitNum - how many splits >> splitNum - how many splits
*/ */
void Split(XTensor * s, XTensor * t, int whereToSplit, int splitNum) void _Split(const XTensor * s, XTensor * t, int whereToSplit, int splitNum)
{ {
CheckNTErrors((s && t), "Invalid tensors!"); CheckNTErrors((s && t), "Invalid tensors!");
CheckNTErrors((s->devID == t->devID || (s->devID < 0 && t->devID < 0)), CheckNTErrors((s->devID == t->devID || (s->devID < 0 && t->devID < 0)),
...@@ -108,9 +109,9 @@ void Split(XTensor * s, XTensor * t, int whereToSplit, int splitNum) ...@@ -108,9 +109,9 @@ void Split(XTensor * s, XTensor * t, int whereToSplit, int splitNum)
mem->AllocBuf(mem->devID, blockNum * sizeof(int)) : mem->AllocBuf(mem->devID, blockNum * sizeof(int)) :
XMemAlloc(mem->devID, blockNum * sizeof(int))); XMemAlloc(mem->devID, blockNum * sizeof(int)));
MakeSplitBlockIndex(blockIndex, splitNum, blockSplitSize, blockNum, mem); _MakeSplitBlockIndex(blockIndex, splitNum, blockSplitSize, blockNum, mem);
CopyBlocksOnSite(s->data, realBlockSize, blockNum, dataTMP, blockIndex, mem); _CopyBlocksOnSite(s->data, realBlockSize, blockNum, dataTMP, blockIndex, mem);
if (mem != NULL) if (mem != NULL)
mem->ReleaseBuf(mem->devID, blockNum * sizeof(int)); mem->ReleaseBuf(mem->devID, blockNum * sizeof(int));
...@@ -130,15 +131,52 @@ void Split(XTensor * s, XTensor * t, int whereToSplit, int splitNum) ...@@ -130,15 +131,52 @@ void Split(XTensor * s, XTensor * t, int whereToSplit, int splitNum)
} }
/* /*
split a big tensor into small tensors transform a tensor by splitting it, e.g., (N, M) -> (N/3, M, 3) (return a XTensor structure)
make a new tensor to keep the result and return it
>> s - the source tensor
>> whereToSplit - which dimension of the tensor is to split
>> splitNum - how many splits
<< return - teh transformed tensor by splitting it
*/
XTensor Split(const XTensor &s, int whereToSplit, int splitNum)
{
CheckNTErrors(&s, "Invalid tensors!");
int order = s.order + 1;
int * dimSize = new int[order];
for (int i = 0; i < s.order; i++) {
if (i == whereToSplit)
dimSize[i] = s.dimSize[i] / splitNum;
else
dimSize[i] = s.dimSize[i];
}
dimSize[-1] = splitNum;
XTensor t = NewTensor(order, dimSize, s.dataType, s.denseRatio, s.devID, s.mem);
t.SetZeroAll();
t.SetTMP();
/* call _Split function */
_Split(&s, &t, whereToSplit, splitNum);
/* destroy variables */
delete dimSize;
return t;
}
/*
split a big tensor into small tensors.
>> big - the source tensor >> big - the source tensor
>> smalls - the list that keeps the resulting tensors (for return) >> smalls - the list that keeps the resulting tensors (for return)
NOTE that all the "small" tensors have already been NOTE that all the "small" tensors have already been placed in the list in advance.
placed in the list in advance.
>> whereToSplit - which dimension of the tensor is to split >> whereToSplit - which dimension of the tensor is to split
>> splitNum - how many splits >> splitNum - how many splits
*/ */
void Split(XTensor * big, XList * smalls, int whereToSplit, int splitNum) void _Split(const XTensor * big, XList * smalls, int whereToSplit, int splitNum)
{ {
CheckNTErrors((smalls != NULL), "Invalid list!"); CheckNTErrors((smalls != NULL), "Invalid list!");
CheckNTErrors((smalls->count == splitNum), "Unmatched tensors!"); CheckNTErrors((smalls->count == splitNum), "Unmatched tensors!");
...@@ -211,7 +249,7 @@ void Split(XTensor * big, XList * smalls, int whereToSplit, int splitNum) ...@@ -211,7 +249,7 @@ void Split(XTensor * big, XList * smalls, int whereToSplit, int splitNum)
tensorTMP->data = dataTMP; tensorTMP->data = dataTMP;
Split(big, tensorTMP, whereToSplit, splitNum); _Split(big, tensorTMP, whereToSplit, splitNum);
/* copy from tmp to target */ /* copy from tmp to target */
if (!uniform) { if (!uniform) {
...@@ -234,4 +272,45 @@ void Split(XTensor * big, XList * smalls, int whereToSplit, int splitNum) ...@@ -234,4 +272,45 @@ void Split(XTensor * big, XList * smalls, int whereToSplit, int splitNum)
XMemFree(mem->devID, dataTMP); XMemFree(mem->devID, dataTMP);
} }
} }
/*
split a big tensor into small tensors (returna a XList struture).
make a new list to keep the result and return it.
>> big - the source tensor
>> whereToSplit - which dimension of the tensor is to split
>> splitNum - how many splits
<< return - a list of small tensors by splitting a big tensor
*/
XList SplitList(const XTensor &big, int whereToSplit, int splitNum)
{
CheckNTErrors(&big, "Invalid tensors!");
XList smalls = XList(splitNum);
int order = big.order;
int * dimSize = new int[order];
for (int i = 0; i < big.order; i++) {
if (i != whereToSplit)
dimSize[i] = big.dimSize[i];
else
dimSize[i] = big.dimSize[i] / splitNum;
}
for (int i = 0; i < splitNum; i++) {
XTensor tensor = NewTensor(order, dimSize, big.dataType, big.denseRatio, big.devID, big.mem);
tensor.SetZeroAll();
tensor.SetTMP();
smalls.Add(&tensor);
}
/* call _Split function */
_Split(&big, &smalls, whereToSplit, splitNum);
/* destroy variables */
delete dimSize;
return smalls;
}
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
...@@ -26,12 +26,27 @@ ...@@ -26,12 +26,27 @@
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* transform a tensor by splitting it, e.g., (M, N) -> (M, N/3, 3) */ /*
extern "C" transform a tensor by splitting it
void Split(XTensor * s, XTensor * t, int whereToSplit, int splitNum); e.g., (M, N) -> (M, N/3, 3)
*/
void _Split(const XTensor * s, XTensor * t, int whereToSplit, int splitNum);
/*
transform a tensor by splitting it (return a XTensor structure)
make a new tensor to keep the result and return it
e.g., (M, N) -> (M, N/3, 3)
*/
XTensor Split(const XTensor &s, int whereToSplit, int splitNum);
/* split a big tensor into small tensors */ /* split a big tensor into small tensors */
void Split(XTensor * big, XList * smalls, int whereToSplit, int splitNum); void _Split(const XTensor * big, XList * smalls, int whereToSplit, int splitNum);
/*
split a big tensor into small tensors (return a XList structure)
make a new list to keep the result and return it
*/
XList SplitList(const XTensor &big, int whereToSplit, int splitNum);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -29,17 +29,25 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -29,17 +29,25 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#define transpose _Transpose_ #define transpose _Transpose_
/* generate a transposed 1D/2D tensor: b = transposed(a) */ /*
void Transpose(XTensor * a, XTensor * b); generate a transposed 1D/2D tensor
b = transposed(a)
/* transpose a 1D/2D tensor on site: a = transposed(a) */ */
void Transpose_(XTensor * a); void _Transpose(XTensor * a, XTensor * b);
/* make a transposed 1D/2D tensor: b = transposed(a) and return its pointer */ /*
XTensor * _Transpose(XTensor * a); transpose a 1D/2D tensor (do it on site).
keep the result in the input tensor and return nothing.
a = transposed(a)
*/
void _TransposeMe(XTensor * a);
/* make a transposed 1D/2D tensor: b = transposed(a) and return its body */ /*
XTensor & _Transpose_(XTensor & a); make a transposed 1D/2D tensor (return a XTensor structure).
make a new tensor to keep the result and return it.
b = transposed(a)
*/
XTensor Transpose(XTensor &a);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -28,13 +28,15 @@ ...@@ -28,13 +28,15 @@
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* /*
insert a dimension by copying the blocks for x times (where x is the size of the inerted dimension) insert a dimension by copying the blocks for x times
(where x is the size of the inerted dimension)
>> a - input tensor >> a - input tensor
>> b - output tensor >> b - output tensor
>> dim - where to insert the dimension >> dim - where to insert the dimension
>> dSize - size of the newly-inserted dimension >> dSize - size of the newly-inserted dimension
*/ */
void Unsqueeze(XTensor * a, XTensor * b, int dim, int dSize) void _Unsqueeze(const XTensor * a, XTensor * b, int dim, int dSize)
{ {
CheckNTErrors((a && b), "Empty input tensors!"); CheckNTErrors((a && b), "Empty input tensors!");
CheckNTErrors((a->order == b->order - 1), "Unmatched tensors!"); CheckNTErrors((a->order == b->order - 1), "Unmatched tensors!");
...@@ -70,7 +72,7 @@ void Unsqueeze(XTensor * a, XTensor * b, int dim, int dSize) ...@@ -70,7 +72,7 @@ void Unsqueeze(XTensor * a, XTensor * b, int dim, int dSize)
if (a->devID >= 0 || b->devID >= 0) { if (a->devID >= 0 || b->devID >= 0) {
#ifdef USE_CUDA #ifdef USE_CUDA
CudaUnsqueeze(a, b, dim, dSize); _CudaUnsqueeze(a, b, dim, dSize);
#else #else
ShowNTErrors("Please specify USE_CUDA and recompile the code!"); ShowNTErrors("Please specify USE_CUDA and recompile the code!");
#endif #endif
...@@ -87,11 +89,50 @@ void Unsqueeze(XTensor * a, XTensor * b, int dim, int dSize) ...@@ -87,11 +89,50 @@ void Unsqueeze(XTensor * a, XTensor * b, int dim, int dSize)
} }
} }
MergeBlockLists(sourceArrays, blockSizes, 1, b->data, b->mem); _MergeBlockLists(sourceArrays, blockSizes, 1, b->data, b->mem);
delete sourceArrays; delete sourceArrays;
delete[] blockSizes; delete[] blockSizes;
} }
} }
/*
insert a dimension by copying the blocks for x times
(where x is the size of the inerted dimension) (returna a XTensor structure)
make a new tensor to keep the result and return it
>> a - input tensor
>> dim - where to insert the dimension
>> dSize - size of the newly-inserted dimension
<< return - a tensor by inserting a dimension by copying the blocks for x times
*/
XTensor Unsqueeze(const XTensor &a, int dim, int dSize)
{
CheckNTErrors(&a, "Empty input tensors!");
int order = a.order + 1;
int * dimSize = new int[order];
for (int i = 0; i < order; i++) {
if (i < dim)
dimSize[i] = a.dimSize[i];
else if (i == dim)
dimSize[i] = dSize;
else
dimSize[i] = a.dimSize[i - 1];
}
XTensor b = NewTensor(order, dimSize, a.dataType, a.denseRatio, a.devID, a.mem);
b.SetZeroAll();
b.SetTMP();
/* call _Unsqueeze function */
_Unsqueeze(&a, &b, dim, dSize);
/* destroy variables */
delete dimSize;
return b;
}
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
...@@ -67,7 +67,7 @@ insert a dimension by copying the blocks for x times (where x is the size of the ...@@ -67,7 +67,7 @@ insert a dimension by copying the blocks for x times (where x is the size of the
>> dSize - size of the newly-inserted dimension >> dSize - size of the newly-inserted dimension
*/ */
extern "C" extern "C"
void CudaUnsqueeze(XTensor * a, XTensor * b, int dim, int dSize) void _CudaUnsqueeze(const XTensor * a, XTensor * b, int dim, int dSize)
{ {
int blockSize = 1; int blockSize = 1;
int blockNumA = 1; int blockNumA = 1;
......
...@@ -30,7 +30,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -30,7 +30,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* duplicate the data along a given dimension */ /* duplicate the data along a given dimension */
extern "C" extern "C"
void CudaUnsqueeze(XTensor * a, XTensor * b, int dim, int dSize); void _CudaUnsqueeze(const XTensor * a, XTensor * b, int dim, int dSize);
#endif // USE_CUDA #endif // USE_CUDA
......
...@@ -26,10 +26,16 @@ ...@@ -26,10 +26,16 @@
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* insert a dimension by copying the blocks for x times (where x is the size of the inerted dimension) */ /* insert a dimension by copying the blocks for x times (where x is the size of the inerted dimension) */
extern "C" extern "C"
void Unsqueeze(XTensor * a, XTensor * b, int dim, int dSize); void _Unsqueeze(const XTensor * a, XTensor * b, int dim, int dSize);
/*
insert a dimension by copying the blocks for x times
(where x is the size of the inerted dimension) (return a XTensor structure)
make a new tensor to keep the result and return it
*/
XTensor Unsqueeze(const XTensor &a, int dim, int dSize);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -29,11 +29,12 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -29,11 +29,12 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* /*
sort the tensor along a given dimension sort the tensor along a given dimension
>> a - the tensor >> a - the tensor
>> index - index of the items in the resulting tensor >> index - index of the items in the resulting tensor
>> dim - the dimension along which the sorting is performed >> dim - the dimension along which the sorting is performed
*/ */
void Sort(XTensor * a, XTensor * index, int dim) void _Sort(XTensor * a, XTensor * index, int dim)
{ {
CheckNTErrors((dim >= 0 && dim < a->order), "Incorrect dimension specified!"); CheckNTErrors((dim >= 0 && dim < a->order), "Incorrect dimension specified!");
CheckNTErrors((a->order == index->order), "Unmatched input tensors!"); CheckNTErrors((a->order == index->order), "Unmatched input tensors!");
...@@ -45,7 +46,7 @@ void Sort(XTensor * a, XTensor * index, int dim) ...@@ -45,7 +46,7 @@ void Sort(XTensor * a, XTensor * index, int dim)
if (a->devID >= 0) { if (a->devID >= 0) {
#ifdef USE_CUDA #ifdef USE_CUDA
CudaSortBig(a, a, index, index, dim); _CudaSortBig(a, a, index, index, dim);
#else #else
ShowNTErrors("Plesae specify USE_CUDA and recompile the code!"); ShowNTErrors("Plesae specify USE_CUDA and recompile the code!");
#endif #endif
......
...@@ -210,7 +210,7 @@ sort the tensor along a given dimension ...@@ -210,7 +210,7 @@ sort the tensor along a given dimension
>> dim - specified dimension >> dim - specified dimension
>> k - top-k results are returned >> k - top-k results are returned
*/ */
void CudaSortBig(XTensor * a, XTensor * b, XTensor * indexA, XTensor * indexB, int dim, int k) void _CudaSortBig(const XTensor * a, XTensor * b, XTensor * indexA, XTensor * indexB, int dim, int k)
{ {
CheckNTErrors((a && b), "Empty input tensor!"); CheckNTErrors((a && b), "Empty input tensor!");
CheckNTErrors((a->unitSize == b->unitSize), "Unmatched tensors!"); CheckNTErrors((a->unitSize == b->unitSize), "Unmatched tensors!");
......
...@@ -30,7 +30,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -30,7 +30,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* sort the tensor along a given dimension */ /* sort the tensor along a given dimension */
extern "C" extern "C"
void CudaSortBig(XTensor * a, XTensor * b, XTensor * indexA, XTensor * indexB, int dim, int k = -1); void _CudaSortBig(const XTensor * a, XTensor * b, XTensor * indexA, XTensor * indexB, int dim, int k = -1);
#endif // USE_CUDA #endif // USE_CUDA
......
...@@ -28,7 +28,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -28,7 +28,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* sort the data along a given dimension */ /* sort the data along a given dimension */
extern "C" extern "C"
void Sort(XTensor * a, XTensor * index, int dim); void _Sort(XTensor * a, XTensor * index, int dim);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -34,7 +34,7 @@ get the top-k items along a given dimension ...@@ -34,7 +34,7 @@ get the top-k items along a given dimension
>> dim - the dimension along which the sorting is performed >> dim - the dimension along which the sorting is performed
>> k - how many items returned after sorting >> k - how many items returned after sorting
*/ */
void TopK(XTensor * a, XTensor * b, XTensor * index, int dim, int k) void _TopK(const XTensor * a, XTensor * b, XTensor * index, int dim, int k)
{ {
CheckNTErrors((a->unitSize == b->unitSize), "Unmatched input tensors!"); CheckNTErrors((a->unitSize == b->unitSize), "Unmatched input tensors!");
CheckNTErrors((a->order == b->order), "Unmatched input tensors!"); CheckNTErrors((a->order == b->order), "Unmatched input tensors!");
...@@ -55,7 +55,7 @@ void TopK(XTensor * a, XTensor * b, XTensor * index, int dim, int k) ...@@ -55,7 +55,7 @@ void TopK(XTensor * a, XTensor * b, XTensor * index, int dim, int k)
if (a->devID >= 0 || b->devID >= 0) { if (a->devID >= 0 || b->devID >= 0) {
#ifdef USE_CUDA #ifdef USE_CUDA
CudaTopK(a, b, index, dim, k); _CudaTopK(a, b, index, dim, k);
#else #else
ShowNTErrors("Plesae specify USE_CUDA and recompile the code!"); ShowNTErrors("Plesae specify USE_CUDA and recompile the code!");
#endif #endif
......
...@@ -370,7 +370,7 @@ get the top-k items along a given dimension ...@@ -370,7 +370,7 @@ get the top-k items along a given dimension
>> dim - the dimension along which the sorting is performed >> dim - the dimension along which the sorting is performed
>> k - how many items returned after sorting >> k - how many items returned after sorting
*/ */
void CudaTopK(XTensor * a, XTensor * b, XTensor * index, int dim, int k) void _CudaTopK(const XTensor * a, XTensor * b, XTensor * index, int dim, int k)
{ {
CheckNTErrors((a->unitSize == b->unitSize), "Unmatched input tensors!"); CheckNTErrors((a->unitSize == b->unitSize), "Unmatched input tensors!");
CheckNTErrors((a->order == b->order), "Unmatched input tensors!"); CheckNTErrors((a->order == b->order), "Unmatched input tensors!");
...@@ -439,7 +439,7 @@ void CudaTopK(XTensor * a, XTensor * b, XTensor * index, int dim, int k) ...@@ -439,7 +439,7 @@ void CudaTopK(XTensor * a, XTensor * b, XTensor * index, int dim, int k)
/* make the index tensor */ /* make the index tensor */
indexA->SetAscendingOrder(dim); indexA->SetAscendingOrder(dim);
CudaSortBig(a, b, indexA, index, dim, k); _CudaSortBig(a, b, indexA, index, dim, k);
if (a->mem != NULL) if (a->mem != NULL)
a->mem->ReleaseBuf(a->devID, a->unitNum * sizeof(int)); a->mem->ReleaseBuf(a->devID, a->unitNum * sizeof(int));
......
...@@ -30,7 +30,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -30,7 +30,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* get the top-k items along a given dimension */ /* get the top-k items along a given dimension */
extern "C" extern "C"
void CudaTopK(XTensor * a, XTensor * b, XTensor * index, int dim, int k); void _CudaTopK(const XTensor * a, XTensor * b, XTensor * index, int dim, int k);
#endif // USE_CUDA #endif // USE_CUDA
......
...@@ -28,7 +28,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -28,7 +28,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* get the top-k items along a given dimension */ /* get the top-k items along a given dimension */
extern "C" extern "C"
void TopK(XTensor * a, XTensor * b, XTensor * index, int dim, int k); void _TopK(const XTensor * a, XTensor * b, XTensor * index, int dim, int k);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -33,11 +33,11 @@ y = 1 if x > 1 ...@@ -33,11 +33,11 @@ y = 1 if x > 1
>> x - input tensor >> x - input tensor
>> y - result >> y - result
*/ */
void HardTanH(XTensor * x, XTensor * y) void _HardTanH(const XTensor * x, XTensor * y)
{ {
#ifdef USE_CUDA #ifdef USE_CUDA
if(x->devID >= 0 || y->devID >= 0){ if(x->devID >= 0 || y->devID >= 0){
CudaHardTanH(x, y); _CudaHardTanH(x, y);
return; return;
} }
#endif #endif
......
...@@ -60,7 +60,7 @@ y = 1 if x > 1 ...@@ -60,7 +60,7 @@ y = 1 if x > 1
>> x - input tensor >> x - input tensor
>> y - output tensor >> y - output tensor
*/ */
void CudaHardTanH(XTensor * x, XTensor * y) void _CudaHardTanH(const XTensor * x, XTensor * y)
{ {
if(x->dataType == DEFAULT_DTYPE && y->dataType == DEFAULT_DTYPE){ if(x->dataType == DEFAULT_DTYPE && y->dataType == DEFAULT_DTYPE){
......
...@@ -36,7 +36,7 @@ y = 1 if x > 1 ...@@ -36,7 +36,7 @@ y = 1 if x > 1
-1 if x < -1 -1 if x < -1
*/ */
extern "C" extern "C"
void CudaHardTanH(XTensor * input, XTensor * output); void _CudaHardTanH(const XTensor * input, XTensor * output);
/* de/dx (Cuda version) */ /* de/dx (Cuda version) */
extern "C" extern "C"
......
...@@ -34,7 +34,7 @@ y = 1 if x > 1 ...@@ -34,7 +34,7 @@ y = 1 if x > 1
-1 if x < -1 -1 if x < -1
*/ */
extern "C" extern "C"
void HardTanH(XTensor * x, XTensor * y); void _HardTanH(const XTensor * x, XTensor * y);
/* de/dx */ /* de/dx */
extern "C" extern "C"
......
...@@ -30,9 +30,9 @@ identity function y = x ...@@ -30,9 +30,9 @@ identity function y = x
>> x - input tensor >> x - input tensor
>> y - result >> y - result
*/ */
void Identity(XTensor * x, XTensor * y) void _Identity(const XTensor * x, XTensor * y)
{ {
CopyValues(x, y); _CopyValues(x, y);
} }
/* /*
...@@ -61,7 +61,7 @@ void IdentityBackward(XTensor * gold, XTensor * y, XTensor * x, ...@@ -61,7 +61,7 @@ void IdentityBackward(XTensor * gold, XTensor * y, XTensor * x,
LossBackward(dedy, gold, y, lossName); LossBackward(dedy, gold, y, lossName);
if(dedy->data != dedx->data) if(dedy->data != dedx->data)
CopyValues(dedy, dedx); _CopyValues(dedy, dedx);
} }
else else
ShowNTErrors("TODO!"); ShowNTErrors("TODO!");
......
...@@ -29,7 +29,7 @@ namespace nts{ // namespace nts(NiuTrans.Tensor) ...@@ -29,7 +29,7 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
/* identity function y = x */ /* identity function y = x */
extern "C" extern "C"
void Identity(XTensor * x, XTensor * y); void _Identity(const XTensor * x, XTensor * y);
/* de/dx */ /* de/dx */
extern "C" extern "C"
......
...@@ -35,7 +35,7 @@ log scale softmax y = log(e^x / \sum_{i} e^{x_i}) ...@@ -35,7 +35,7 @@ log scale softmax y = log(e^x / \sum_{i} e^{x_i})
>> y - result >> y - result
>> leadDim - leading dimension (along which we perform reduction) >> leadDim - leading dimension (along which we perform reduction)
*/ */
void LogSoftmax(XTensor * x, XTensor * y, int leadDim) void _LogSoftmax(const XTensor * x, XTensor * y, int leadDim)
{ {
int leadDimRDI = x->order - leadDim - 1; int leadDimRDI = x->order - leadDim - 1;
if (!x->isSparse && !y->isSparse && if (!x->isSparse && !y->isSparse &&
...@@ -73,8 +73,8 @@ void LogSoftmax(XTensor * x, XTensor * y, int leadDim) ...@@ -73,8 +73,8 @@ void LogSoftmax(XTensor * x, XTensor * y, int leadDim)
max->data = mem != NULL ? (char*)mem->AllocBuf(mem->devID, max->unitNum * max->unitSize) : XMemAlloc(max->devID, max->unitNum * max->unitSize); max->data = mem != NULL ? (char*)mem->AllocBuf(mem->devID, max->unitNum * max->unitSize) : XMemAlloc(max->devID, max->unitNum * max->unitSize);
sum->data = mem != NULL ? (char*)mem->AllocBuf(mem->devID, sum->unitNum * sum->unitSize) : XMemAlloc(sum->devID, sum->unitNum * sum->unitSize); sum->data = mem != NULL ? (char*)mem->AllocBuf(mem->devID, sum->unitNum * sum->unitSize) : XMemAlloc(sum->devID, sum->unitNum * sum->unitSize);
ReduceMax(x, max, leadDim); _ReduceMax(x, max, leadDim);
ReduceSum(x, sum, leadDim, max, 1.0F, true); _ReduceSum(x, sum, leadDim, max, 1.0F, true);
if (x->devID >= 0) { if (x->devID >= 0) {
int dims[2]; int dims[2];
......
...@@ -36,7 +36,7 @@ log scale softmax y = log(e^x / \sum_{i} e^{x_i}) (Cuda version) ...@@ -36,7 +36,7 @@ log scale softmax y = log(e^x / \sum_{i} e^{x_i}) (Cuda version)
>> y - result >> y - result
>> leadDim - leading dimension (along which we perform reduction) >> leadDim - leading dimension (along which we perform reduction)
*/ */
void CudaLogSoftmax(XTensor * x, XTensor * y, int leadDim) void _CudaLogSoftmax(const XTensor * x, XTensor * y, int leadDim)
{ {
ShowNTErrors("You should call LogSoftmax instead!"); ShowNTErrors("You should call LogSoftmax instead!");
} }
......
...@@ -29,10 +29,9 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -29,10 +29,9 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA #ifdef USE_CUDA
/* log scale softmax y = log(e^x / \sum_{i} e^{x_i}) (Cuda version) */ /* log scale softmax y = log(e^x / \sum_{i} e^{x_i}) (Cuda version) */
extern "C" extern "C"
void CudaLogSotmax(XTensor * input, XTensor * output, int leadDim); void _CudaLogSoftmax(const XTensor * input, XTensor * output, int leadDim);
/* log scale softmax y = log(e^x / \sum_{i} e^{x_i}) (Cuda version) */ /* log scale softmax y = log(e^x / \sum_{i} e^{x_i}) (Cuda version) */
extern "C" extern "C"
......
...@@ -29,7 +29,7 @@ namespace nts{ // namespace nts(NiuTrans.Tensor) ...@@ -29,7 +29,7 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
/* log scale softmax y = log(e^x / \sum_{i} e^{x_i}) */ /* log scale softmax y = log(e^x / \sum_{i} e^{x_i}) */
extern "C" extern "C"
void LogSoftmax(XTensor * x, XTensor * y, int leadDim); void _LogSoftmax(const XTensor * x, XTensor * y, int leadDim);
/* de/dx */ /* de/dx */
extern "C" extern "C"
......
...@@ -77,7 +77,7 @@ DTYPE CudaLossCompute(XTensor * gold, XTensor * y, LOSS_FUNCTION_NAME LFName, ...@@ -77,7 +77,7 @@ DTYPE CudaLossCompute(XTensor * gold, XTensor * y, LOSS_FUNCTION_NAME LFName,
if(LFName == SQUAREDERROR){ if(LFName == SQUAREDERROR){
XTensor * diff = NewTensor(gold->order, gold->dimSize, gold->dataType, gold->denseRatio, gold->devID, gold->mem); XTensor * diff = NewTensor(gold->order, gold->dimSize, gold->dataType, gold->denseRatio, gold->devID, gold->mem);
_Sum(gold, y, diff, -1.0F); _Sum(gold, y, diff, -1.0F);
Power(diff, 2.0F); _Power(diff, 2.0F);
_ScaleAndShiftMe(diff, 0.5F, 0.0F); _ScaleAndShiftMe(diff, 0.5F, 0.0F);
int reduceTimes = diff->order; int reduceTimes = diff->order;
...@@ -87,7 +87,7 @@ DTYPE CudaLossCompute(XTensor * gold, XTensor * y, LOSS_FUNCTION_NAME LFName, ...@@ -87,7 +87,7 @@ DTYPE CudaLossCompute(XTensor * gold, XTensor * y, LOSS_FUNCTION_NAME LFName,
memcpy(diffDimSize, diff->dimSize + 1, diffOrder * sizeof(int)); memcpy(diffDimSize, diff->dimSize + 1, diffOrder * sizeof(int));
XTensor * diffNew = NewTensor(diffOrder, diffDimSize, X_FLOAT, 1.0F, diff->devID, diff->mem); XTensor * diffNew = NewTensor(diffOrder, diffDimSize, X_FLOAT, 1.0F, diff->devID, diff->mem);
int reducePlace = diff->dimSize[0] == 1 ? 1 : 0; int reducePlace = diff->dimSize[0] == 1 ? 1 : 0;
ReduceSum(diff, diffNew, reducePlace); _ReduceSum(diff, diffNew, reducePlace);
if (diffNew->order == 1) { if (diffNew->order == 1) {
diffNew->order = 2; diffNew->order = 2;
diffNew->dimSize[1] = diffNew->dimSize[0]; diffNew->dimSize[1] = diffNew->dimSize[0];
...@@ -109,10 +109,10 @@ DTYPE CudaLossCompute(XTensor * gold, XTensor * y, LOSS_FUNCTION_NAME LFName, ...@@ -109,10 +109,10 @@ DTYPE CudaLossCompute(XTensor * gold, XTensor * y, LOSS_FUNCTION_NAME LFName,
*/ */
if(LFName == CROSSENTROPY){ if(LFName == CROSSENTROPY){
XTensor * diff = NewTensor(y->order, y->dimSize, y->dataType, y->denseRatio, y->devID, y->mem); XTensor * diff = NewTensor(y->order, y->dimSize, y->dataType, y->denseRatio, y->devID, y->mem);
CopyValues(y, diff); _CopyValues(y, diff);
Log(diff); _Log(diff);
_Multiply(gold, diff, diff); _Multiply(gold, diff, diff);
Negate(diff); _Negate(diff);
int reduceTimes = diff->order; int reduceTimes = diff->order;
for (int i = 0; i < reduceTimes; i++) { for (int i = 0; i < reduceTimes; i++) {
...@@ -121,7 +121,7 @@ DTYPE CudaLossCompute(XTensor * gold, XTensor * y, LOSS_FUNCTION_NAME LFName, ...@@ -121,7 +121,7 @@ DTYPE CudaLossCompute(XTensor * gold, XTensor * y, LOSS_FUNCTION_NAME LFName,
memcpy(diffDimSize, diff->dimSize + 1, diffOrder * sizeof(int)); memcpy(diffDimSize, diff->dimSize + 1, diffOrder * sizeof(int));
XTensor * diffNew = NewTensor(diffOrder, diffDimSize, X_FLOAT, 1.0F, diff->devID, diff->mem); XTensor * diffNew = NewTensor(diffOrder, diffDimSize, X_FLOAT, 1.0F, diff->devID, diff->mem);
int reducePlace = diff->dimSize[0] == 1 ? 1 : 0; int reducePlace = diff->dimSize[0] == 1 ? 1 : 0;
ReduceSum(diff, diffNew, reducePlace); _ReduceSum(diff, diffNew, reducePlace);
if (diffNew->order == 1) { if (diffNew->order == 1) {
diffNew->order = 2; diffNew->order = 2;
diffNew->dimSize[1] = diffNew->dimSize[0]; diffNew->dimSize[1] = diffNew->dimSize[0];
...@@ -145,10 +145,10 @@ DTYPE CudaLossCompute(XTensor * gold, XTensor * y, LOSS_FUNCTION_NAME LFName, ...@@ -145,10 +145,10 @@ DTYPE CudaLossCompute(XTensor * gold, XTensor * y, LOSS_FUNCTION_NAME LFName,
if(LFName == ONEHOTERROR){ if(LFName == ONEHOTERROR){
XTensor * diff = NewTensor(gold->order, gold->dimSize, gold->dataType, gold->denseRatio, gold->devID, gold->mem); XTensor * diff = NewTensor(gold->order, gold->dimSize, gold->dataType, gold->denseRatio, gold->devID, gold->mem);
XTensor * yOnehot = NewTensor(y->order, y->dimSize, y->dataType, y->denseRatio, y->devID, y->mem); XTensor * yOnehot = NewTensor(y->order, y->dimSize, y->dataType, y->denseRatio, y->devID, y->mem);
CopyValues(y, yOnehot); _CopyValues(y, yOnehot);
_Multiply(gold, y, yOnehot); _Multiply(gold, y, yOnehot);
_Sum(gold, yOnehot, diff, -1.0F); _Sum(gold, yOnehot, diff, -1.0F);
Power(diff, 2.0F); _Power(diff, 2.0F);
_ScaleAndShiftMe(diff, 0.5F, 0.0F); _ScaleAndShiftMe(diff, 0.5F, 0.0F);
int reduceTimes = diff->order; int reduceTimes = diff->order;
...@@ -158,7 +158,7 @@ DTYPE CudaLossCompute(XTensor * gold, XTensor * y, LOSS_FUNCTION_NAME LFName, ...@@ -158,7 +158,7 @@ DTYPE CudaLossCompute(XTensor * gold, XTensor * y, LOSS_FUNCTION_NAME LFName,
memcpy(diffDimSize, diff->dimSize + 1, diffOrder * sizeof(int)); memcpy(diffDimSize, diff->dimSize + 1, diffOrder * sizeof(int));
XTensor * diffNew = NewTensor(diffOrder, diffDimSize, X_FLOAT, 1.0F, diff->devID, diff->mem); XTensor * diffNew = NewTensor(diffOrder, diffDimSize, X_FLOAT, 1.0F, diff->devID, diff->mem);
int reducePlace = diff->dimSize[0] == 1 ? 1 : 0; int reducePlace = diff->dimSize[0] == 1 ? 1 : 0;
ReduceSum(diff, diffNew, reducePlace); _ReduceSum(diff, diffNew, reducePlace);
if (diffNew->order == 1) { if (diffNew->order == 1) {
diffNew->order = 2; diffNew->order = 2;
diffNew->dimSize[1] = diffNew->dimSize[0]; diffNew->dimSize[1] = diffNew->dimSize[0];
......
...@@ -29,11 +29,11 @@ rectify function y = max(0, x) ...@@ -29,11 +29,11 @@ rectify function y = max(0, x)
>> input - input tensor >> input - input tensor
>> output - result >> output - result
*/ */
void Rectify(XTensor * x, XTensor * y) void _Rectify(const XTensor * x, XTensor * y)
{ {
#ifdef USE_CUDA #ifdef USE_CUDA
if(y->devID >= 0 || y->devID >= 0){ if(y->devID >= 0 || y->devID >= 0){
CudaRectify(x, y); _CudaRectify(x, y);
return; return;
} }
#endif #endif
......
...@@ -54,7 +54,7 @@ rectify function y = max(0, x) ...@@ -54,7 +54,7 @@ rectify function y = max(0, x)
>> x - input tensor >> x - input tensor
>> y - result >> y - result
*/ */
void CudaRectify(XTensor * x, XTensor * y) void _CudaRectify(const XTensor * x, XTensor * y)
{ {
if(x->dataType == DEFAULT_DTYPE && y->dataType == DEFAULT_DTYPE){ if(x->dataType == DEFAULT_DTYPE && y->dataType == DEFAULT_DTYPE){
......
...@@ -31,7 +31,7 @@ namespace nts{ // namespace nts(NiuTrans.Tensor) ...@@ -31,7 +31,7 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
/* rectify function y = max(0, x) (Cuda version) */ /* rectify function y = max(0, x) (Cuda version) */
extern "C" extern "C"
void CudaRectify(XTensor * input, XTensor * output); void _CudaRectify(const XTensor * input, XTensor * output);
/* de/dx (Cuda version) */ /* de/dx (Cuda version) */
extern "C" extern "C"
......
...@@ -29,7 +29,7 @@ namespace nts{ // namespace nts(NiuTrans.Tensor) ...@@ -29,7 +29,7 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
/* rectify function y = max(0, x) */ /* rectify function y = max(0, x) */
extern "C" extern "C"
void Rectify(XTensor * x, XTensor * y); void _Rectify(const XTensor * x, XTensor * y);
/* de/dx */ /* de/dx */
extern "C" extern "C"
......
...@@ -30,11 +30,11 @@ sigmoid function y = 1/(1+exp(-x)) ...@@ -30,11 +30,11 @@ sigmoid function y = 1/(1+exp(-x))
>> x - input tensor >> x - input tensor
>> y - result >> y - result
*/ */
void Sigmoid(XTensor * x, XTensor * y) void _Sigmoid(const XTensor * x, XTensor * y)
{ {
#ifdef USE_CUDA #ifdef USE_CUDA
if(x->devID >= 0 || y->devID >= 0){ if(x->devID >= 0 || y->devID >= 0){
CudaSigmoid(x, y); _CudaSigmoid(x, y);
return; return;
} }
#endif #endif
......
...@@ -58,7 +58,7 @@ sigmoid function y = 1/(1+exp(-x)) (Cuda version) ...@@ -58,7 +58,7 @@ sigmoid function y = 1/(1+exp(-x)) (Cuda version)
>> x - input vector >> x - input vector
>> y - result >> y - result
*/ */
void CudaSigmoid(XTensor * x, XTensor * y) void _CudaSigmoid(const XTensor * x, XTensor * y)
{ {
if(x->dataType == DEFAULT_DTYPE && y->dataType == DEFAULT_DTYPE){ if(x->dataType == DEFAULT_DTYPE && y->dataType == DEFAULT_DTYPE){
......
...@@ -31,7 +31,7 @@ namespace nts{ // namespace nts(NiuTrans.Tensor) ...@@ -31,7 +31,7 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
/* rectify function y = max(0, x) (Cuda version) */ /* rectify function y = max(0, x) (Cuda version) */
extern "C" extern "C"
void CudaSigmoid(XTensor * input, XTensor * output); void _CudaSigmoid(const XTensor * input, XTensor * output);
/* de/dx (Cuda version) */ /* de/dx (Cuda version) */
extern "C" extern "C"
......
...@@ -29,7 +29,7 @@ namespace nts{ // namespace nts(NiuTrans.Tensor) ...@@ -29,7 +29,7 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
/* sigmoid function y = 1/(1+exp(-x)) */ /* sigmoid function y = 1/(1+exp(-x)) */
extern "C" extern "C"
void Sigmoid(XTensor * x, XTensor * y); void _Sigmoid(const XTensor * x, XTensor * y);
/* de/dx */ /* de/dx */
extern "C" extern "C"
......
...@@ -34,7 +34,7 @@ softmax y = e^x / \sum_{i} e^{x_i} ...@@ -34,7 +34,7 @@ softmax y = e^x / \sum_{i} e^{x_i}
>> y - result >> y - result
>> leadDim - leading dimension (along which we perform reduction) >> leadDim - leading dimension (along which we perform reduction)
*/ */
void Softmax(XTensor * x, XTensor * y, int leadDim) void _Softmax(const XTensor * x, XTensor * y, int leadDim)
{ {
int leadDimRDI = x->order - leadDim - 1; int leadDimRDI = x->order - leadDim - 1;
if(!x->isSparse && !y->isSparse && x->dataType == y->dataType){ if(!x->isSparse && !y->isSparse && x->dataType == y->dataType){
...@@ -56,12 +56,12 @@ void Softmax(XTensor * x, XTensor * y, int leadDim) ...@@ -56,12 +56,12 @@ void Softmax(XTensor * x, XTensor * y, int leadDim)
max->data = mem != NULL ? (char*)mem->AllocBuf(mem->devID, max->unitNum * max->unitSize) : XMemAlloc(max->devID, max->unitNum * max->unitSize); max->data = mem != NULL ? (char*)mem->AllocBuf(mem->devID, max->unitNum * max->unitSize) : XMemAlloc(max->devID, max->unitNum * max->unitSize);
sum->data = mem != NULL ? (char*)mem->AllocBuf(mem->devID, sum->unitNum * sum->unitSize) : XMemAlloc(sum->devID, sum->unitNum * sum->unitSize); sum->data = mem != NULL ? (char*)mem->AllocBuf(mem->devID, sum->unitNum * sum->unitSize) : XMemAlloc(sum->devID, sum->unitNum * sum->unitSize);
ReduceMax(x, max, leadDim); _ReduceMax(x, max, leadDim);
ReduceSum(x, sum, leadDim, max, 1.0F, true); _ReduceSum(x, sum, leadDim, max, 1.0F, true);
if(x->devID >= 0){ if(x->devID >= 0){
#ifdef USE_CUDA #ifdef USE_CUDA
CudaSoftmaxSumMax(x, y, leadDim, sum, max); _CudaSoftmaxSumMax(x, y, leadDim, sum, max);
#else #else
ShowNTErrors("Please specify USE_CUDA and recompile the code!"); ShowNTErrors("Please specify USE_CUDA and recompile the code!");
#endif #endif
......
...@@ -39,7 +39,7 @@ softmax y = e^x / \sum_{i} e^{x_i} (Cuda version) ...@@ -39,7 +39,7 @@ softmax y = e^x / \sum_{i} e^{x_i} (Cuda version)
>> y - result >> y - result
>> leadDim - leading dimension (along which we perform reduction) >> leadDim - leading dimension (along which we perform reduction)
*/ */
void CudaSoftmax(XTensor * x, XTensor * y, int leadDim) void _CudaSoftmax(const XTensor * x, XTensor * y, int leadDim)
{ {
ShowNTErrors("You should call Softmax instead!"); ShowNTErrors("You should call Softmax instead!");
} }
...@@ -163,7 +163,7 @@ softmax y = e^x / \sum_{i} e^{x_i} (Cuda version) ...@@ -163,7 +163,7 @@ softmax y = e^x / \sum_{i} e^{x_i} (Cuda version)
>> sum - \sum_{i} e^{x_i} >> sum - \sum_{i} e^{x_i}
>> max - \max_{i} e^{x_i} >> max - \max_{i} e^{x_i}
*/ */
void CudaSoftmaxSumMax(XTensor * x, XTensor * y, int leadDim, XTensor * sum, XTensor * max) void _CudaSoftmaxSumMax(const XTensor * x, XTensor * y, int leadDim, XTensor * sum, XTensor * max)
{ {
CheckNTErrors((x->devID >= 0), "Forward computation of softmax must be run on GPUs."); CheckNTErrors((x->devID >= 0), "Forward computation of softmax must be run on GPUs.");
CheckNTErrors((x->devID == y->devID), "Tensors used in softmax are not on the same GPU."); CheckNTErrors((x->devID == y->devID), "Tensors used in softmax are not on the same GPU.");
...@@ -289,10 +289,10 @@ void CudaSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x, ...@@ -289,10 +289,10 @@ void CudaSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
/* \beta = \sum_i (dE/dy_i * y_i) */ /* \beta = \sum_i (dE/dy_i * y_i) */
_Multiply(dedy, y, ytmp, 0, 0); _Multiply(dedy, y, ytmp, 0, 0);
ReduceSum(ytmp, beta, leadDim); _ReduceSum(ytmp, beta, leadDim);
/* ytmp = dE/dy_j - \beta */ /* ytmp = dE/dy_j - \beta */
Unsqueeze(beta, ytmp, leadDim, y->dimSize[leadDim]); _Unsqueeze(beta, ytmp, leadDim, y->dimSize[leadDim]);
_Sum(dedy, ytmp, ytmp, -1.0F); _Sum(dedy, ytmp, ytmp, -1.0F);
/* dE/ds_j = y_j * ytmp = y_j * (dE/dy_j - \beta) */ /* dE/ds_j = y_j * ytmp = y_j * (dE/dy_j - \beta) */
......
...@@ -31,11 +31,11 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -31,11 +31,11 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* softmax y = e^x / \sum_{i} e^{x_i} (Cuda version) */ /* softmax y = e^x / \sum_{i} e^{x_i} (Cuda version) */
extern "C" extern "C"
void CudaSotmax(XTensor * input, XTensor * output, int leadDim); void _CudaSotmax(const XTensor * input, XTensor * output, int leadDim);
/* softmax y = e^x / \sum_{i} e^{x_i} (Cuda version) */ /* softmax y = e^x / \sum_{i} e^{x_i} (Cuda version) */
extern "C" extern "C"
void CudaSoftmaxSumMax(XTensor * x, XTensor * y, int leadDim, XTensor * sum, XTensor * max); void _CudaSoftmaxSumMax(const XTensor * x, XTensor * y, int leadDim, XTensor * sum, XTensor * max);
/* de/dx (Cuda version) */ /* de/dx (Cuda version) */
extern "C" extern "C"
......
...@@ -29,7 +29,7 @@ namespace nts{ // namespace nts(NiuTrans.Tensor) ...@@ -29,7 +29,7 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
/* softmax y = e^x / \sum_{i} e^{x_i} */ /* softmax y = e^x / \sum_{i} e^{x_i} */
extern "C" extern "C"
void Softmax(XTensor * x, XTensor * y, int leadDim); void _Softmax(const XTensor * x, XTensor * y, int leadDim);
/* de/dx */ /* de/dx */
extern "C" extern "C"
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-12
*/
#include "TAbsolute.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
case 1: test Absolute function.
Set every entry to its absolute value.
*/
bool TestAbsolute1()
{
    /* build a tensor of size (3, 2) */
    int order = 2;
    int * dimSize = new int[order];
    dimSize[0] = 3;
    dimSize[1] = 2;

    /* total number of elements */
    int unitNum = 1;
    for (int dim = 0; dim < order; dim++)
        unitNum *= dimSize[dim];

    /* input values and their expected absolute values */
    DTYPE aData[3][2] = { {1.0F, -2.0F},
                          {0.5F, -4.0F},
                          {0.0F, 6.0F} };
    DTYPE answer[3][2] = { {1.0F, 2.0F},
                           {0.5F, 4.0F},
                           {0.0F, 6.0F} };

    /* CPU test */
    bool cpuTest = true;

    /* create the input tensor on the CPU */
    XTensor * a = NewTensor(order, dimSize);

    /* load the input values */
    a->SetData(aData, unitNum);

    /* take the absolute value in place */
    _Absolute(a);

    /* compare against the expected result */
    cpuTest = a->CheckData(answer, unitNum, 1e-4F);

#ifdef USE_CUDA
    /* GPU test */
    bool gpuTest = true;

    /* create the input tensor on GPU 0 */
    XTensor * aGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);

    /* load the input values */
    aGPU->SetData(aData, unitNum);

    /* take the absolute value in place */
    _Absolute(aGPU);

    /* compare against the expected result */
    gpuTest = aGPU->CheckData(answer, unitNum, 1e-4F);

    /* release resources */
    delete a;
    delete aGPU;
    delete[] dimSize;

    return cpuTest && gpuTest;
#else
    /* release resources */
    delete a;
    delete[] dimSize;

    return cpuTest;
#endif // USE_CUDA
}
/* other cases */
/*
TODO!!
*/
/* test for Absolute Function */
bool TestAbsolute()
{
    XPRINT(0, stdout, "[TEST Absolute] set every entry to its absolute value \n");

    /* assume success until a case fails */
    bool allPassed = true;

    /* case 1 test */
    if (TestAbsolute1())
        XPRINT(0, stdout, ">> case 1 passed!\n");
    else {
        allPassed = false;
        XPRINT(0, stdout, ">> case 1 failed!\n");
    }

    /* other cases test */
    /*
    TODO!!
    */

    /* overall verdict */
    if (allPassed)
        XPRINT(0, stdout, ">> All Passed!\n");
    else
        XPRINT(0, stdout, ">> Failed!\n");

    XPRINT(0, stdout, "\n");
    return allPassed;
}
} // namespace nts(NiuTrans.Tensor)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-12
*/
#ifndef __TEST_ABSOLUTE_H__
#define __TEST_ABSOLUTE_H__
#include "../core/arithmetic/Absolute.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* test for Absolute Function */
extern "C"
bool TestAbsolute();
} // namespace nts(NiuTrans.Tensor)
#endif // __TEST_ABSOLUTE_H__
...@@ -87,7 +87,7 @@ bool TestConcatenate1() ...@@ -87,7 +87,7 @@ bool TestConcatenate1()
sList->Add(s2); sList->Add(s2);
/* call Concatenate function */ /* call Concatenate function */
Concatenate(sList, t, 1); _Concatenate(sList, t, 1);
/* check results */ /* check results */
cpuTest = t->CheckData(answer, tUnitNum); cpuTest = t->CheckData(answer, tUnitNum);
...@@ -114,7 +114,7 @@ bool TestConcatenate1() ...@@ -114,7 +114,7 @@ bool TestConcatenate1()
sList->Add(sGPU2); sList->Add(sGPU2);
/* call Concatenate function */ /* call Concatenate function */
Concatenate(sList, tGPU, 1); _Concatenate(sList, tGPU, 1);
/* check results */ /* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum); gpuTest = tGPU->CheckData(answer, tUnitNum);
...@@ -212,7 +212,7 @@ bool TestConcatenate2() ...@@ -212,7 +212,7 @@ bool TestConcatenate2()
sList->Add(s2); sList->Add(s2);
/* call Concatenate function */ /* call Concatenate function */
Concatenate(sList, t, 0); _Concatenate(sList, t, 0);
/* check results */ /* check results */
cpuTest = t->CheckData(answer, tUnitNum); cpuTest = t->CheckData(answer, tUnitNum);
...@@ -239,7 +239,7 @@ bool TestConcatenate2() ...@@ -239,7 +239,7 @@ bool TestConcatenate2()
sList->Add(sGPU2); sList->Add(sGPU2);
/* call Concatenate function */ /* call Concatenate function */
Concatenate(sList, tGPU, 0); _Concatenate(sList, tGPU, 0);
/* check results */ /* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum); gpuTest = tGPU->CheckData(answer, tUnitNum);
...@@ -335,7 +335,7 @@ bool TestConcatenate3() ...@@ -335,7 +335,7 @@ bool TestConcatenate3()
sList->Add(s2); sList->Add(s2);
/* call Concatenate function */ /* call Concatenate function */
Concatenate(sList, t, 1); _Concatenate(sList, t, 1);
/* check results */ /* check results */
cpuTest = t->CheckData(answer, tUnitNum); cpuTest = t->CheckData(answer, tUnitNum);
...@@ -362,7 +362,7 @@ bool TestConcatenate3() ...@@ -362,7 +362,7 @@ bool TestConcatenate3()
sList->Add(sGPU2); sList->Add(sGPU2);
/* call Concatenate function */ /* call Concatenate function */
Concatenate(sList, tGPU, 1); _Concatenate(sList, tGPU, 1);
/* check results */ /* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum); gpuTest = tGPU->CheckData(answer, tUnitNum);
...@@ -451,7 +451,7 @@ bool TestConcatenate4() ...@@ -451,7 +451,7 @@ bool TestConcatenate4()
t->SetZeroAll(); t->SetZeroAll();
/* call Concatenate function */ /* call Concatenate function */
Concatenate(s1, s2, t, 1); _Concatenate(s1, s2, t, 1);
/* check results */ /* check results */
cpuTest = t->CheckData(answer, tUnitNum); cpuTest = t->CheckData(answer, tUnitNum);
...@@ -471,7 +471,7 @@ bool TestConcatenate4() ...@@ -471,7 +471,7 @@ bool TestConcatenate4()
tGPU->SetZeroAll(); tGPU->SetZeroAll();
/* call Concatenate function */ /* call Concatenate function */
Concatenate(sGPU1, sGPU2, tGPU, 1); _Concatenate(sGPU1, sGPU2, tGPU, 1);
/* check results */ /* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum); gpuTest = tGPU->CheckData(answer, tUnitNum);
......
...@@ -88,7 +88,7 @@ bool TestConcatenateSolely1() ...@@ -88,7 +88,7 @@ bool TestConcatenateSolely1()
sList->Add(s2); sList->Add(s2);
/* call ConcatenateSolely function */ /* call ConcatenateSolely function */
ConcatenateSolely(sList, t, 1); _ConcatenateSolely(sList, t, 1);
/* check results */ /* check results */
cpuTest = t->CheckData(answer, tUnitNum); cpuTest = t->CheckData(answer, tUnitNum);
...@@ -115,7 +115,7 @@ bool TestConcatenateSolely1() ...@@ -115,7 +115,7 @@ bool TestConcatenateSolely1()
sList->Add(sGPU2); sList->Add(sGPU2);
/* call ConcatenateSolely function */ /* call ConcatenateSolely function */
ConcatenateSolely(sList, tGPU, 1); _ConcatenateSolely(sList, tGPU, 1);
/* check results */ /* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum); gpuTest = tGPU->CheckData(answer, tUnitNum);
...@@ -213,7 +213,7 @@ bool TestConcatenateSolely2() ...@@ -213,7 +213,7 @@ bool TestConcatenateSolely2()
sList->Add(s2); sList->Add(s2);
/* call ConcatenateSolely function */ /* call ConcatenateSolely function */
ConcatenateSolely(sList, t, 0); _ConcatenateSolely(sList, t, 0);
/* check results */ /* check results */
cpuTest = t->CheckData(answer, tUnitNum); cpuTest = t->CheckData(answer, tUnitNum);
...@@ -240,7 +240,7 @@ bool TestConcatenateSolely2() ...@@ -240,7 +240,7 @@ bool TestConcatenateSolely2()
sList->Add(sGPU2); sList->Add(sGPU2);
/* call concatenatesolely function */ /* call concatenatesolely function */
ConcatenateSolely(sList, tGPU, 0); _ConcatenateSolely(sList, tGPU, 0);
/* check results */ /* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum); gpuTest = tGPU->CheckData(answer, tUnitNum);
...@@ -336,7 +336,7 @@ bool TestConcatenateSolely3() ...@@ -336,7 +336,7 @@ bool TestConcatenateSolely3()
sList->Add(s2); sList->Add(s2);
/* call ConcatenateSolely function */ /* call ConcatenateSolely function */
ConcatenateSolely(sList, t, 1); _ConcatenateSolely(sList, t, 1);
/* check results */ /* check results */
cpuTest = t->CheckData(answer, tUnitNum); cpuTest = t->CheckData(answer, tUnitNum);
...@@ -363,7 +363,7 @@ bool TestConcatenateSolely3() ...@@ -363,7 +363,7 @@ bool TestConcatenateSolely3()
sList->Add(sGPU2); sList->Add(sGPU2);
/* call ConcatenateSolely function */ /* call ConcatenateSolely function */
ConcatenateSolely(sList, tGPU, 1); _ConcatenateSolely(sList, tGPU, 1);
/* check results */ /* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum); gpuTest = tGPU->CheckData(answer, tUnitNum);
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-12
*/
#include "TConvertDataType.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
case 1: test ConvertDataType function.
In this case, the float32 data type is converted to int32 data type.
*/
bool TestConvertDataType1()
{
    /* build a tensor of size (3, 2) */
    int order = 2;
    int * dimSize = new int[order];
    dimSize[0] = 3;
    dimSize[1] = 2;

    /* total number of elements */
    int unitNum = 1;
    for (int dim = 0; dim < order; dim++)
        unitNum *= dimSize[dim];

    /* float input and the expected truncated-int output */
    DTYPE aData[3][2] = { {1.0F, 2.0F},
                          {0.5F, 4.0F},
                          {5.0F, 6.0F} };
    int answer[3][2] = { {1, 2},
                         {0, 4},
                         {5, 6} };

    /* CPU test */
    bool cpuTest = true;

    /* create a float32 source and an int32 target */
    XTensor * a = NewTensor(order, dimSize);
    XTensor * b = NewTensor(order, dimSize, X_INT);

    /* initialize variables */
    a->SetData(aData, unitNum);
    b->SetZeroAll();

    /* convert float32 -> int32 */
    _ConvertDataType(a, b);

    /* check results */
    cpuTest = b->CheckData(answer, unitNum);

#ifdef USE_CUDA
    /* GPU test */
    bool gpuTest = true;

    /* create the same pair of tensors on GPU 0 */
    XTensor * aGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
    XTensor * bGPU = NewTensor(order, dimSize, X_INT, 1.0F, 0);

    /* initialize variables */
    aGPU->SetData(aData, unitNum);

    /* convert float32 -> int32 */
    _ConvertDataType(aGPU, bGPU);

    /* check results */
    gpuTest = bGPU->CheckData(answer, unitNum);

    /* release resources */
    delete a;
    delete b;
    delete aGPU;
    delete bGPU;
    delete[] dimSize;

    return cpuTest && gpuTest;
#else
    /* release resources */
    delete a;
    delete b;
    delete[] dimSize;

    return cpuTest;
#endif // USE_CUDA
}
/*
case 2: test ConvertDataType function.
In this case, the int32 data type is converted to float32 data type.
*/
bool TestConvertDataType2()
{
    /* build a tensor of size (3, 2) */
    int order = 2;
    int * dimSize = new int[order];
    dimSize[0] = 3;
    dimSize[1] = 2;

    /* total number of elements */
    int unitNum = 1;
    for (int dim = 0; dim < order; dim++)
        unitNum *= dimSize[dim];

    /* int input and the expected float output */
    int aData[3][2] = { {1, 2},
                        {0, 4},
                        {5, 6} };
    DTYPE answer[3][2] = { {1.0F, 2.0F},
                           {0.0F, 4.0F},
                           {5.0F, 6.0F} };

    /* CPU test */
    bool cpuTest = true;

    /* create an int32 source and a float32 target */
    XTensor * a = NewTensor(order, dimSize, X_INT);
    XTensor * b = NewTensor(order, dimSize);

    /* initialize variables */
    a->SetData(aData, unitNum);
    b->SetZeroAll();

    /* convert int32 -> float32 */
    _ConvertDataType(a, b);

    /* check results */
    cpuTest = b->CheckData(answer, unitNum, 1e-4F);

#ifdef USE_CUDA
    /* GPU test */
    bool gpuTest = true;

    /* create the same pair of tensors on GPU 0 */
    XTensor * aGPU = NewTensor(order, dimSize, X_INT, 1.0F, 0);
    XTensor * bGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);

    /* initialize variables */
    aGPU->SetData(aData, unitNum);

    /* convert int32 -> float32 */
    _ConvertDataType(aGPU, bGPU);

    /* check results */
    gpuTest = bGPU->CheckData(answer, unitNum, 1e-4F);

    /* release resources */
    delete a;
    delete b;
    delete aGPU;
    delete bGPU;
    delete[] dimSize;

    return cpuTest && gpuTest;
#else
    /* release resources */
    delete a;
    delete b;
    delete[] dimSize;

    return cpuTest;
#endif // USE_CUDA
}
/* other cases */
/*
TODO!!
*/
/* test for ConvertDataType Function */
bool TestConvertDataType()
{
    XPRINT(0, stdout, "[TEST ConvertDataType] convert data type \n");

    /* assume success until a case fails */
    bool allPassed = true;

    /* case 1 test */
    if (TestConvertDataType1())
        XPRINT(0, stdout, ">> case 1 passed!\n");
    else {
        allPassed = false;
        XPRINT(0, stdout, ">> case 1 failed!\n");
    }

    /* case 2 test */
    if (TestConvertDataType2())
        XPRINT(0, stdout, ">> case 2 passed!\n");
    else {
        allPassed = false;
        XPRINT(0, stdout, ">> case 2 failed!\n");
    }

    /* other cases test */
    /*
    TODO!!
    */

    /* overall verdict */
    if (allPassed)
        XPRINT(0, stdout, ">> All Passed!\n");
    else
        XPRINT(0, stdout, ">> Failed!\n");

    XPRINT(0, stdout, "\n");
    return allPassed;
}
} // namespace nts(NiuTrans.Tensor)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-12
*/
#ifndef __TEST_CONVERTDATATYPE_H__
#define __TEST_CONVERTDATATYPE_H__
#include "../core/getandset/ConvertDataType.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* test for ConvertDataType Function */
extern "C"
bool TestConvertDataType();
} // namespace nts(NiuTrans.Tensor)
#endif // __TEST_CONVERTDATATYPE_H__
...@@ -71,23 +71,24 @@ bool TestCopyIndexed1() ...@@ -71,23 +71,24 @@ bool TestCopyIndexed1()
int tgtIndex[2] = {0, 1}; int tgtIndex[2] = {0, 1};
int copyNum = 1; int copyNum = 1;
/* CPU test */ /* CPU test */
bool cpuTest = true; bool cpuTest = true;
/* create tensors */ /* create tensors */
XTensor * s = NewTensor(sOrder, sDimSize); XTensor * s = NewTensor(sOrder, sDimSize);
XTensor * t = NewTensor(tOrder, tDimSize); XTensor * t = NewTensor(tOrder, tDimSize);
XTensor tUser;
/* initialize variables */ /* initialize variables */
s->SetData(sData, sUnitNum); s->SetData(sData, sUnitNum);
t->SetZeroAll(); t->SetZeroAll();
/* call CopyIndexed function */ /* call CopyIndexed function */
CopyIndexed(s, t, dim, srcIndex, indexSize, tgtIndex, copyNum); _CopyIndexed(s, t, dim, srcIndex, indexSize, tgtIndex, copyNum);
tUser = CopyIndexed(*s, dim, srcIndex, indexSize, tgtIndex, copyNum);
/* check results */ /* check results */
cpuTest = t->CheckData(answer, tUnitNum); cpuTest = t->CheckData(answer, tUnitNum) && tUser.CheckData(answer, tUnitNum);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
...@@ -96,16 +97,18 @@ bool TestCopyIndexed1() ...@@ -96,16 +97,18 @@ bool TestCopyIndexed1()
/* create tensors */ /* create tensors */
XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0); XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(sOrder, tDimSize, X_FLOAT, 1.0F, 0); XTensor * tGPU = NewTensor(sOrder, tDimSize, X_FLOAT, 1.0F, 0);
XTensor tUserGPU;
/* initialize variables */ /* initialize variables */
sGPU->SetData(sData, sUnitNum); sGPU->SetData(sData, sUnitNum);
tGPU->SetZeroAll(); tGPU->SetZeroAll();
/* call CopyIndexed function */ /* call CopyIndexed function */
CopyIndexed(sGPU, tGPU, dim, srcIndex, indexSize, tgtIndex, copyNum); _CopyIndexed(sGPU, tGPU, dim, srcIndex, indexSize, tgtIndex, copyNum);
tUserGPU = CopyIndexed(*sGPU, dim, srcIndex, indexSize, tgtIndex, copyNum);
/* check results */ /* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum); gpuTest = tGPU->CheckData(answer, tUnitNum) && tUserGPU.CheckData(answer, tUnitNum);
/* destroy variables */ /* destroy variables */
delete s; delete s;
...@@ -181,16 +184,18 @@ bool TestCopyIndexed2() ...@@ -181,16 +184,18 @@ bool TestCopyIndexed2()
/* create tensors */ /* create tensors */
XTensor * s = NewTensor(sOrder, sDimSize); XTensor * s = NewTensor(sOrder, sDimSize);
XTensor * t = NewTensor(tOrder, tDimSize); XTensor * t = NewTensor(tOrder, tDimSize);
XTensor tUser;
/* initialize variables */ /* initialize variables */
s->SetData(sData, sUnitNum); s->SetData(sData, sUnitNum);
t->SetZeroAll(); t->SetZeroAll();
/* call CopyIndexed function */ /* call CopyIndexed function */
CopyIndexed(s, t, dim, srcIndex, indexSize, tgtIndex, copyNum); _CopyIndexed(s, t, dim, srcIndex, indexSize, tgtIndex, copyNum);
tUser = CopyIndexed(*s, dim, srcIndex, indexSize, tgtIndex, copyNum);
/* check results */ /* check results */
cpuTest = t->CheckData(answer, tUnitNum); cpuTest = t->CheckData(answer, tUnitNum) && tUser.CheckData(answer, tUnitNum);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
...@@ -199,16 +204,18 @@ bool TestCopyIndexed2() ...@@ -199,16 +204,18 @@ bool TestCopyIndexed2()
/* create tensors */ /* create tensors */
XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0); XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(sOrder, tDimSize, X_FLOAT, 1.0F, 0); XTensor * tGPU = NewTensor(sOrder, tDimSize, X_FLOAT, 1.0F, 0);
XTensor tUserGPU;
/* initialize variables */ /* initialize variables */
sGPU->SetData(sData, sUnitNum); sGPU->SetData(sData, sUnitNum);
tGPU->SetZeroAll(); tGPU->SetZeroAll();
/* call CopyIndexed function */ /* call CopyIndexed function */
CopyIndexed(sGPU, tGPU, dim, srcIndex, indexSize, tgtIndex, copyNum); _CopyIndexed(sGPU, tGPU, dim, srcIndex, indexSize, tgtIndex, copyNum);
tUserGPU = CopyIndexed(*sGPU, dim, srcIndex, indexSize, tgtIndex, copyNum);
/* check results */ /* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum); gpuTest = tGPU->CheckData(answer, tUnitNum) && tUserGPU.CheckData(answer, tUnitNum);
/* destroy variables */ /* destroy variables */
delete s; delete s;
...@@ -284,16 +291,18 @@ bool TestCopyIndexed3() ...@@ -284,16 +291,18 @@ bool TestCopyIndexed3()
/* create tensors */ /* create tensors */
XTensor * s = NewTensor(sOrder, sDimSize); XTensor * s = NewTensor(sOrder, sDimSize);
XTensor * t = NewTensor(tOrder, tDimSize); XTensor * t = NewTensor(tOrder, tDimSize);
XTensor tUser;
/* initialize variables */ /* initialize variables */
s->SetData(sData, sUnitNum); s->SetData(sData, sUnitNum);
t->SetZeroAll(); t->SetZeroAll();
/* call CopyIndexed function */ /* call CopyIndexed function */
CopyIndexed(s, t, dim, srcIndex, indexSize, tgtIndex, copyNum); _CopyIndexed(s, t, dim, srcIndex, indexSize, tgtIndex, copyNum);
tUser = CopyIndexed(*s, dim, srcIndex, indexSize, tgtIndex, copyNum);
/* check results */ /* check results */
cpuTest = t->CheckData(answer, tUnitNum); cpuTest = t->CheckData(answer, tUnitNum) && tUser.CheckData(answer, tUnitNum);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
...@@ -302,16 +311,18 @@ bool TestCopyIndexed3() ...@@ -302,16 +311,18 @@ bool TestCopyIndexed3()
/* create tensors */ /* create tensors */
XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0); XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(sOrder, tDimSize, X_FLOAT, 1.0F, 0); XTensor * tGPU = NewTensor(sOrder, tDimSize, X_FLOAT, 1.0F, 0);
XTensor tUserGPU;
/* initialize variables */ /* initialize variables */
sGPU->SetData(sData, sUnitNum); sGPU->SetData(sData, sUnitNum);
tGPU->SetZeroAll(); tGPU->SetZeroAll();
/* call CopyIndexed function */ /* call CopyIndexed function */
CopyIndexed(sGPU, tGPU, dim, srcIndex, indexSize, tgtIndex, copyNum); _CopyIndexed(sGPU, tGPU, dim, srcIndex, indexSize, tgtIndex, copyNum);
tUserGPU = CopyIndexed(*sGPU, dim, srcIndex, indexSize, tgtIndex, copyNum);
/* check results */ /* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum); gpuTest = tGPU->CheckData(answer, tUnitNum) && tUserGPU.CheckData(answer, tUnitNum);
/* destroy variables */ /* destroy variables */
delete s; delete s;
......
...@@ -49,16 +49,19 @@ bool TestCopyValues1() ...@@ -49,16 +49,19 @@ bool TestCopyValues1()
/* create tensors */ /* create tensors */
XTensor * s = NewTensor(sOrder, sDimSize); XTensor * s = NewTensor(sOrder, sDimSize);
XTensor * t = NewTensor(sOrder, sDimSize); XTensor * t = NewTensor(sOrder, sDimSize);
XTensor tUser;
/* initialize variables */ /* initialize variables */
s->SetData(sData, sUnitNum); s->SetData(sData, sUnitNum);
t->SetZeroAll(); t->SetZeroAll();
/* call CopyValues function */ /* call CopyValues function */
CopyValues(s, t); _CopyValues(s, t);
tUser = CopyValues(*s);
/* check results */ /* check results */
cpuTest = t->CheckData(s->data, sUnitNum); cpuTest = t->CheckData(sData, sUnitNum) && tUser.CheckData(sData, sUnitNum);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
bool gpuTest = true; bool gpuTest = true;
...@@ -66,21 +69,18 @@ bool TestCopyValues1() ...@@ -66,21 +69,18 @@ bool TestCopyValues1()
/* create tensors */ /* create tensors */
XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0); XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0); XTensor * tGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor tUserGPU;
/* initialize variables */ /* initialize variables */
sGPU->SetData(sData, sUnitNum); sGPU->SetData(sData, sUnitNum);
tGPU->SetData(sData, sUnitNum); tGPU->SetData(sData, sUnitNum);
/* call CopyValues function */ /* call CopyValues function */
CopyValues(sGPU, tGPU); _CopyValues(sGPU, tGPU);
tUserGPU = CopyValues(*sGPU);
/* check results */ /* check results */
DTYPE * dataGPU = (DTYPE*)sGPU->data; gpuTest = tGPU->CheckData(sData, sUnitNum) && tUser.CheckData(sData, sUnitNum);
int size = sUnitNum * sGPU->unitSize;
char * dataCPU = new char[size];
XMemCopy(dataCPU, -1, dataGPU, sGPU->devID, size);
gpuTest = tGPU->CheckData(dataCPU, sUnitNum);
/* destroy variables */ /* destroy variables */
delete s; delete s;
......
...@@ -59,7 +59,7 @@ bool TestHardTanH1() ...@@ -59,7 +59,7 @@ bool TestHardTanH1()
y->SetZeroAll(); y->SetZeroAll();
/* call hardtanh function */ /* call hardtanh function */
HardTanH(x, y); _HardTanH(x, y);
/* check results */ /* check results */
cpuTest = y->CheckData(answer, unitNum, 1e-4F); cpuTest = y->CheckData(answer, unitNum, 1e-4F);
...@@ -77,7 +77,7 @@ bool TestHardTanH1() ...@@ -77,7 +77,7 @@ bool TestHardTanH1()
yGPU->SetZeroAll(); yGPU->SetZeroAll();
/* call hardtanh function */ /* call hardtanh function */
HardTanH(xGPU, yGPU); _HardTanH(xGPU, yGPU);
/* check results */ /* check results */
gpuTest = yGPU->CheckData(answer, unitNum, 1e-4F); gpuTest = yGPU->CheckData(answer, unitNum, 1e-4F);
...@@ -152,7 +152,7 @@ bool TestHardTanH2() ...@@ -152,7 +152,7 @@ bool TestHardTanH2()
dedx->SetZeroAll(); dedx->SetZeroAll();
/* call HardTanH function */ /* call HardTanH function */
HardTanH(x, y); _HardTanH(x, y);
/* call HardTanHBackward function */ /* call HardTanHBackward function */
HardTanHBackward(gold, y, x, dedy, dedx, SQUAREDERROR); HardTanHBackward(gold, y, x, dedy, dedx, SQUAREDERROR);
...@@ -181,7 +181,7 @@ bool TestHardTanH2() ...@@ -181,7 +181,7 @@ bool TestHardTanH2()
dedxGPU->SetZeroAll(); dedxGPU->SetZeroAll();
/* call HardTanH function */ /* call HardTanH function */
HardTanH(xGPU, yGPU); _HardTanH(xGPU, yGPU);
/* call hardtanhbackward function */ /* call hardtanhbackward function */
HardTanHBackward(goldGPU, yGPU, xGPU, dedyGPU, dedxGPU, SQUAREDERROR); HardTanHBackward(goldGPU, yGPU, xGPU, dedyGPU, dedxGPU, SQUAREDERROR);
......
...@@ -57,7 +57,7 @@ bool TestIdentity1() ...@@ -57,7 +57,7 @@ bool TestIdentity1()
y->SetZeroAll(); y->SetZeroAll();
/* call Identity function */ /* call Identity function */
Identity(x, y); _Identity(x, y);
/* check result */ /* check result */
cpuTest = y->CheckData(answer, unitNum); cpuTest = y->CheckData(answer, unitNum);
...@@ -75,7 +75,7 @@ bool TestIdentity1() ...@@ -75,7 +75,7 @@ bool TestIdentity1()
yGPU->SetZeroAll(); yGPU->SetZeroAll();
/* call Identity function */ /* call Identity function */
Identity(xGPU, yGPU); _Identity(xGPU, yGPU);
/* check result */ /* check result */
gpuTest = yGPU->CheckData(answer, unitNum); gpuTest = yGPU->CheckData(answer, unitNum);
...@@ -139,7 +139,7 @@ bool TestIdentity2() ...@@ -139,7 +139,7 @@ bool TestIdentity2()
dedy->SetZeroAll(); dedy->SetZeroAll();
/* call Identity function */ /* call Identity function */
Identity(x, y); _Identity(x, y);
/* call IdentityBackward function */ /* call IdentityBackward function */
IdentityBackward(g, y, x, dedy, dedx, CROSSENTROPY); IdentityBackward(g, y, x, dedy, dedx, CROSSENTROPY);
...@@ -168,7 +168,7 @@ bool TestIdentity2() ...@@ -168,7 +168,7 @@ bool TestIdentity2()
dedyGPU->SetZeroAll(); dedyGPU->SetZeroAll();
/* call Identity function */ /* call Identity function */
Identity(xGPU, yGPU); _Identity(xGPU, yGPU);
/* call IdentityBackward function */ /* call IdentityBackward function */
IdentityBackward(gGPU, yGPU, xGPU, dedyGPU, dedxGPU, CROSSENTROPY); IdentityBackward(gGPU, yGPU, xGPU, dedyGPU, dedxGPU, CROSSENTROPY);
......
/* NiuTrans.Tensor - an open-source tensor library
 * Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-12
*/
#include "TLog.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
case 1: test Log function.
Set every entry to its log value.
*/
bool TestLog1()
{
/* a tensor of size (3, 2) */
int aOrder = 2;
int * aDimSize = new int[aOrder];
aDimSize[0] = 3;
aDimSize[1] = 2;
int aUnitNum = 1;
for (int i = 0; i < aOrder; i++)
aUnitNum *= aDimSize[i];
DTYPE aData[3][2] = { {1.0F, 2.0F},
{0.5F, 4.0F},
{5.0F, 6.0F} };
DTYPE answer[3][2] = { {0.0F, 0.6931F},
{-0.6931F, 1.3863F},
{1.6094F, 1.7918F} };
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * a = NewTensor(aOrder, aDimSize);
/* initialize variables */
a->SetData(aData, aUnitNum);
/* call Log function */
_Log(a);
/* check results */
cpuTest = a->CheckData(answer, aUnitNum, 1e-4F);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
/* Initialize variables */
aGPU->SetData(aData, aUnitNum);
/* call Log function */
_Log(aGPU);
/* check results */
gpuTest = aGPU->CheckData(answer, aUnitNum, 1e-4F);
/* destroy variables */
delete a;
delete aGPU;
delete[] aDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete a;
delete[] aDimSize;
return cpuTest;
#endif // USE_CUDA
}
/* other cases */
/*
TODO!!
*/
/* test for Log Function */
bool TestLog()
{
    /* driver for the Log tests: runs every case and reports pass/fail */
    XPRINT(0, stdout, "[TEST Log] set every entry to its log value \n");
    bool returnFlag = true;

    /* case 1 test */
    bool caseFlag = TestLog1();
    if (caseFlag) {
        XPRINT(0, stdout, ">> case 1 passed!\n");
    }
    else {
        returnFlag = false;
        XPRINT(0, stdout, ">> case 1 failed!\n");
    }

    /* other cases test */
    /*
    TODO!!
    */

    /* overall summary */
    if (returnFlag)
        XPRINT(0, stdout, ">> All Passed!\n");
    else
        XPRINT(0, stdout, ">> Failed!\n");

    XPRINT(0, stdout, "\n");

    return returnFlag;
}
} // namespace nts(NiuTrans.Tensor)
/* NiuTrans.Tensor - an open-source tensor library
 * Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-12
*/
#ifndef __TEST_LOG_H__
#define __TEST_LOG_H__
#include "../core/math/Log.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* test for Log Function: runs all Log test cases and returns true when every case passes */
extern "C"
bool TestLog();
} // namespace nts(NiuTrans.Tensor)
#endif // __TEST_LOG_H__
...@@ -57,7 +57,7 @@ bool TestLogSoftmax1() ...@@ -57,7 +57,7 @@ bool TestLogSoftmax1()
y->SetZeroAll(); y->SetZeroAll();
/* call LogSoftmax function */ /* call LogSoftmax function */
LogSoftmax(x, y, 1); _LogSoftmax(x, y, 1);
/* check result */ /* check result */
cpuTest = y->CheckData(answer, unitNum, 1e-4F); cpuTest = y->CheckData(answer, unitNum, 1e-4F);
...@@ -75,7 +75,7 @@ bool TestLogSoftmax1() ...@@ -75,7 +75,7 @@ bool TestLogSoftmax1()
yGPU->SetZeroAll(); yGPU->SetZeroAll();
/* call LogSoftmax function */ /* call LogSoftmax function */
LogSoftmax(xGPU, yGPU, 1); _LogSoftmax(xGPU, yGPU, 1);
/* check result */ /* check result */
gpuTest = yGPU->CheckData(answer, unitNum, 1e-4F); gpuTest = yGPU->CheckData(answer, unitNum, 1e-4F);
...@@ -139,7 +139,7 @@ bool TestLogSoftmax2() ...@@ -139,7 +139,7 @@ bool TestLogSoftmax2()
dedy->SetZeroAll(); dedy->SetZeroAll();
/* call LogSoftmax function */ /* call LogSoftmax function */
LogSoftmax(x, y, 1); _LogSoftmax(x, y, 1);
/* call LogSoftmaxBackward function */ /* call LogSoftmaxBackward function */
LogSoftmaxBackward(g, y, x, dedy, dedx, 1, CROSSENTROPY); LogSoftmaxBackward(g, y, x, dedy, dedx, 1, CROSSENTROPY);
...@@ -167,7 +167,7 @@ bool TestLogSoftmax2() ...@@ -167,7 +167,7 @@ bool TestLogSoftmax2()
dedyGPU->SetZeroAll(); dedyGPU->SetZeroAll();
/* call LogSoftmax function */ /* call LogSoftmax function */
LogSoftmax(xGPU, yGPU, 1); _LogSoftmax(xGPU, yGPU, 1);
/* call LogSoftmaxBackward function */ /* call LogSoftmaxBackward function */
LogSoftmaxBackward(gGPU, yGPU, xGPU, dedyGPU, dedxGPU, 1, CROSSENTROPY); LogSoftmaxBackward(gGPU, yGPU, xGPU, dedyGPU, dedxGPU, 1, CROSSENTROPY);
...@@ -243,7 +243,7 @@ bool TestLogSoftmax3() ...@@ -243,7 +243,7 @@ bool TestLogSoftmax3()
dedy->SetZeroAll(); dedy->SetZeroAll();
/* call LogSoftmax function */ /* call LogSoftmax function */
LogSoftmax(x, y, 1); _LogSoftmax(x, y, 1);
/* call LogSoftmaxBackward function */ /* call LogSoftmaxBackward function */
LogSoftmaxBackward(g, y, x, dedy, dedx, 1, SQUAREDERROR); LogSoftmaxBackward(g, y, x, dedy, dedx, 1, SQUAREDERROR);
...@@ -271,7 +271,7 @@ bool TestLogSoftmax3() ...@@ -271,7 +271,7 @@ bool TestLogSoftmax3()
dedyGPU->SetZeroAll(); dedyGPU->SetZeroAll();
/* call LogSoftmax function */ /* call LogSoftmax function */
LogSoftmax(xGPU, yGPU, 1); _LogSoftmax(xGPU, yGPU, 1);
/* call LogSoftmaxBackward function */ /* call LogSoftmaxBackward function */
LogSoftmaxBackward(gGPU, yGPU, xGPU, dedyGPU, dedxGPU, 1, SQUAREDERROR); LogSoftmaxBackward(gGPU, yGPU, xGPU, dedyGPU, dedxGPU, 1, SQUAREDERROR);
......
...@@ -107,7 +107,7 @@ bool TestMatrixMulBatchedCPU1() ...@@ -107,7 +107,7 @@ bool TestMatrixMulBatchedCPU1()
cList->Add(c2); cList->Add(c2);
/* call MatrixMULBatchedCPU function */ /* call MatrixMULBatchedCPU function */
MatrixMULBatchedCPU(aList, X_NOTRANS, bList, X_NOTRANS, cList); _MatrixMULBatchedCPU(aList, X_NOTRANS, bList, X_NOTRANS, cList);
/* check results */ /* check results */
cpuTest = c1->CheckData(answer1, cUnitNum) && c2->CheckData(answer2, cUnitNum); cpuTest = c1->CheckData(answer1, cUnitNum) && c2->CheckData(answer2, cUnitNum);
...@@ -146,7 +146,7 @@ bool TestMatrixMulBatchedCPU1() ...@@ -146,7 +146,7 @@ bool TestMatrixMulBatchedCPU1()
cList->Add(cGPU2); cList->Add(cGPU2);
/* call MatrixMULBatchedCPU function */ /* call MatrixMULBatchedCPU function */
MatrixMULBatchedCPU(aList, X_NOTRANS, bList, X_NOTRANS, cList); _MatrixMULBatchedCPU(aList, X_NOTRANS, bList, X_NOTRANS, cList);
/* check results */ /* check results */
gpuTest = cGPU1->CheckData(answer1, cUnitNum) && gpuTest; gpuTest = cGPU1->CheckData(answer1, cUnitNum) && gpuTest;
......
...@@ -82,7 +82,7 @@ bool TestMatrixMul1() ...@@ -82,7 +82,7 @@ bool TestMatrixMul1()
t->SetZeroAll(); t->SetZeroAll();
/* call MatrixMul function */ /* call MatrixMul function */
MatrixMul(s1, X_NOTRANS, s2, X_NOTRANS, t); _MatrixMul(s1, X_NOTRANS, s2, X_NOTRANS, t);
/* check results */ /* check results */
cpuTest = t->CheckData(answer, tUnitNum); cpuTest = t->CheckData(answer, tUnitNum);
...@@ -102,7 +102,7 @@ bool TestMatrixMul1() ...@@ -102,7 +102,7 @@ bool TestMatrixMul1()
tGPU->SetZeroAll(); tGPU->SetZeroAll();
/* call MatrixMul function */ /* call MatrixMul function */
MatrixMul(sGPU1, X_NOTRANS, sGPU2, X_NOTRANS, tGPU); _MatrixMul(sGPU1, X_NOTRANS, sGPU2, X_NOTRANS, tGPU);
/* check results */ /* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum); gpuTest = tGPU->CheckData(answer, tUnitNum);
...@@ -192,7 +192,7 @@ bool TestMatrixMul2() ...@@ -192,7 +192,7 @@ bool TestMatrixMul2()
t->SetZeroAll(); t->SetZeroAll();
/* call MatrixMul function */ /* call MatrixMul function */
MatrixMul(s1, X_TRANS, s2, X_NOTRANS, t); _MatrixMul(s1, X_TRANS, s2, X_NOTRANS, t);
/* check results */ /* check results */
cpuTest = t->CheckData(answer, tUnitNum); cpuTest = t->CheckData(answer, tUnitNum);
...@@ -212,7 +212,7 @@ bool TestMatrixMul2() ...@@ -212,7 +212,7 @@ bool TestMatrixMul2()
tGPU->SetZeroAll(); tGPU->SetZeroAll();
/* call MatrixMul function */ /* call MatrixMul function */
MatrixMul(sGPU1, X_TRANS, sGPU2, X_NOTRANS, tGPU); _MatrixMul(sGPU1, X_TRANS, sGPU2, X_NOTRANS, tGPU);
/* check results */ /* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum); gpuTest = tGPU->CheckData(answer, tUnitNum);
...@@ -322,7 +322,7 @@ bool TestMatrixMul3() ...@@ -322,7 +322,7 @@ bool TestMatrixMul3()
t->SetZeroAll(); t->SetZeroAll();
/* call MatrixMul function */ /* call MatrixMul function */
MatrixMul(s1, X_NOTRANS, s2, X_NOTRANS, t); _MatrixMul(s1, X_NOTRANS, s2, X_NOTRANS, t);
/* check results */ /* check results */
cpuTest = t->CheckData(answer, tUnitNum); cpuTest = t->CheckData(answer, tUnitNum);
...@@ -342,7 +342,7 @@ bool TestMatrixMul3() ...@@ -342,7 +342,7 @@ bool TestMatrixMul3()
tGPU->SetZeroAll(); tGPU->SetZeroAll();
/* call MatrixMul function */ /* call MatrixMul function */
MatrixMul(sGPU1, X_NOTRANS, sGPU2, X_NOTRANS, tGPU); _MatrixMul(sGPU1, X_NOTRANS, sGPU2, X_NOTRANS, tGPU);
/* check results */ /* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum); gpuTest = tGPU->CheckData(answer, tUnitNum);
...@@ -441,7 +441,7 @@ bool TestMatrixMul4() ...@@ -441,7 +441,7 @@ bool TestMatrixMul4()
t->SetZeroAll(); t->SetZeroAll();
/* call MatrixMul function */ /* call MatrixMul function */
MatrixMul(s1, X_NOTRANS, s2, X_NOTRANS, t); _MatrixMul(s1, X_NOTRANS, s2, X_NOTRANS, t);
/* check results */ /* check results */
cpuTest = t->CheckData(answer, tUnitNum); cpuTest = t->CheckData(answer, tUnitNum);
...@@ -461,7 +461,7 @@ bool TestMatrixMul4() ...@@ -461,7 +461,7 @@ bool TestMatrixMul4()
tGPU->SetZeroAll(); tGPU->SetZeroAll();
/* call MatrixMul function */ /* call MatrixMul function */
MatrixMul(sGPU1, X_NOTRANS, sGPU2, X_NOTRANS, tGPU); _MatrixMul(sGPU1, X_NOTRANS, sGPU2, X_NOTRANS, tGPU);
/* check results */ /* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum); gpuTest = tGPU->CheckData(answer, tUnitNum);
......
...@@ -82,7 +82,7 @@ bool TestMatrixMul2D1() ...@@ -82,7 +82,7 @@ bool TestMatrixMul2D1()
t->SetZeroAll(); t->SetZeroAll();
/* call MatrixMul2D function */ /* call MatrixMul2D function */
MatrixMul2D(s1, X_NOTRANS, s2, X_NOTRANS, t); _MatrixMul2D(s1, X_NOTRANS, s2, X_NOTRANS, t);
/* check results */ /* check results */
cpuTest = t->CheckData(answer, tUnitNum); cpuTest = t->CheckData(answer, tUnitNum);
...@@ -102,7 +102,7 @@ bool TestMatrixMul2D1() ...@@ -102,7 +102,7 @@ bool TestMatrixMul2D1()
tGPU->SetZeroAll(); tGPU->SetZeroAll();
/* call MatrixMul2D function */ /* call MatrixMul2D function */
MatrixMul2D(sGPU1, X_NOTRANS, sGPU2, X_NOTRANS, tGPU); _MatrixMul2D(sGPU1, X_NOTRANS, sGPU2, X_NOTRANS, tGPU);
/* check results */ /* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum); gpuTest = tGPU->CheckData(answer, tUnitNum);
...@@ -192,7 +192,7 @@ bool TestMatrixMul2D2() ...@@ -192,7 +192,7 @@ bool TestMatrixMul2D2()
t->SetZeroAll(); t->SetZeroAll();
/* call MatrixMul2D function */ /* call MatrixMul2D function */
MatrixMul2D(s1, X_TRANS, s2, X_NOTRANS, t); _MatrixMul2D(s1, X_TRANS, s2, X_NOTRANS, t);
/* check results */ /* check results */
cpuTest = t->CheckData(answer, tUnitNum); cpuTest = t->CheckData(answer, tUnitNum);
...@@ -212,7 +212,7 @@ bool TestMatrixMul2D2() ...@@ -212,7 +212,7 @@ bool TestMatrixMul2D2()
tGPU->SetZeroAll(); tGPU->SetZeroAll();
/* call MatrixMul2D function */ /* call MatrixMul2D function */
MatrixMul2D(sGPU1, X_TRANS, sGPU2, X_NOTRANS, tGPU); _MatrixMul2D(sGPU1, X_TRANS, sGPU2, X_NOTRANS, tGPU);
/* check results */ /* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum); gpuTest = tGPU->CheckData(answer, tUnitNum);
......
...@@ -82,7 +82,7 @@ bool TestMatrixMul2DParallel1() ...@@ -82,7 +82,7 @@ bool TestMatrixMul2DParallel1()
t->SetZeroAll(); t->SetZeroAll();
/* call MatrixMul2DParallel function */ /* call MatrixMul2DParallel function */
MatrixMul2DParallel(s1, X_NOTRANS, s2, X_NOTRANS, t); _MatrixMul2DParallel(s1, X_NOTRANS, s2, X_NOTRANS, t);
/* check results */ /* check results */
cpuTest = t->CheckData(answer, tUnitNum); cpuTest = t->CheckData(answer, tUnitNum);
...@@ -158,7 +158,7 @@ bool TestMatrixMul2DParallel2() ...@@ -158,7 +158,7 @@ bool TestMatrixMul2DParallel2()
t->SetZeroAll(); t->SetZeroAll();
/* call MatrixMul2DParallel function */ /* call MatrixMul2DParallel function */
MatrixMul2DParallel(s1, X_TRANS, s2, X_NOTRANS, t); _MatrixMul2DParallel(s1, X_TRANS, s2, X_NOTRANS, t);
/* check results */ /* check results */
cpuTest = t->CheckData(answer, tUnitNum); cpuTest = t->CheckData(answer, tUnitNum);
......
...@@ -82,7 +82,7 @@ bool TestMatrixMulBatched1() ...@@ -82,7 +82,7 @@ bool TestMatrixMulBatched1()
t->SetZeroAll(); t->SetZeroAll();
/* call MatrixMulBatched function */ /* call MatrixMulBatched function */
MatrixMulBatched(s1, X_NOTRANS, s2, X_NOTRANS, t); _MatrixMulBatched(s1, X_NOTRANS, s2, X_NOTRANS, t);
/* check results */ /* check results */
cpuTest = t->CheckData(answer, tUnitNum); cpuTest = t->CheckData(answer, tUnitNum);
...@@ -102,7 +102,7 @@ bool TestMatrixMulBatched1() ...@@ -102,7 +102,7 @@ bool TestMatrixMulBatched1()
tGPU->SetZeroAll(); tGPU->SetZeroAll();
/* call MatrixMulBatched function */ /* call MatrixMulBatched function */
MatrixMulBatched(sGPU1, X_NOTRANS, sGPU2, X_NOTRANS, tGPU); _MatrixMulBatched(sGPU1, X_NOTRANS, sGPU2, X_NOTRANS, tGPU);
/* check results */ /* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum); gpuTest = tGPU->CheckData(answer, tUnitNum);
...@@ -200,7 +200,7 @@ bool TestMatrixMulBatched2() ...@@ -200,7 +200,7 @@ bool TestMatrixMulBatched2()
t->SetZeroAll(); t->SetZeroAll();
/* call MatrixMulBatched function */ /* call MatrixMulBatched function */
MatrixMulBatched(s1, X_NOTRANS, s2, X_NOTRANS, t); _MatrixMulBatched(s1, X_NOTRANS, s2, X_NOTRANS, t);
/* check results */ /* check results */
cpuTest = t->CheckData(answer, tUnitNum); cpuTest = t->CheckData(answer, tUnitNum);
...@@ -220,7 +220,7 @@ bool TestMatrixMulBatched2() ...@@ -220,7 +220,7 @@ bool TestMatrixMulBatched2()
tGPU->SetZeroAll(); tGPU->SetZeroAll();
/* call MatrixMulBatched function */ /* call MatrixMulBatched function */
MatrixMulBatched(sGPU1, X_NOTRANS, sGPU2, X_NOTRANS, tGPU); _MatrixMulBatched(sGPU1, X_NOTRANS, sGPU2, X_NOTRANS, tGPU);
/* check results */ /* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum); gpuTest = tGPU->CheckData(answer, tUnitNum);
......
...@@ -66,7 +66,7 @@ bool TestMerge1() ...@@ -66,7 +66,7 @@ bool TestMerge1()
t->SetZeroAll(); t->SetZeroAll();
/* call merge function */ /* call merge function */
Merge(s, t, 1, 0); _Merge(s, t, 1, 0);
/* check results */ /* check results */
cpuTest = t->CheckData(answer, tUnitNum); cpuTest = t->CheckData(answer, tUnitNum);
...@@ -84,7 +84,7 @@ bool TestMerge1() ...@@ -84,7 +84,7 @@ bool TestMerge1()
tGPU->SetZeroAll(); tGPU->SetZeroAll();
/* call merge function */ /* call merge function */
Merge(sGPU, tGPU, 1, 0); _Merge(sGPU, tGPU, 1, 0);
/* check results */ /* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum); gpuTest = tGPU->CheckData(answer, tUnitNum);
...@@ -173,8 +173,8 @@ bool TestMerge2() ...@@ -173,8 +173,8 @@ bool TestMerge2()
t2->SetZeroAll(); t2->SetZeroAll();
/* call merge function */ /* call merge function */
Merge(s, t1, 1, 0); _Merge(s, t1, 1, 0);
Merge(s, t2, 2, 0); _Merge(s, t2, 2, 0);
/* check results */ /* check results */
cpuTest = t1->CheckData(answer1, tUnitNum1) && t2->CheckData(answer2, tUnitNum2); cpuTest = t1->CheckData(answer1, tUnitNum1) && t2->CheckData(answer2, tUnitNum2);
...@@ -194,8 +194,8 @@ bool TestMerge2() ...@@ -194,8 +194,8 @@ bool TestMerge2()
tGPU2->SetZeroAll(); tGPU2->SetZeroAll();
/* call merge function */ /* call merge function */
Merge(sGPU, tGPU1, 1, 0); _Merge(sGPU, tGPU1, 1, 0);
Merge(sGPU, tGPU2, 2, 0); _Merge(sGPU, tGPU2, 2, 0);
/* check results */ /* check results */
gpuTest = tGPU1->CheckData(answer1, tUnitNum1) && tGPU2->CheckData(answer2, tUnitNum2); gpuTest = tGPU1->CheckData(answer1, tUnitNum1) && tGPU2->CheckData(answer2, tUnitNum2);
...@@ -282,7 +282,7 @@ bool TestMerge3() ...@@ -282,7 +282,7 @@ bool TestMerge3()
smallList->Add(s2); smallList->Add(s2);
/* call merge function */ /* call merge function */
Merge(smallList, t, 0); _Merge(smallList, t, 0);
/* check results */ /* check results */
cpuTest = t->CheckData(answer, tUnitNum); cpuTest = t->CheckData(answer, tUnitNum);
...@@ -309,7 +309,7 @@ bool TestMerge3() ...@@ -309,7 +309,7 @@ bool TestMerge3()
smallList->Add(sGPU2); smallList->Add(sGPU2);
/* call merge function */ /* call merge function */
Merge(smallList, tGPU, 0); _Merge(smallList, tGPU, 0);
/* check results */ /* check results */
cpuTest = tGPU->CheckData(answer, tUnitNum); cpuTest = tGPU->CheckData(answer, tUnitNum);
...@@ -394,7 +394,7 @@ bool TestMerge4() ...@@ -394,7 +394,7 @@ bool TestMerge4()
smallList->Add(s2); smallList->Add(s2);
/* call merge function */ /* call merge function */
Merge(smallList, t, 1); _Merge(smallList, t, 1);
/* check results */ /* check results */
cpuTest = t->CheckData(answer, tUnitNum); cpuTest = t->CheckData(answer, tUnitNum);
...@@ -421,7 +421,7 @@ bool TestMerge4() ...@@ -421,7 +421,7 @@ bool TestMerge4()
smallList->Add(sGPU2); smallList->Add(sGPU2);
/* call merge function */ /* call merge function */
Merge(smallList, tGPU, 1); _Merge(smallList, tGPU, 1);
/* check results */ /* check results */
cpuTest = tGPU->CheckData(answer, tUnitNum); cpuTest = tGPU->CheckData(answer, tUnitNum);
......
...@@ -74,17 +74,23 @@ bool TestMultiply1() ...@@ -74,17 +74,23 @@ bool TestMultiply1()
XTensor * s1 = NewTensor(sOrder1, sDimSize1); XTensor * s1 = NewTensor(sOrder1, sDimSize1);
XTensor * s2 = NewTensor(sOrder2, sDimSize2); XTensor * s2 = NewTensor(sOrder2, sDimSize2);
XTensor * t = NewTensor(tOrder, tDimSize); XTensor * t = NewTensor(tOrder, tDimSize);
XTensor * tMe = NewTensor(tOrder, tDimSize);
XTensor tUser;
/* initialize variables */ /* initialize variables */
s1->SetData(sData1, sUnitNum1); s1->SetData(sData1, sUnitNum1);
tMe->SetData(sData1, sUnitNum1);
s2->SetData(sData2, sUnitNum2); s2->SetData(sData2, sUnitNum2);
t->SetZeroAll(); t->SetZeroAll();
/* call MultiplyElementWise function */ /* call Multiply function */
_Multiply(s1, s2, t, 0, 0); _Multiply(s1, s2, t, 0, 0);
_MultiplyMe(tMe, s2, 0, 0);
tUser = Multiply(*s1, *s2, 0, 0);
/* check results */ /* check results */
cpuTest = t->CheckData(answer, tUnitNum); cpuTest = t->CheckData(answer, tUnitNum)
&& tMe->CheckData(answer, tUnitNum) && tUser.CheckData(answer, tUnitNum);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
...@@ -94,25 +100,33 @@ bool TestMultiply1() ...@@ -94,25 +100,33 @@ bool TestMultiply1()
XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0); XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0); XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0); XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
XTensor * tMeGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
XTensor tUserGPU;
/* Initialize variables */ /* Initialize variables */
sGPU1->SetData(sData1, sUnitNum1); sGPU1->SetData(sData1, sUnitNum1);
tMeGPU->SetData(sData1, sUnitNum1);
sGPU2->SetData(sData2, sUnitNum2); sGPU2->SetData(sData2, sUnitNum2);
tGPU->SetZeroAll(); tGPU->SetZeroAll();
/* call MultiplyElementWise function */ /* call Multiply function */
_Multiply(sGPU1, sGPU2, tGPU, 0, 0); _Multiply(sGPU1, sGPU2, tGPU, 0, 0);
_MultiplyMe(tMeGPU, sGPU2, 0, 0);
tUserGPU = Multiply(*sGPU1, *sGPU2, 0, 0);
/* check results */ /* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum); gpuTest = tGPU->CheckData(answer, tUnitNum)
&& tMeGPU->CheckData(answer, tUnitNum) && tUserGPU.CheckData(answer, tUnitNum);
/* destroy variables */ /* destroy variables */
delete s1; delete s1;
delete s2; delete s2;
delete t; delete t;
delete tMe;
delete sGPU1; delete sGPU1;
delete sGPU2; delete sGPU2;
delete tGPU; delete tGPU;
delete tMeGPU;
delete[] sDimSize1; delete[] sDimSize1;
delete[] sDimSize2; delete[] sDimSize2;
delete[] tDimSize; delete[] tDimSize;
...@@ -123,6 +137,7 @@ bool TestMultiply1() ...@@ -123,6 +137,7 @@ bool TestMultiply1()
delete s1; delete s1;
delete s2; delete s2;
delete t; delete t;
delete tMe;
delete[] sDimSize1; delete[] sDimSize1;
delete[] sDimSize2; delete[] sDimSize2;
delete[] tDimSize; delete[] tDimSize;
...@@ -182,17 +197,23 @@ bool TestMultiply2() ...@@ -182,17 +197,23 @@ bool TestMultiply2()
XTensor * s1 = NewTensor(sOrder1, sDimSize1); XTensor * s1 = NewTensor(sOrder1, sDimSize1);
XTensor * s2 = NewTensor(sOrder2, sDimSize2); XTensor * s2 = NewTensor(sOrder2, sDimSize2);
XTensor * t = NewTensor(tOrder, tDimSize); XTensor * t = NewTensor(tOrder, tDimSize);
XTensor * tMe = NewTensor(tOrder, tDimSize);
XTensor tUser;
/* initialize variables */ /* initialize variables */
s1->SetData(sData1, sUnitNum1); s1->SetData(sData1, sUnitNum1);
tMe->SetData(sData1, sUnitNum1);
s2->SetData(sData2, sUnitNum2); s2->SetData(sData2, sUnitNum2);
t->SetZeroAll(); t->SetZeroAll();
/* call MultiplyElementWise function */ /* call Multiply function */
_Multiply(s1, s2, t, 0, 0); _Multiply(s1, s2, t, 0, 0);
_MultiplyMe(tMe, s2, 0, 0);
tUser = Multiply(*s1, *s2, 0, 0);
/* check results */ /* check results */
cpuTest = t->CheckData(answer, tUnitNum); cpuTest = t->CheckData(answer, tUnitNum)
&& tMe->CheckData(answer, tUnitNum) && tUser.CheckData(answer, tUnitNum);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
...@@ -202,25 +223,33 @@ bool TestMultiply2() ...@@ -202,25 +223,33 @@ bool TestMultiply2()
XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0); XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0); XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0); XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
XTensor * tMeGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
XTensor tUserGPU;
/* Initialize variables */ /* Initialize variables */
sGPU1->SetData(sData1, sUnitNum1); sGPU1->SetData(sData1, sUnitNum1);
tMeGPU->SetData(sData1, sUnitNum1);
sGPU2->SetData(sData2, sUnitNum2); sGPU2->SetData(sData2, sUnitNum2);
tGPU->SetZeroAll(); tGPU->SetZeroAll();
/* call MultiplyElementWise function */ /* call Multiply function */
_Multiply(sGPU1, sGPU2, tGPU, 0, 0); _Multiply(sGPU1, sGPU2, tGPU, 0, 0);
_MultiplyMe(tMeGPU, sGPU2, 0, 0);
tUserGPU = Multiply(*sGPU1, *sGPU2, 0, 0);
/* check results */ /* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum); gpuTest = tGPU->CheckData(answer, tUnitNum)
&& tMeGPU->CheckData(answer, tUnitNum) && tUserGPU.CheckData(answer, tUnitNum);
/* destroy variables */ /* destroy variables */
delete s1; delete s1;
delete s2; delete s2;
delete t; delete t;
delete tMe;
delete sGPU1; delete sGPU1;
delete sGPU2; delete sGPU2;
delete tGPU; delete tGPU;
delete tMeGPU;
delete[] sDimSize1; delete[] sDimSize1;
delete[] sDimSize2; delete[] sDimSize2;
delete[] tDimSize; delete[] tDimSize;
...@@ -231,6 +260,7 @@ bool TestMultiply2() ...@@ -231,6 +260,7 @@ bool TestMultiply2()
delete s1; delete s1;
delete s2; delete s2;
delete t; delete t;
delete tMe;
delete[] sDimSize1; delete[] sDimSize1;
delete[] sDimSize2; delete[] sDimSize2;
delete[] tDimSize; delete[] tDimSize;
...@@ -351,10 +381,10 @@ bool TestMultiply3() ...@@ -351,10 +381,10 @@ bool TestMultiply3()
TODO!! TODO!!
*/ */
/* test for MultiplyElementWise Function */ /* test for Multiply Function */
bool TestMultiply() bool TestMultiply()
{ {
XPRINT(0, stdout, "[TEST MULTIPLYELEMENTWISE] element-wise product of two tensors \n"); XPRINT(0, stdout, "[TEST Multiply] element-wise product of two tensors \n");
bool returnFlag = true, caseFlag = true; bool returnFlag = true, caseFlag = true;
/* case 1 test */ /* case 1 test */
......
...@@ -53,7 +53,7 @@ bool TestNegate1() ...@@ -53,7 +53,7 @@ bool TestNegate1()
a->SetData(aData, aUnitNum); a->SetData(aData, aUnitNum);
/* call Negate function */ /* call Negate function */
Negate(a); _Negate(a);
/* check results */ /* check results */
cpuTest = a->CheckData(answer, aUnitNum); cpuTest = a->CheckData(answer, aUnitNum);
...@@ -69,7 +69,7 @@ bool TestNegate1() ...@@ -69,7 +69,7 @@ bool TestNegate1()
aGPU->SetData(aData, aUnitNum); aGPU->SetData(aData, aUnitNum);
/* call Negate function */ /* call Negate function */
Negate(aGPU); _Negate(aGPU);
/* check results */ /* check results */
gpuTest = aGPU->CheckData(answer, aUnitNum); gpuTest = aGPU->CheckData(answer, aUnitNum);
...@@ -119,7 +119,7 @@ bool TestNegate2() ...@@ -119,7 +119,7 @@ bool TestNegate2()
a->SetData(aData, aUnitNum); a->SetData(aData, aUnitNum);
/* call Negate function */ /* call Negate function */
Negate(a); _Negate(a);
/* check results */ /* check results */
cpuTest = a->CheckData(answer, aUnitNum); cpuTest = a->CheckData(answer, aUnitNum);
...@@ -135,7 +135,7 @@ bool TestNegate2() ...@@ -135,7 +135,7 @@ bool TestNegate2()
aGPU->SetData(aData, aUnitNum); aGPU->SetData(aData, aUnitNum);
/* call Negate function */ /* call Negate function */
Negate(aGPU); _Negate(aGPU);
/* check results */ /* check results */
gpuTest = aGPU->CheckData(answer, aUnitNum); gpuTest = aGPU->CheckData(answer, aUnitNum);
......
...@@ -108,9 +108,12 @@ bool TestNormalize1() ...@@ -108,9 +108,12 @@ bool TestNormalize1()
XTensor * var = NewTensor(varOrder, varDimSize); XTensor * var = NewTensor(varOrder, varDimSize);
XTensor * a = NewTensor(aOrder, aDimSize); XTensor * a = NewTensor(aOrder, aDimSize);
XTensor * b = NewTensor(bOrder, bDimSize); XTensor * b = NewTensor(bOrder, bDimSize);
XTensor * tMe = NewTensor(sOrder, sDimSize);
XTensor tUser;
/* initialize variables */ /* initialize variables */
s->SetData(sData, sUnitNum); s->SetData(sData, sUnitNum);
tMe->SetData(sData, sUnitNum);
mean->SetData(meanData, meanUnitNum); mean->SetData(meanData, meanUnitNum);
var->SetData(varData, varUnitNum); var->SetData(varData, varUnitNum);
a->SetData(aData, aUnitNum); a->SetData(aData, aUnitNum);
...@@ -118,10 +121,13 @@ bool TestNormalize1() ...@@ -118,10 +121,13 @@ bool TestNormalize1()
t->SetZeroAll(); t->SetZeroAll();
/* call normalize function */ /* call normalize function */
Normalize(s, t, 0, mean, var, a, b, 0.0F); _Normalize(s, t, 0, mean, var, a, b, 0.0F);
_NormalizeMe(tMe, 0, mean, var, a, b, 0.0F);
tUser = Normalize(*s, 0, *mean, *var, *a, *b, 0.0F);
/* check results */ /* check results */
cpuTest = t->CheckData(answer, tUnitNum, 1e-4F, 0); cpuTest = t->CheckData(answer, tUnitNum, 1e-4F)
&& tMe->CheckData(answer, tUnitNum, 1e-4F) && tUser.CheckData(answer, tUnitNum, 1e-4F);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
...@@ -134,9 +140,12 @@ bool TestNormalize1() ...@@ -134,9 +140,12 @@ bool TestNormalize1()
XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0); XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * bGPU = NewTensor(bOrder, bDimSize, X_FLOAT, 1.0F, 0); XTensor * bGPU = NewTensor(bOrder, bDimSize, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0); XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
XTensor * tMeGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor tUserGPU;
/* initialize variables */ /* initialize variables */
sGPU->SetData(sData, sUnitNum); sGPU->SetData(sData, sUnitNum);
tMeGPU->SetData(sData, sUnitNum);
meanGPU->SetData(meanData, meanUnitNum); meanGPU->SetData(meanData, meanUnitNum);
varGPU->SetData(varData, varUnitNum); varGPU->SetData(varData, varUnitNum);
aGPU->SetData(aData, aUnitNum); aGPU->SetData(aData, aUnitNum);
...@@ -144,19 +153,24 @@ bool TestNormalize1() ...@@ -144,19 +153,24 @@ bool TestNormalize1()
tGPU->SetZeroAll(); tGPU->SetZeroAll();
/* call Normalize function */ /* call Normalize function */
Normalize(sGPU, tGPU, 0, meanGPU, varGPU, aGPU, bGPU, 0.0F); _Normalize(sGPU, tGPU, 0, meanGPU, varGPU, aGPU, bGPU, 0.0F);
_NormalizeMe(tMeGPU, 0, meanGPU, varGPU, aGPU, bGPU, 0.0F);
tUserGPU = Normalize(*sGPU, 0, *meanGPU, *varGPU, *aGPU, *bGPU, 0.0F);
/* check results */ /* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum, 1e-4F, 0); gpuTest = tGPU->CheckData(answer, tUnitNum, 1e-4F)
&& tMeGPU->CheckData(answer, tUnitNum, 1e-4F) && tUserGPU.CheckData(answer, tUnitNum, 1e-4F);
/* destroy variables */ /* destroy variables */
delete s; delete s;
delete tMe;
delete t; delete t;
delete mean; delete mean;
delete var; delete var;
delete a; delete a;
delete b; delete b;
delete sGPU; delete sGPU;
delete tMeGPU;
delete tGPU; delete tGPU;
delete meanGPU; delete meanGPU;
delete varGPU; delete varGPU;
...@@ -173,6 +187,7 @@ bool TestNormalize1() ...@@ -173,6 +187,7 @@ bool TestNormalize1()
#else #else
/* destroy variables */ /* destroy variables */
delete s; delete s;
delete tMe;
delete t; delete t;
delete mean; delete mean;
delete var; delete var;
......
...@@ -57,7 +57,7 @@ bool TestPower1() ...@@ -57,7 +57,7 @@ bool TestPower1()
a->SetData(aData, aUnitNum); a->SetData(aData, aUnitNum);
/* call Power function */ /* call Power function */
Power(a, 2.0F); _Power(a, 2.0F);
/* check results */ /* check results */
cpuTest = a->CheckData(answer, aUnitNum, 1e-4F); cpuTest = a->CheckData(answer, aUnitNum, 1e-4F);
...@@ -73,7 +73,7 @@ bool TestPower1() ...@@ -73,7 +73,7 @@ bool TestPower1()
aGPU->SetData(aData, aUnitNum); aGPU->SetData(aData, aUnitNum);
/* call power function */ /* call power function */
Power(aGPU, 2.0F); _Power(aGPU, 2.0F);
/* check results */ /* check results */
gpuTest = aGPU->CheckData(answer, aUnitNum, 1e-4F); gpuTest = aGPU->CheckData(answer, aUnitNum, 1e-4F);
...@@ -126,7 +126,7 @@ bool TestPower2() ...@@ -126,7 +126,7 @@ bool TestPower2()
a->SetData(aData, aUnitNum); a->SetData(aData, aUnitNum);
/* call Power function */ /* call Power function */
Power(a, 1.0F); _Power(a, 1.0F);
/* check results */ /* check results */
cpuTest = a->CheckData(answer, aUnitNum, 1e-4F); cpuTest = a->CheckData(answer, aUnitNum, 1e-4F);
...@@ -142,7 +142,7 @@ bool TestPower2() ...@@ -142,7 +142,7 @@ bool TestPower2()
aGPU->SetData(aData, aUnitNum); aGPU->SetData(aData, aUnitNum);
/* call Power function */ /* call Power function */
Power(aGPU, 1.0F); _Power(aGPU, 1.0F);
/* check results */ /* check results */
gpuTest = aGPU->CheckData(answer, aUnitNum, 1e-4F); gpuTest = aGPU->CheckData(answer, aUnitNum, 1e-4F);
...@@ -195,7 +195,7 @@ bool TestPower3() ...@@ -195,7 +195,7 @@ bool TestPower3()
a->SetData(aData, aUnitNum); a->SetData(aData, aUnitNum);
/* call Power function */ /* call Power function */
Power(a, 0.0F); _Power(a, 0.0F);
/* check results */ /* check results */
cpuTest = a->CheckData(answer, aUnitNum, 1e-4F); cpuTest = a->CheckData(answer, aUnitNum, 1e-4F);
...@@ -211,7 +211,7 @@ bool TestPower3() ...@@ -211,7 +211,7 @@ bool TestPower3()
aGPU->SetData(aData, aUnitNum); aGPU->SetData(aData, aUnitNum);
/* call Power function */ /* call Power function */
Power(aGPU, 0.0F); _Power(aGPU, 0.0F);
/* check results */ /* check results */
gpuTest = aGPU->CheckData(answer, aUnitNum, 1e-4F); gpuTest = aGPU->CheckData(answer, aUnitNum, 1e-4F);
......
...@@ -56,7 +56,7 @@ bool TestRectify1() ...@@ -56,7 +56,7 @@ bool TestRectify1()
y->SetZeroAll(); y->SetZeroAll();
/* call Rectify function */ /* call Rectify function */
Rectify(x, y); _Rectify(x, y);
/* check results */ /* check results */
cpuTest = y->CheckData(answer, unitNum); cpuTest = y->CheckData(answer, unitNum);
...@@ -74,7 +74,7 @@ bool TestRectify1() ...@@ -74,7 +74,7 @@ bool TestRectify1()
yGPU->SetZeroAll(); yGPU->SetZeroAll();
/* call Rectify function */ /* call Rectify function */
Rectify(xGPU, yGPU); _Rectify(xGPU, yGPU);
/* check results */ /* check results */
gpuTest = yGPU->CheckData(answer, unitNum); gpuTest = yGPU->CheckData(answer, unitNum);
...@@ -144,7 +144,7 @@ bool TestRectify2() ...@@ -144,7 +144,7 @@ bool TestRectify2()
dedx->SetZeroAll(); dedx->SetZeroAll();
/* call Rectify function */ /* call Rectify function */
Rectify(x, y); _Rectify(x, y);
/* call RectifyBackward function */ /* call RectifyBackward function */
RectifyBackward(gold, y, x, dedy, dedx, CROSSENTROPY); RectifyBackward(gold, y, x, dedy, dedx, CROSSENTROPY);
...@@ -173,7 +173,7 @@ bool TestRectify2() ...@@ -173,7 +173,7 @@ bool TestRectify2()
dedxGPU->SetZeroAll(); dedxGPU->SetZeroAll();
/* call Rectify function */ /* call Rectify function */
Rectify(xGPU, yGPU); _Rectify(xGPU, yGPU);
/* call rectifybackward function */ /* call rectifybackward function */
RectifyBackward(goldGPU, yGPU, xGPU, dedyGPU, dedxGPU, CROSSENTROPY); RectifyBackward(goldGPU, yGPU, xGPU, dedyGPU, dedxGPU, CROSSENTROPY);
......
...@@ -78,8 +78,8 @@ bool TestReduceMax1() ...@@ -78,8 +78,8 @@ bool TestReduceMax1()
t2->SetZeroAll(); t2->SetZeroAll();
/* call ReduceMax function */ /* call ReduceMax function */
ReduceMax(s, t1, 0); _ReduceMax(s, t1, 0);
ReduceMax(s, t2, 1); _ReduceMax(s, t2, 1);
/* check results */ /* check results */
cpuTest = t1->CheckData(answer1, tUnitNum1) && t2->CheckData(answer2, tUnitNum2); cpuTest = t1->CheckData(answer1, tUnitNum1) && t2->CheckData(answer2, tUnitNum2);
...@@ -99,8 +99,8 @@ bool TestReduceMax1() ...@@ -99,8 +99,8 @@ bool TestReduceMax1()
tGPU2->SetZeroAll(); tGPU2->SetZeroAll();
/* call ReduceMax function */ /* call ReduceMax function */
ReduceMax(sGPU, tGPU1, 0); _ReduceMax(sGPU, tGPU1, 0);
ReduceMax(sGPU, tGPU2, 1); _ReduceMax(sGPU, tGPU2, 1);
/* check results */ /* check results */
gpuTest = tGPU1->CheckData(answer1, tUnitNum1) && tGPU2->CheckData(answer2, tUnitNum2); gpuTest = tGPU1->CheckData(answer1, tUnitNum1) && tGPU2->CheckData(answer2, tUnitNum2);
......
...@@ -73,8 +73,8 @@ bool TestReduceMean1() ...@@ -73,8 +73,8 @@ bool TestReduceMean1()
t2->SetZeroAll(); t2->SetZeroAll();
/* call ReduceMean function */ /* call ReduceMean function */
ReduceMean(s, t1, 0); _ReduceMean(s, t1, 0);
ReduceMean(s, t2, 1); _ReduceMean(s, t2, 1);
/* check results */ /* check results */
cpuTest = t1->CheckData(answer1, tUnitNum1) && t2->CheckData(answer2, tUnitNum2); cpuTest = t1->CheckData(answer1, tUnitNum1) && t2->CheckData(answer2, tUnitNum2);
...@@ -94,8 +94,8 @@ bool TestReduceMean1() ...@@ -94,8 +94,8 @@ bool TestReduceMean1()
tGPU2->SetZeroAll(); tGPU2->SetZeroAll();
/* call ReduceMean function */ /* call ReduceMean function */
ReduceMean(sGPU, tGPU1, 0); _ReduceMean(sGPU, tGPU1, 0);
ReduceMean(sGPU, tGPU2, 1); _ReduceMean(sGPU, tGPU2, 1);
/* check results */ /* check results */
cpuTest = tGPU1->CheckData(answer1, tUnitNum1) && tGPU2->CheckData(answer2, tUnitNum2); cpuTest = tGPU1->CheckData(answer1, tUnitNum1) && tGPU2->CheckData(answer2, tUnitNum2);
......
...@@ -78,8 +78,8 @@ bool TestReduceSum1() ...@@ -78,8 +78,8 @@ bool TestReduceSum1()
t2->SetZeroAll(); t2->SetZeroAll();
/* call ReduceSum function */ /* call ReduceSum function */
ReduceSum(s, t1, 0); _ReduceSum(s, t1, 0);
ReduceSum(s, t2, 1); _ReduceSum(s, t2, 1);
/* check results */ /* check results */
cpuTest = t1->CheckData(answer1, tUnitNum1) && t2->CheckData(answer2, tUnitNum2); cpuTest = t1->CheckData(answer1, tUnitNum1) && t2->CheckData(answer2, tUnitNum2);
...@@ -99,8 +99,8 @@ bool TestReduceSum1() ...@@ -99,8 +99,8 @@ bool TestReduceSum1()
tGPU2->SetZeroAll(); tGPU2->SetZeroAll();
/* call ReduceSum function */ /* call ReduceSum function */
ReduceSum(sGPU, tGPU1, 0); _ReduceSum(sGPU, tGPU1, 0);
ReduceSum(sGPU, tGPU2, 1); _ReduceSum(sGPU, tGPU2, 1);
/* check results */ /* check results */
cpuTest = tGPU1->CheckData(answer1, tUnitNum1) && tGPU2->CheckData(answer2, tUnitNum2); cpuTest = tGPU1->CheckData(answer1, tUnitNum1) && tGPU2->CheckData(answer2, tUnitNum2);
......
...@@ -77,7 +77,7 @@ bool TestReduceSumSquared1() ...@@ -77,7 +77,7 @@ bool TestReduceSumSquared1()
t->SetZeroAll(); t->SetZeroAll();
/* call ReduceSumSquared function */ /* call ReduceSumSquared function */
ReduceSumSquared(s, t, 0, shift); _ReduceSumSquared(s, t, 0, shift);
/* check results */ /* check results */
cpuTest = t->CheckData(answer, tUnitNum); cpuTest = t->CheckData(answer, tUnitNum);
...@@ -97,7 +97,7 @@ bool TestReduceSumSquared1() ...@@ -97,7 +97,7 @@ bool TestReduceSumSquared1()
tGPU->SetZeroAll(); tGPU->SetZeroAll();
/* call ReduceSumSquared function */ /* call ReduceSumSquared function */
ReduceSumSquared(sGPU, tGPU, 0, shiftGPU); _ReduceSumSquared(sGPU, tGPU, 0, shiftGPU);
/* check results */ /* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum); gpuTest = tGPU->CheckData(answer, tUnitNum);
...@@ -181,7 +181,7 @@ bool TestReduceSumSquared2() ...@@ -181,7 +181,7 @@ bool TestReduceSumSquared2()
t->SetZeroAll(); t->SetZeroAll();
/* call ReduceSumSquared function */ /* call ReduceSumSquared function */
ReduceSumSquared(s, t, 1, shift); _ReduceSumSquared(s, t, 1, shift);
/* check results */ /* check results */
cpuTest = t->CheckData(answer, tUnitNum); cpuTest = t->CheckData(answer, tUnitNum);
...@@ -201,7 +201,7 @@ bool TestReduceSumSquared2() ...@@ -201,7 +201,7 @@ bool TestReduceSumSquared2()
tGPU->SetZeroAll(); tGPU->SetZeroAll();
/* call ReduceSumSquared function */ /* call ReduceSumSquared function */
ReduceSumSquared(sGPU, tGPU, 1, shiftGPU); _ReduceSumSquared(sGPU, tGPU, 1, shiftGPU);
/* check results */ /* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum); gpuTest = tGPU->CheckData(answer, tUnitNum);
......
...@@ -77,7 +77,7 @@ bool TestReduceVariance1() ...@@ -77,7 +77,7 @@ bool TestReduceVariance1()
t->SetZeroAll(); t->SetZeroAll();
/* call ReduceVariance function */ /* call ReduceVariance function */
ReduceVariance(s, t, 0, mean); _ReduceVariance(s, t, 0, mean);
/* check results */ /* check results */
cpuTest = t->CheckData(answer, tUnitNum); cpuTest = t->CheckData(answer, tUnitNum);
...@@ -97,7 +97,7 @@ bool TestReduceVariance1() ...@@ -97,7 +97,7 @@ bool TestReduceVariance1()
tGPU->SetZeroAll(); tGPU->SetZeroAll();
/* call ReduceVariance function */ /* call ReduceVariance function */
ReduceVariance(sGPU, tGPU, 0, meanGPU); _ReduceVariance(sGPU, tGPU, 0, meanGPU);
/* check results */ /* check results */
gpuTest = t->CheckData(answer, tUnitNum); gpuTest = t->CheckData(answer, tUnitNum);
......
...@@ -52,15 +52,22 @@ bool TestScaleAndShift1() ...@@ -52,15 +52,22 @@ bool TestScaleAndShift1()
/* create tensors */ /* create tensors */
XTensor * s = NewTensor(sOrder, sDimSize); XTensor * s = NewTensor(sOrder, sDimSize);
XTensor * t = NewTensor(sOrder, sDimSize);
XTensor * tMe = NewTensor(sOrder, sDimSize);
XTensor tUser;
/* initialize variables */ /* initialize variables */
s->SetData(sData, sUnitNum); s->SetData(sData, sUnitNum);
tMe->SetData(sData, sUnitNum);
/* call ScaleAndShift function */ /* call ScaleAndShift function */
_ScaleAndShift(s, s, scaleFactor, shiftFactor); _ScaleAndShift(s, t, scaleFactor, shiftFactor);
_ScaleAndShiftMe(tMe, scaleFactor, shiftFactor);
tUser = ScaleAndShift(*s, scaleFactor, shiftFactor);
/* check results */ /* check results */
cpuTest = s->CheckData(answer, sUnitNum); cpuTest = t->CheckData(answer, sUnitNum) &&
tMe->CheckData(answer, sUnitNum) && tUser.CheckData(answer, sUnitNum);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
...@@ -68,25 +75,38 @@ bool TestScaleAndShift1() ...@@ -68,25 +75,38 @@ bool TestScaleAndShift1()
/* create tensors */ /* create tensors */
XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0); XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * tMeGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor tUserGPU;
/* initialize variables */ /* initialize variables */
sGPU->SetData(sData, sUnitNum); sGPU->SetData(sData, sUnitNum);
tMeGPU->SetData(sData, sUnitNum);
/* call ScaleAndShift function */ /* call ScaleAndShift function */
_ScaleAndShift(sGPU, sGPU, scaleFactor, shiftFactor); _ScaleAndShift(sGPU, tGPU, scaleFactor, shiftFactor);
_ScaleAndShiftMe(tMeGPU, scaleFactor, shiftFactor);
tUserGPU = ScaleAndShift(*sGPU, scaleFactor, shiftFactor);
/* check results */ /* check results */
gpuTest = sGPU->CheckData(answer, sUnitNum); gpuTest = tGPU->CheckData(answer, sUnitNum) &&
tMeGPU->CheckData(answer, sUnitNum) && tUserGPU.CheckData(answer, sUnitNum);
/* destroy variables */ /* destroy variables */
delete s; delete s;
delete t;
delete tMe;
delete sGPU; delete sGPU;
delete tGPU;
delete tMeGPU;
delete[] sDimSize; delete[] sDimSize;
return cpuTest && gpuTest; return cpuTest && gpuTest;
#else #else
/* destroy variables */ /* destroy variables */
delete s; delete s;
delete t;
delete tMe;
delete[] sDimSize; delete[] sDimSize;
return cpuTest; return cpuTest;
......
...@@ -67,16 +67,18 @@ bool TestSelect1() ...@@ -67,16 +67,18 @@ bool TestSelect1()
/* create tensors */ /* create tensors */
XTensor * s = NewTensor(sOrder, sDimSize); XTensor * s = NewTensor(sOrder, sDimSize);
XTensor * t = NewTensor(tOrder, tDimSize); XTensor * t = NewTensor(tOrder, tDimSize);
XTensor tUser;
/* initialize variables */ /* initialize variables */
s->SetData(sData, sUnitNum); s->SetData(sData, sUnitNum);
t->SetZeroAll(); t->SetZeroAll();
/* call SelectRange function */ /* call SelectRange function */
SelectRange(s, t, 2, 1, 3); _SelectRange(s, t, 2, 1, 3);
tUser = SelectRange(*s, 2, 1, 3);
/* check results */ /* check results */
cpuTest = t->CheckData(answer, tUnitNum); cpuTest = t->CheckData(answer, tUnitNum) && tUser.CheckData(answer, tUnitNum);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
...@@ -85,16 +87,18 @@ bool TestSelect1() ...@@ -85,16 +87,18 @@ bool TestSelect1()
/* create tensors */ /* create tensors */
XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0); XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0); XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
XTensor tUserGPU;
/* initialize variables */ /* initialize variables */
sGPU->SetData(sData, sUnitNum); sGPU->SetData(sData, sUnitNum);
tGPU->SetZeroAll(); tGPU->SetZeroAll();
/* call Select function */ /* call SelectRange function */
SelectRange(sGPU, tGPU, 2, 1, 3); _SelectRange(sGPU, tGPU, 2, 1, 3);
tUserGPU = SelectRange(*sGPU, 2, 1, 3);
/* check results */ /* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum); gpuTest = tGPU->CheckData(answer, tUnitNum) && tUserGPU.CheckData(answer, tUnitNum);
/* destroy variables */ /* destroy variables */
delete s; delete s;
......
...@@ -54,7 +54,7 @@ bool TestSigmoid1() ...@@ -54,7 +54,7 @@ bool TestSigmoid1()
y->SetZeroAll(); y->SetZeroAll();
/* call Sigmoid function */ /* call Sigmoid function */
Sigmoid(x, y); _Sigmoid(x, y);
/* check result */ /* check result */
cpuTest = y->CheckData(answer, unitNum, 1e-4F); cpuTest = y->CheckData(answer, unitNum, 1e-4F);
...@@ -72,7 +72,7 @@ bool TestSigmoid1() ...@@ -72,7 +72,7 @@ bool TestSigmoid1()
yGPU->SetZeroAll(); yGPU->SetZeroAll();
/* call Sigmoid function */ /* call Sigmoid function */
Sigmoid(xGPU, yGPU); _Sigmoid(xGPU, yGPU);
/* check result */ /* check result */
gpuTest = yGPU->CheckData(answer, unitNum, 1e-4F); gpuTest = yGPU->CheckData(answer, unitNum, 1e-4F);
...@@ -138,7 +138,7 @@ bool TestSigmoid2() ...@@ -138,7 +138,7 @@ bool TestSigmoid2()
dedx->SetZeroAll(); dedx->SetZeroAll();
/* call Sigmoid function */ /* call Sigmoid function */
Sigmoid(x, y); _Sigmoid(x, y);
/* call SigmoidBackward function */ /* call SigmoidBackward function */
SigmoidBackward(g, y, x, dedy, dedx, CROSSENTROPY); SigmoidBackward(g, y, x, dedy, dedx, CROSSENTROPY);
...@@ -167,7 +167,7 @@ bool TestSigmoid2() ...@@ -167,7 +167,7 @@ bool TestSigmoid2()
dedxGPU->SetZeroAll(); dedxGPU->SetZeroAll();
/* call Sigmoid function */ /* call Sigmoid function */
Sigmoid(xGPU, yGPU); _Sigmoid(xGPU, yGPU);
/* call SigmoidBackward function */ /* call SigmoidBackward function */
SigmoidBackward(gGPU, yGPU, xGPU, dedyGPU, dedxGPU, CROSSENTROPY); SigmoidBackward(gGPU, yGPU, xGPU, dedyGPU, dedxGPU, CROSSENTROPY);
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-12
*/
#include "TSign.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
case 1: test Sign function.
Set every entry to its sign value.
*/
bool TestSign1()
{
/* a tensor of size (3, 2) */
int aOrder = 2;
int * aDimSize = new int[aOrder];
aDimSize[0] = 3;
aDimSize[1] = 2;
int aUnitNum = 1;
for (int i = 0; i < aOrder; i++)
aUnitNum *= aDimSize[i];
DTYPE aData[3][2] = { {1.0F, -2.0F},
{0.0F, 4.0F},
{5.0F, -6.0F} };
DTYPE answer[3][2] = { {1.0F, -1.0F},
{0.0F, 1.0F},
{1.0F, -1.0F} };
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * a = NewTensor(aOrder, aDimSize);
/* initialize variables */
a->SetData(aData, aUnitNum);
/* call Sign function */
_Sign(a);
/* check results */
cpuTest = a->CheckData(answer, aUnitNum);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
/* Initialize variables */
aGPU->SetData(aData, aUnitNum);
/* call Sign function */
_Sign(aGPU);
/* check results */
gpuTest = aGPU->CheckData(answer, aUnitNum);
/* destroy variables */
delete a;
delete aGPU;
delete[] aDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete a;
delete[] aDimSize;
return cpuTest;
#endif // USE_CUDA
}
/* other cases */
/*
TODO!!
*/
/* test for Sign Function */
bool TestSign()
{
    XPRINT(0, stdout, "[TEST Sign] set every entry to its sign value \n");

    bool returnFlag = true;

    /* case 1 test */
    bool caseFlag = TestSign1();
    returnFlag = returnFlag && caseFlag;
    if (caseFlag)
        XPRINT(0, stdout, ">> case 1 passed!\n");
    else
        XPRINT(0, stdout, ">> case 1 failed!\n");

    /* other cases test */
    /*
    TODO!!
    */

    /* summary line */
    if (returnFlag) {
        XPRINT(0, stdout, ">> All Passed!\n");
    }
    else
        XPRINT(0, stdout, ">> Failed!\n");

    XPRINT(0, stdout, "\n");

    return returnFlag;
}
} // namespace nts(NiuTrans.Tensor)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-12
*/
#ifndef __TEST_SIGN_H__
#define __TEST_SIGN_H__

#include "../core/arithmetic/Sign.h"

namespace nts { // namespace nts(NiuTrans.Tensor)

/* test for Sign Function: runs every Sign test case and
   returns true only when all of them pass */
extern "C"
bool TestSign();

} // namespace nts(NiuTrans.Tensor)

#endif // __TEST_SIGN_H__
...@@ -58,7 +58,7 @@ bool TestSoftmax1() ...@@ -58,7 +58,7 @@ bool TestSoftmax1()
y->SetZeroAll(); y->SetZeroAll();
/* call Softmax function */ /* call Softmax function */
Softmax(x, y, 1); _Softmax(x, y, 1);
/* check result */ /* check result */
cpuTest = y->CheckData(answer, unitNum, 1e-4F); cpuTest = y->CheckData(answer, unitNum, 1e-4F);
...@@ -76,7 +76,7 @@ bool TestSoftmax1() ...@@ -76,7 +76,7 @@ bool TestSoftmax1()
yGPU->SetZeroAll(); yGPU->SetZeroAll();
/* call Softmax function */ /* call Softmax function */
Softmax(xGPU, yGPU, 1); _Softmax(xGPU, yGPU, 1);
/* check result */ /* check result */
gpuTest = yGPU->CheckData(answer, unitNum, 1e-4F); gpuTest = yGPU->CheckData(answer, unitNum, 1e-4F);
...@@ -139,7 +139,7 @@ bool TestSoftmax2() ...@@ -139,7 +139,7 @@ bool TestSoftmax2()
dedy->SetZeroAll(); dedy->SetZeroAll();
/* call Softmax function */ /* call Softmax function */
Softmax(x, y, 1); _Softmax(x, y, 1);
/* call SoftmaxBackward function */ /* call SoftmaxBackward function */
SoftmaxBackward(g, y, x, dedy, dedx, 1, CROSSENTROPY); SoftmaxBackward(g, y, x, dedy, dedx, 1, CROSSENTROPY);
...@@ -167,7 +167,7 @@ bool TestSoftmax2() ...@@ -167,7 +167,7 @@ bool TestSoftmax2()
dedyGPU->SetZeroAll(); dedyGPU->SetZeroAll();
/* call Softmax function */ /* call Softmax function */
Softmax(xGPU, yGPU, 1); _Softmax(xGPU, yGPU, 1);
/* call SoftmaxBackward function */ /* call SoftmaxBackward function */
SoftmaxBackward(gGPU, yGPU, xGPU, dedyGPU, dedxGPU, 1, CROSSENTROPY); SoftmaxBackward(gGPU, yGPU, xGPU, dedyGPU, dedxGPU, 1, CROSSENTROPY);
......
...@@ -53,7 +53,7 @@ bool TestSort1() ...@@ -53,7 +53,7 @@ bool TestSort1()
b->SetZeroAll(); b->SetZeroAll();
/* call Sort function */ /* call Sort function */
Sort(a, b, 0); _Sort(a, b, 0);
cpuTest = a->CheckData(answer, unitNum); cpuTest = a->CheckData(answer, unitNum);
...@@ -70,7 +70,7 @@ bool TestSort1() ...@@ -70,7 +70,7 @@ bool TestSort1()
bGPU->SetZeroAll(); bGPU->SetZeroAll();
/* call sum function */ /* call sum function */
Sort(aGPU, bGPU, 0); _Sort(aGPU, bGPU, 0);
/* check results */ /* check results */
gpuTest = aGPU->CheckData(answer, unitNum); gpuTest = aGPU->CheckData(answer, unitNum);
...@@ -121,7 +121,7 @@ bool TestSort2() ...@@ -121,7 +121,7 @@ bool TestSort2()
a->SetData(aData, unitNum); a->SetData(aData, unitNum);
/* call Sort function */ /* call Sort function */
Sort(a, b, 1); _Sort(a, b, 1);
/* check results */ /* check results */
cpuTest = a->CheckData(answer, unitNum); cpuTest = a->CheckData(answer, unitNum);
...@@ -138,7 +138,7 @@ bool TestSort2() ...@@ -138,7 +138,7 @@ bool TestSort2()
aGPU->SetData(aData, unitNum); aGPU->SetData(aData, unitNum);
/* call sum function */ /* call sum function */
Sort(aGPU, bGPU, 1); _Sort(aGPU, bGPU, 1);
/* check results */ /* check results */
gpuTest = aGPU->CheckData(answer, unitNum); gpuTest = aGPU->CheckData(answer, unitNum);
......
...@@ -71,7 +71,7 @@ bool TestSplit1() ...@@ -71,7 +71,7 @@ bool TestSplit1()
t->SetZeroAll(); t->SetZeroAll();
/* call split function */ /* call split function */
Split(s, t, 0, 2); _Split(s, t, 0, 2);
/* check results */ /* check results */
cpuTest = t->CheckData(answer, tUnitNum); cpuTest = t->CheckData(answer, tUnitNum);
...@@ -89,7 +89,7 @@ bool TestSplit1() ...@@ -89,7 +89,7 @@ bool TestSplit1()
tGPU->SetZeroAll(); tGPU->SetZeroAll();
/* call sum function */ /* call sum function */
Split(sGPU, tGPU, 0, 2); _Split(sGPU, tGPU, 0, 2);
/* check results */ /* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum); gpuTest = tGPU->CheckData(answer, tUnitNum);
...@@ -157,7 +157,7 @@ bool TestSplit2() ...@@ -157,7 +157,7 @@ bool TestSplit2()
t->SetZeroAll();; t->SetZeroAll();;
/* call split function */ /* call split function */
Split(s, t, 1, 2); _Split(s, t, 1, 2);
/* check results */ /* check results */
cpuTest = t->CheckData(answer, tUnitNum); cpuTest = t->CheckData(answer, tUnitNum);
...@@ -175,7 +175,7 @@ bool TestSplit2() ...@@ -175,7 +175,7 @@ bool TestSplit2()
tGPU->SetZeroAll(); tGPU->SetZeroAll();
/* call sum function */ /* call sum function */
Split(sGPU, tGPU, 1, 2); _Split(sGPU, tGPU, 1, 2);
/* check results */ /* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum); gpuTest = tGPU->CheckData(answer, tUnitNum);
...@@ -268,7 +268,7 @@ bool TestSplit3() ...@@ -268,7 +268,7 @@ bool TestSplit3()
tList.Add(t2); tList.Add(t2);
/* call split function */ /* call split function */
Split(s, &tList, 1, 2); _Split(s, &tList, 1, 2);
/* check results */ /* check results */
cpuTest = t1->CheckData(answer1, tUnitNum1) && t2->CheckData(answer2, tUnitNum2); cpuTest = t1->CheckData(answer1, tUnitNum1) && t2->CheckData(answer2, tUnitNum2);
...@@ -295,7 +295,7 @@ bool TestSplit3() ...@@ -295,7 +295,7 @@ bool TestSplit3()
tList.Add(tGPU2); tList.Add(tGPU2);
/* call split function */ /* call split function */
Split(s, &tList, 1, 2); _Split(s, &tList, 1, 2);
/* check results */ /* check results */
gpuTest = tGPU1->CheckData(answer1, tUnitNum1) && tGPU2->CheckData(answer2, tUnitNum2); gpuTest = tGPU1->CheckData(answer1, tUnitNum1) && tGPU2->CheckData(answer2, tUnitNum2);
......
...@@ -49,16 +49,24 @@ bool TestSum1() ...@@ -49,16 +49,24 @@ bool TestSum1()
/* create tensors */ /* create tensors */
XTensor * a = NewTensor(order, dimSize); XTensor * a = NewTensor(order, dimSize);
XTensor * b = NewTensor(order, dimSize); XTensor * b = NewTensor(order, dimSize);
XTensor * c = NewTensor(order, dimSize);
XTensor * cMe = NewTensor(order, dimSize);
XTensor cUser;
/* initialize variables */ /* initialize variables */
a->SetData(aData, unitNum); a->SetData(aData, unitNum);
cMe->SetData(aData, unitNum);
b->SetData(bData, unitNum); b->SetData(bData, unitNum);
c->SetZeroAll();
/* call sum function */ /* call sum function */
_Sum(a, b, a); _Sum(a, b, c);
_SumMe(cMe, b);
cUser = Sum(*a, *b);
/* check results */ /* check results */
cpuTest = a->CheckData(answer, unitNum); cpuTest = c->CheckData(answer, unitNum)
&& cMe->CheckData(answer, unitNum) && cUser.CheckData(answer, unitNum);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
...@@ -67,22 +75,34 @@ bool TestSum1() ...@@ -67,22 +75,34 @@ bool TestSum1()
/* create tensor */ /* create tensor */
XTensor * aGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0); XTensor * aGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * bGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0); XTensor * bGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * cGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * cMeGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor cUserGPU;
/* Initialize variables */ /* Initialize variables */
aGPU->SetData(aData, unitNum); aGPU->SetData(aData, unitNum);
cMeGPU->SetData(aData, unitNum);
bGPU->SetData(bData, unitNum); bGPU->SetData(bData, unitNum);
cGPU->SetZeroAll();
/* call sum function */ /* call sum function */
_Sum(aGPU, bGPU, aGPU); _Sum(aGPU, bGPU, cGPU);
_SumMe(cMeGPU, bGPU);
cUserGPU = Sum(*aGPU, *bGPU);
/* check results */ /* check results */
gpuTest = aGPU->CheckData(answer, unitNum); gpuTest = cGPU->CheckData(answer, unitNum)
&& cMeGPU->CheckData(answer, unitNum) && cUserGPU.CheckData(answer, unitNum);
/* destroy variables */ /* destroy variables */
delete a; delete a;
delete b; delete b;
delete c;
delete cMe;
delete aGPU; delete aGPU;
delete bGPU; delete bGPU;
delete cGPU;
delete cMeGPU;
delete[] dimSize; delete[] dimSize;
return cpuTest && gpuTest; return cpuTest && gpuTest;
...@@ -90,6 +110,8 @@ bool TestSum1() ...@@ -90,6 +110,8 @@ bool TestSum1()
/* destroy variables */ /* destroy variables */
delete a; delete a;
delete b; delete b;
delete c;
delete cMe;
delete[] dimSize; delete[] dimSize;
return cpuTest; return cpuTest;
...@@ -124,17 +146,23 @@ bool TestSum2() ...@@ -124,17 +146,23 @@ bool TestSum2()
XTensor * a = NewTensor(order, dimSize); XTensor * a = NewTensor(order, dimSize);
XTensor * b = NewTensor(order, dimSize); XTensor * b = NewTensor(order, dimSize);
XTensor * c = NewTensor(order, dimSize); XTensor * c = NewTensor(order, dimSize);
XTensor * cMe = NewTensor(order, dimSize);
XTensor cUser;
/* Initalize variables */ /* initialize variables */
a->SetData(aData, unitNum); a->SetData(aData, unitNum);
cMe->SetData(aData, unitNum);
b->SetData(bData, unitNum); b->SetData(bData, unitNum);
c->SetZeroAll(); c->SetZeroAll();
/* call Sum function */ /* call sum function */
_Sum(a, b, c, beta); _Sum(a, b, c, beta);
_SumMe(cMe, b, beta);
cUser = Sum(*a, *b, beta);
/* check results */ /* check results */
cpuTest = c->CheckData(answer, unitNum); cpuTest = c->CheckData(answer, unitNum)
&& cMe->CheckData(answer, unitNum) && cUser.CheckData(answer, unitNum);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
...@@ -144,25 +172,33 @@ bool TestSum2() ...@@ -144,25 +172,33 @@ bool TestSum2()
XTensor * aGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0); XTensor * aGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * bGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0); XTensor * bGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * cGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0); XTensor * cGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * cMeGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor cUserGPU;
/* Initalize variables */ /* Initialize variables */
aGPU->SetData(aData, unitNum); aGPU->SetData(aData, unitNum);
cMeGPU->SetData(aData, unitNum);
bGPU->SetData(bData, unitNum); bGPU->SetData(bData, unitNum);
cGPU->SetZeroAll(); cGPU->SetZeroAll();
/* call Sum function */ /* call sum function */
_Sum(aGPU, bGPU, cGPU, beta); _Sum(aGPU, bGPU, cGPU, beta);
_SumMe(cMeGPU, bGPU, beta);
cUserGPU = Sum(*aGPU, *bGPU, beta);
/* check results */ /* check results */
gpuTest = cGPU->CheckData(answer, unitNum); gpuTest = cGPU->CheckData(answer, unitNum)
&& cMeGPU->CheckData(answer, unitNum) && cUserGPU.CheckData(answer, unitNum);
/* destroy variables */ /* destroy variables */
delete a; delete a;
delete b; delete b;
delete c; delete c;
delete cMe;
delete aGPU; delete aGPU;
delete bGPU; delete bGPU;
delete cGPU; delete cGPU;
delete cMeGPU;
delete[] dimSize; delete[] dimSize;
return cpuTest && gpuTest; return cpuTest && gpuTest;
...@@ -171,6 +207,7 @@ bool TestSum2() ...@@ -171,6 +207,7 @@ bool TestSum2()
delete a; delete a;
delete b; delete b;
delete c; delete c;
delete cMe;
delete[] dimSize; delete[] dimSize;
return cpuTest; return cpuTest;
......
...@@ -79,7 +79,7 @@ bool TestSumByColumnTV1() ...@@ -79,7 +79,7 @@ bool TestSumByColumnTV1()
b->SetData(bData, bUnitNum); b->SetData(bData, bUnitNum);
/* call SumByColumnTV function */ /* call SumByColumnTV function */
SumByColumnTV(a, b, c); _SumByColumnTV(a, b, c);
/* check results */ /* check results */
cpuTest = c->CheckData(answer, cUnitNum); cpuTest = c->CheckData(answer, cUnitNum);
...@@ -99,7 +99,7 @@ bool TestSumByColumnTV1() ...@@ -99,7 +99,7 @@ bool TestSumByColumnTV1()
cGPU->SetZeroAll(); cGPU->SetZeroAll();
/* call SumByColumnTV function */ /* call SumByColumnTV function */
SumByColumnTV(aGPU, bGPU, cGPU); _SumByColumnTV(aGPU, bGPU, cGPU);
/* check results */ /* check results */
gpuTest = cGPU->CheckData(answer, cUnitNum); gpuTest = cGPU->CheckData(answer, cUnitNum);
...@@ -129,94 +129,6 @@ bool TestSumByColumnTV1() ...@@ -129,94 +129,6 @@ bool TestSumByColumnTV1()
#endif // USE_CUDA #endif // USE_CUDA
} }
/*
case 2: test SumByColumnTV function
sum of a tensor and a vector (column vector) in a column by column manner
NOTE(review): this case exercises the two-argument in-place form
SumByColumnTV(a, b); the result is accumulated into a (a is checked
against `answer` below).
*/
bool TestSumByColumnTV2()
{
    /* a tensor of size (2, 4) */
    int aOrder = 2;
    int * aDimSize = new int[aOrder];
    aDimSize[0] = 2;
    aDimSize[1] = 4;

    int aUnitNum = 1;
    for (int i = 0; i < aOrder; i++)
        aUnitNum *= aDimSize[i];

    /* a tensor of size (2, 1) — the column vector added to a */
    int bOrder = 2;
    int * bDimSize = new int[bOrder];
    bDimSize[0] = 2;
    bDimSize[1] = 1;

    int bUnitNum = 1;
    for (int i = 0; i < bOrder; i++)
        bUnitNum *= bDimSize[i];

    DTYPE aData[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
                          {4.0F, 5.0F, 6.0F, 7.0F} };
    DTYPE bData[2][1] = { {1.0F},
                          {0.0F} };
    /* expected result: b[i] is added to every element of row i of a */
    DTYPE answer[2][4] = { {1.0F, 2.0F, 3.0F, 4.0F},
                          {4.0F, 5.0F, 6.0F, 7.0F} };

    /* CPU test */
    bool cpuTest = true;

    /* create tensors */
    XTensor * a = NewTensor(aOrder, aDimSize);
    XTensor * b = NewTensor(bOrder, bDimSize);

    /* initialize variables */
    a->SetData(aData, aUnitNum);
    b->SetData(bData, bUnitNum);

    /* call SumByColumnTV function (in-place: accumulates into a) */
    SumByColumnTV(a, b);

    /* check results */
    cpuTest = a->CheckData(answer, aUnitNum);

#ifdef USE_CUDA
    /* GPU test: repeat the same check on device 0 */
    bool gpuTest = true;

    /* create tensor */
    XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
    XTensor * bGPU = NewTensor(bOrder, bDimSize, X_FLOAT, 1.0F, 0);

    /* Initialize variables */
    aGPU->SetData(aData, aUnitNum);
    bGPU->SetData(bData, bUnitNum);

    /* call SumByColumnTV function */
    SumByColumnTV(aGPU, bGPU);

    /* check results */
    gpuTest = aGPU->CheckData(answer, aUnitNum);

    /* destroy variables */
    delete a;
    delete b;
    delete aGPU;
    delete bGPU;
    delete[] aDimSize;
    delete[] bDimSize;

    return cpuTest && gpuTest;
#else
    /* destroy variables */
    delete a;
    delete b;
    delete[] aDimSize;
    delete[] bDimSize;

    return cpuTest;
#endif // USE_CUDA
}
/* other cases */ /* other cases */
/* /*
TODO!! TODO!!
...@@ -237,15 +149,6 @@ bool TestSumByColumnTV() ...@@ -237,15 +149,6 @@ bool TestSumByColumnTV()
else else
XPRINT(0, stdout, ">> case 1 passed!\n"); XPRINT(0, stdout, ">> case 1 passed!\n");
/* case 2 test */
caseFlag = TestSumByColumnTV2();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 2 failed!\n");
}
else
XPRINT(0, stdout, ">> case 2 passed!\n");
/* other cases test */ /* other cases test */
/* /*
TODO!! TODO!!
......
...@@ -80,7 +80,7 @@ bool TestSumByColumnVT1() ...@@ -80,7 +80,7 @@ bool TestSumByColumnVT1()
c->SetZeroAll(); c->SetZeroAll();
/* call SumByColumnVT function */ /* call SumByColumnVT function */
SumByColumnVT(a, b, c); _SumByColumnVT(a, b, c);
/* check results */ /* check results */
cpuTest = c->CheckData(answer, cUnitNum); cpuTest = c->CheckData(answer, cUnitNum);
...@@ -100,7 +100,7 @@ bool TestSumByColumnVT1() ...@@ -100,7 +100,7 @@ bool TestSumByColumnVT1()
cGPU->SetZeroAll(); cGPU->SetZeroAll();
/* call SumByColumnVT function */ /* call SumByColumnVT function */
SumByColumnVT(aGPU, bGPU, cGPU); _SumByColumnVT(aGPU, bGPU, cGPU);
/* check results */ /* check results */
gpuTest = cGPU->CheckData(answer, cUnitNum); gpuTest = cGPU->CheckData(answer, cUnitNum);
...@@ -130,94 +130,6 @@ bool TestSumByColumnVT1() ...@@ -130,94 +130,6 @@ bool TestSumByColumnVT1()
#endif // USE_CUDA #endif // USE_CUDA
} }
/*
case 2: test SumByColumnVT function
sum of a vector (column vector) and a tensor in a column by column manner;
the result is written back into the vector operand
*/
bool TestSumByColumnVT2()
{
    /* a column vector of size (2, 1) */
    int orderA = 2;
    int * dimA = new int[orderA];
    dimA[0] = 2;
    dimA[1] = 1;
    int unitNumA = 1;
    for (int d = 0; d < orderA; d++)
        unitNumA *= dimA[d];

    /* a tensor of size (2, 4) */
    int orderB = 2;
    int * dimB = new int[orderB];
    dimB[0] = 2;
    dimB[1] = 4;
    int unitNumB = 1;
    for (int d = 0; d < orderB; d++)
        unitNumB *= dimB[d];

    DTYPE aData[2][1] = { {1.0F},
                          {0.0F} };
    DTYPE bData[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
                          {4.0F, 5.0F, 6.0F, 7.0F} };
    /* each entry of a accumulates the sum of the matching row of b */
    DTYPE answer[2][1] = { {7.0F},
                           {22.0F} };

    /* CPU test */
    bool cpuTest = true;

    /* create tensors */
    XTensor * a = NewTensor(orderA, dimA);
    XTensor * b = NewTensor(orderB, dimB);

    /* initialize variables */
    a->SetData(aData, unitNumA);
    b->SetData(bData, unitNumB);

    /* call SumByColumnVT function (in-place on a) */
    SumByColumnVT(a, b);

    /* check results */
    cpuTest = a->CheckData(answer, unitNumA);

#ifdef USE_CUDA
    /* GPU test */
    bool gpuTest = true;

    /* create tensors on GPU device 0 */
    XTensor * aGPU = NewTensor(orderA, dimA, X_FLOAT, 1.0F, 0);
    XTensor * bGPU = NewTensor(orderB, dimB, X_FLOAT, 1.0F, 0);

    /* initialize variables */
    aGPU->SetData(aData, unitNumA);
    bGPU->SetData(bData, unitNumB);

    /* call SumByColumnVT function (in-place on aGPU) */
    SumByColumnVT(aGPU, bGPU);

    /* check results */
    gpuTest = aGPU->CheckData(answer, unitNumA);

    /* destroy variables */
    delete a;
    delete b;
    delete aGPU;
    delete bGPU;
    delete[] dimA;
    delete[] dimB;

    return cpuTest && gpuTest;
#else
    /* destroy variables */
    delete a;
    delete b;
    delete[] dimA;
    delete[] dimB;

    return cpuTest;
#endif // USE_CUDA
}
/* other cases */ /* other cases */
/* /*
TODO!! TODO!!
...@@ -238,15 +150,6 @@ bool TestSumByColumnVT() ...@@ -238,15 +150,6 @@ bool TestSumByColumnVT()
else else
XPRINT(0, stdout, ">> case 1 passed!\n"); XPRINT(0, stdout, ">> case 1 passed!\n");
/* case 2 test */
caseFlag = TestSumByColumnVT2();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 2 failed!\n");
}
else
XPRINT(0, stdout, ">> case 2 passed!\n");
/* other cases test */ /* other cases test */
/* /*
TODO!! TODO!!
......
...@@ -84,11 +84,11 @@ bool TestTopK1() ...@@ -84,11 +84,11 @@ bool TestTopK1()
/* call TopK function */ /* call TopK function */
int dim = 0; int dim = 0;
int k = sDimSize[dim]; int k = sDimSize[dim];
TopK(s, t1, index1, dim, k); _TopK(s, t1, index1, dim, k);
dim = 1; dim = 1;
k = sDimSize[dim]; k = sDimSize[dim];
TopK(s, t2, index2, dim, k); _TopK(s, t2, index2, dim, k);
/* check results */ /* check results */
cpuTest = t1->CheckData(tAnswer1, tUnitNum) && cpuTest = t1->CheckData(tAnswer1, tUnitNum) &&
...@@ -117,11 +117,11 @@ bool TestTopK1() ...@@ -117,11 +117,11 @@ bool TestTopK1()
/* call TopK function */ /* call TopK function */
dim = 0; dim = 0;
k = sDimSize[dim]; k = sDimSize[dim];
TopK(sGPU, tGPU1, indexGPU1, dim, k); _TopK(sGPU, tGPU1, indexGPU1, dim, k);
dim = 1; dim = 1;
k = sDimSize[dim]; k = sDimSize[dim];
TopK(sGPU, tGPU2, indexGPU2, dim, k); _TopK(sGPU, tGPU2, indexGPU2, dim, k);
/* check results */ /* check results */
gpuTest = tGPU1->CheckData(tAnswer1, tUnitNum) && gpuTest = tGPU1->CheckData(tAnswer1, tUnitNum) &&
...@@ -207,7 +207,7 @@ bool TestTopK2() ...@@ -207,7 +207,7 @@ bool TestTopK2()
/* call TopK function */ /* call TopK function */
int dim = 1; int dim = 1;
int k = tDimSize[dim]; int k = tDimSize[dim];
TopK(s, t, index, dim, k); _TopK(s, t, index, dim, k);
/* check results */ /* check results */
cpuTest = t->CheckData(tAnswer, tUnitNum) && index->CheckData(indexAnswer, tUnitNum); cpuTest = t->CheckData(tAnswer, tUnitNum) && index->CheckData(indexAnswer, tUnitNum);
...@@ -229,7 +229,7 @@ bool TestTopK2() ...@@ -229,7 +229,7 @@ bool TestTopK2()
/* call TopK function */ /* call TopK function */
dim = 1; dim = 1;
k = tDimSize[dim]; k = tDimSize[dim];
TopK(sGPU, tGPU, indexGPU, dim, k); _TopK(sGPU, tGPU, indexGPU, dim, k);
/* check results */ /* check results */
gpuTest = tGPU->CheckData(tAnswer, tUnitNum) && indexGPU->CheckData(indexAnswer, tUnitNum); gpuTest = tGPU->CheckData(tAnswer, tUnitNum) && indexGPU->CheckData(indexAnswer, tUnitNum);
......
...@@ -91,8 +91,8 @@ bool TestUnsqueeze1() ...@@ -91,8 +91,8 @@ bool TestUnsqueeze1()
t2->SetZeroAll(); t2->SetZeroAll();
/* call Unsqueeze function */ /* call Unsqueeze function */
Unsqueeze(s, t1, 1, 2); _Unsqueeze(s, t1, 1, 2);
Unsqueeze(s, t2, 2, 2); _Unsqueeze(s, t2, 2, 2);
/* check results */ /* check results */
cpuTest = t1->CheckData(answer1, tUnitNum1) && t2->CheckData(answer2, tUnitNum2); cpuTest = t1->CheckData(answer1, tUnitNum1) && t2->CheckData(answer2, tUnitNum2);
...@@ -112,8 +112,8 @@ bool TestUnsqueeze1() ...@@ -112,8 +112,8 @@ bool TestUnsqueeze1()
tGPU2->SetZeroAll(); tGPU2->SetZeroAll();
/* call Unsqueeze function */ /* call Unsqueeze function */
Unsqueeze(sGPU, tGPU1, 1, 2); _Unsqueeze(sGPU, tGPU1, 1, 2);
Unsqueeze(sGPU, tGPU2, 2, 2); _Unsqueeze(sGPU, tGPU2, 2, 2);
/* check results */ /* check results */
gpuTest = tGPU1->CheckData(answer1, tUnitNum1) && tGPU2->CheckData(answer2, tUnitNum2); gpuTest = tGPU1->CheckData(answer1, tUnitNum1) && tGPU2->CheckData(answer2, tUnitNum2);
......
...@@ -29,10 +29,13 @@ bool Test() ...@@ -29,10 +29,13 @@ bool Test()
bool wrong = false; bool wrong = false;
XPRINT(0, stdout, "Testing the XTensor utilites ... \n\n"); XPRINT(0, stdout, "Testing the XTensor utilites ... \n\n");
wrong = !TestAbsolute() || wrong;
wrong = !TestConcatenate() || wrong; wrong = !TestConcatenate() || wrong;
wrong = !TestConcatenateSolely() || wrong; wrong = !TestConcatenateSolely() || wrong;
wrong = !TestConvertDataType() || wrong;
wrong = !TestCopyIndexed() || wrong; wrong = !TestCopyIndexed() || wrong;
wrong = !TestCopyValues() || wrong; wrong = !TestCopyValues() || wrong;
wrong = !TestLog() || wrong;
wrong = !TestMatrixMul() || wrong; wrong = !TestMatrixMul() || wrong;
wrong = !TestMatrixMul2D() || wrong; wrong = !TestMatrixMul2D() || wrong;
wrong = !TestMatrixMul2DParallel() || wrong; wrong = !TestMatrixMul2DParallel() || wrong;
...@@ -52,6 +55,7 @@ bool Test() ...@@ -52,6 +55,7 @@ bool Test()
wrong = !TestSelect() || wrong; wrong = !TestSelect() || wrong;
wrong = !TestSetAscendingOrder() || wrong; wrong = !TestSetAscendingOrder() || wrong;
wrong = !TestSetData() || wrong; wrong = !TestSetData() || wrong;
wrong = !TestSign() || wrong;
wrong = !TestSort() || wrong; wrong = !TestSort() || wrong;
wrong = !TestSplit() || wrong; wrong = !TestSplit() || wrong;
wrong = !TestSum() || wrong; wrong = !TestSum() || wrong;
......
...@@ -22,10 +22,13 @@ ...@@ -22,10 +22,13 @@
#ifndef __TEST_H__ #ifndef __TEST_H__
#define __TEST_H__ #define __TEST_H__
#include "TAbsolute.h"
#include "TConcatenate.h" #include "TConcatenate.h"
#include "TConcatenateSolely.h" #include "TConcatenateSolely.h"
#include "TConvertDataType.h"
#include "TCopyIndexed.h" #include "TCopyIndexed.h"
#include "TCopyValues.h" #include "TCopyValues.h"
#include "TLog.h"
#include "TMatrixMul.h" #include "TMatrixMul.h"
#include "TMatrixMul2D.h" #include "TMatrixMul2D.h"
#include "TMatrixMul2DParallel.h" #include "TMatrixMul2DParallel.h"
...@@ -45,6 +48,7 @@ ...@@ -45,6 +48,7 @@
#include "TSelect.h" #include "TSelect.h"
#include "TSetAscendingOrder.h" #include "TSetAscendingOrder.h"
#include "TSetData.h" #include "TSetData.h"
#include "TSign.h"
#include "TSort.h" #include "TSort.h"
#include "TSplit.h" #include "TSplit.h"
#include "TSum.h" #include "TSum.h"
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论