Commit 1c26ff5b by xiaotong

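Better control of the merge strategy: _Merge now uses its own thresholds (MIN_TENSOR_MERGE_NUM, MIN_TENSOR_MERGE_LIST_NUM) instead of reusing the MIN_TENSOR_SPLIT_* thresholds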

parent 78954fad
...@@ -49,6 +49,8 @@ struct XLink; ...@@ -49,6 +49,8 @@ struct XLink;
#define USE_BATCHED_STRIDED_MAT_MUL #define USE_BATCHED_STRIDED_MAT_MUL
#define MIN_TENSOR_SPLIT_NUM 0 #define MIN_TENSOR_SPLIT_NUM 0
#define MIN_TENSOR_SPLIT_LIST_NUM 1024 #define MIN_TENSOR_SPLIT_LIST_NUM 1024
#define MIN_TENSOR_MERGE_NUM 0
#define MIN_TENSOR_MERGE_LIST_NUM 1024
#define MIN_TENSOR_CAT_NUM 8 #define MIN_TENSOR_CAT_NUM 8
/* computation flags */ /* computation flags */
......
...@@ -94,7 +94,7 @@ void _Merge(const XTensor * s, XTensor * t, int whereToMerge, int leadingDim) ...@@ -94,7 +94,7 @@ void _Merge(const XTensor * s, XTensor * t, int whereToMerge, int leadingDim)
gridSize = blockNum; gridSize = blockNum;
gridNum = s->unitNum / (blockSize * blockNum); gridNum = s->unitNum / (blockSize * blockNum);
if (mergedNum * gridNum <= MIN_TENSOR_SPLIT_NUM) { if (mergedNum * gridNum <= MIN_TENSOR_MERGE_NUM) {
int sPitch = blockSize * s->unitSize; int sPitch = blockSize * s->unitSize;
int tPtich = blockSize * mergedNum * t->unitSize; int tPtich = blockSize * mergedNum * t->unitSize;
int mSize = blockSize * t->unitSize; int mSize = blockSize * t->unitSize;
...@@ -253,7 +253,7 @@ void _Merge(const XList * smalls, XTensor * big, int whereToMerge) ...@@ -253,7 +253,7 @@ void _Merge(const XList * smalls, XTensor * big, int whereToMerge)
gridNum = s0->unitNum / (blockSize * blockNum); gridNum = s0->unitNum / (blockSize * blockNum);
/* merging with fewer data copy operations */ /* merging with fewer data copy operations */
if (mergedNum * gridNum <= MIN_TENSOR_SPLIT_LIST_NUM) { if (mergedNum * gridNum <= MIN_TENSOR_MERGE_LIST_NUM) {
int sPitch = blockSize * s0->unitSize; int sPitch = blockSize * s0->unitSize;
int tPtich = blockSize * mergedNum * big->unitSize; int tPtich = blockSize * mergedNum * big->unitSize;
int mSize = blockSize * big->unitSize; int mSize = blockSize * big->unitSize;
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论