Commit 03adb24b by liyinqiao

Fix ReduceSum and ReduceMax bugs.

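Fix the ReduceSum and ReduceMax VectorBuffer loops: the read pointer in the strideNum / 32 loop now advances by 4 * vecBufLength per iteration, the "data is separated" loop is bounded by stride / 32 instead of input->dimSize[input->order - 1] / 32, and the ReduceSum bias offset uses k instead of j.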
parent 2f4da0fa
...@@ -86,7 +86,7 @@ void _funcCPUName(const XTensor * input, XTensor * output, int dim) ...@@ -86,7 +86,7 @@ void _funcCPUName(const XTensor * input, XTensor * output, int dim)
vecBuf[j] = VectorBuffer::loadu((DTYPE*)(ip)+j * vecBufLength); \ vecBuf[j] = VectorBuffer::loadu((DTYPE*)(ip)+j * vecBufLength); \
} \ } \
for (int j = 1; j < strideNum / 32; j++) { \ for (int j = 1; j < strideNum / 32; j++) { \
const DTYPE* ptr = (DTYPE*)(ip + j * vecBufLength); \ const DTYPE* ptr = (DTYPE*)(ip + j *4 * vecBufLength); \
vecBuf[0] = vecBuf[0]._vectorOp(VectorBuffer::loadu(ptr + 0 * vecBufLength)); \ vecBuf[0] = vecBuf[0]._vectorOp(VectorBuffer::loadu(ptr + 0 * vecBufLength)); \
vecBuf[1] = vecBuf[1]._vectorOp(VectorBuffer::loadu(ptr + 1 * vecBufLength)); \ vecBuf[1] = vecBuf[1]._vectorOp(VectorBuffer::loadu(ptr + 1 * vecBufLength)); \
vecBuf[2] = vecBuf[2]._vectorOp(VectorBuffer::loadu(ptr + 2 * vecBufLength)); \ vecBuf[2] = vecBuf[2]._vectorOp(VectorBuffer::loadu(ptr + 2 * vecBufLength)); \
...@@ -106,7 +106,7 @@ void _funcCPUName(const XTensor * input, XTensor * output, int dim) ...@@ -106,7 +106,7 @@ void _funcCPUName(const XTensor * input, XTensor * output, int dim)
else { \ else { \
/* data is separated */ \ /* data is separated */ \
for(int i = 0; i < blockNum; i++){ \ for(int i = 0; i < blockNum; i++){ \
for(int j = 0; j < input->dimSize[input->order - 1] / 32; j++){ \ for(int j = 0; j < stride / 32; j++){ \
DTYPE * ip = (DTYPE*)input->data + blockSize * i; \ DTYPE * ip = (DTYPE*)input->data + blockSize * i; \
DTYPE * op = (DTYPE*)output->data + stride * i; \ DTYPE * op = (DTYPE*)output->data + stride * i; \
VectorBuffer vecBuf[4]; \ VectorBuffer vecBuf[4]; \
......
...@@ -105,7 +105,7 @@ void _ReduceSum(const XTensor * input, XTensor * output, int dim, const XTensor ...@@ -105,7 +105,7 @@ void _ReduceSum(const XTensor * input, XTensor * output, int dim, const XTensor
vecBuf[j] = VectorBuffer::loadu((DTYPE*)(ip) + j * vecBufLength, isExp, power, bias); vecBuf[j] = VectorBuffer::loadu((DTYPE*)(ip) + j * vecBufLength, isExp, power, bias);
} }
for(int j = 1; j < strideNum / 32; j++){ for(int j = 1; j < strideNum / 32; j++){
const DTYPE* ptr = (DTYPE*)(ip + j * vecBufLength); const DTYPE* ptr = (DTYPE*)(ip + (j * 4) * vecBufLength);
vecBuf[0] = vecBuf[0] + VectorBuffer::loadu(ptr + 0 * vecBufLength, isExp, power, bias); vecBuf[0] = vecBuf[0] + VectorBuffer::loadu(ptr + 0 * vecBufLength, isExp, power, bias);
vecBuf[1] = vecBuf[1] + VectorBuffer::loadu(ptr + 1 * vecBufLength, isExp, power, bias); vecBuf[1] = vecBuf[1] + VectorBuffer::loadu(ptr + 1 * vecBufLength, isExp, power, bias);
vecBuf[2] = vecBuf[2] + VectorBuffer::loadu(ptr + 2 * vecBufLength, isExp, power, bias); vecBuf[2] = vecBuf[2] + VectorBuffer::loadu(ptr + 2 * vecBufLength, isExp, power, bias);
...@@ -122,7 +122,7 @@ void _ReduceSum(const XTensor * input, XTensor * output, int dim, const XTensor ...@@ -122,7 +122,7 @@ void _ReduceSum(const XTensor * input, XTensor * output, int dim, const XTensor
} else{ } else{
//data is separated //data is separated
for(int i = 0; i < blockNum; i++){ for(int i = 0; i < blockNum; i++){
for(int j = 0; j < input->dimSize[input->order - 1] / 32; j++){ for(int j = 0; j < stride / 32; j++){
DTYPE * ip = (DTYPE*)input->data + blockSize * i; DTYPE * ip = (DTYPE*)input->data + blockSize * i;
DTYPE * op = (DTYPE*)output->data + stride * i; DTYPE * op = (DTYPE*)output->data + stride * i;
DTYPE * sp = shift != NULL ? (DTYPE*)shift->data + stride * i : NULL; DTYPE * sp = shift != NULL ? (DTYPE*)shift->data + stride * i : NULL;
...@@ -133,7 +133,7 @@ void _ReduceSum(const XTensor * input, XTensor * output, int dim, const XTensor ...@@ -133,7 +133,7 @@ void _ReduceSum(const XTensor * input, XTensor * output, int dim, const XTensor
} }
VectorBuffer vecBuf[4]; VectorBuffer vecBuf[4];
for(int k = 0; k < 4; k++){ for(int k = 0; k < 4; k++){
vecBuf[k] = VectorBuffer::loadu((DTYPE*)(ip) + (j * 4 + k) * 32 / sizeof(DTYPE), isExp, power, bias + j * 32 / sizeof(DTYPE)); vecBuf[k] = VectorBuffer::loadu((DTYPE*)(ip) + (j * 4 + k) * 32 / sizeof(DTYPE), isExp, power, bias + k * 32 / sizeof(DTYPE));
} }
for(int k = 1; k < strideNum; k++){ for(int k = 1; k < strideNum; k++){
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论