Commit 75124987 by xiaotong

bug fixes

parent a26caf40
@@ -34,7 +34,7 @@ void TransposeTest();
 void SumDimTest();
 using namespace nts;
-using namespace samplefnnlm;
+using namespace fnnlm;
 int main( int argc, const char ** argv )
 {
...
@@ -143,7 +143,7 @@ void XMathGrad::GradSumDim(XTensor * node)
        Then reduce along with z and x to obtain dE/db. */
     node->grad->Reshape(3, reshapedSize);
-    XTensor * interGrad = NewTensorBuf(2, reshapedSize, b->devID, b->mem, b->dataType, b->denseRatio);
+    XTensor * interGrad = NewTensorBuf(2, reshapedSize, b->dataType, b->denseRatio, b->devID, b->mem);
     _ReduceSum(node->grad, interGrad, 2);
...
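Note: the call site above simply tracks the NewTensorBuf signature change made later in this commit, where dataType and denseRatio now come before devID and the memory pool.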
@@ -33,7 +33,7 @@
 #include "../../tensor/function/FHeader.h"
 #include "../../network/XNet.h"
-namespace samplefnnlm
+namespace fnnlm
 {
 #define MAX_NAME_LENGTH 1024
...
@@ -36,7 +36,7 @@
 using namespace nts;
-namespace samplefnnlm
+namespace fnnlm
 {
 #define _EXIT_(x)// exit(x)
@@ -126,7 +126,7 @@ struct FNNNet
     XTensor output;
 };
-/* entry of the program */
+/* entrance of the program */
 int FNNLMMain(int argc, const char ** argv);
 };
...
@@ -1891,17 +1891,16 @@ generate a XTensor which allocates data on the buffer
 >> myDenseRatio - how often an element has non-zero value
 */
-XTensor * NewTensorBuf(const int myOrder, const int * myDimSize, int devID, XMem * myMem,
-                       const TENSOR_DATA_TYPE myDataType, const float myDenseRatio)
+XTensor * NewTensorBuf(const int myOrder, const int * myDimSize,
+                       const TENSOR_DATA_TYPE myDataType, const float myDenseRatio,
+                       const int devID, XMem * myMem)
 {
-    CheckNTErrors(myMem != NULL, "No memory pool specified!");
     int dims[MAX_TENSOR_DIM_NUM];
     memcpy(dims, myDimSize, sizeof(int) * myOrder);
     dims[0] = -abs(dims[0]);
-    XTensor * tensor = NewTensor(myOrder, dims, myDataType, myDenseRatio, -1, myMem);
+    XTensor * tensor = NewTensor(myOrder, dims, myDataType, myDenseRatio, devID, myMem);
     if(myMem != NULL)
         tensor->data = myMem->AllocBuf(myMem->devID, tensor->unitNum * tensor->unitSize);
@@ -1922,8 +1921,8 @@ generate a XTensor which allocates data on the buffer
 XTensor * NewTensorBuf(const XTensor * reference, int devID, XMem * myMem)
 {
     return NewTensorBuf(reference->order, reference->dimSize,
-                        devID, myMem,
-                        reference->dataType, reference->denseRatio);
+                        reference->dataType, reference->denseRatio,
+                        devID, myMem);
 }
 /*
...
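Note on the implementation change above: the assertion that a memory pool must be supplied is gone, AllocBuf is only used when myMem is non-NULL, and devID is now passed through to NewTensor instead of the hard-coded -1. In other words, NewTensorBuf can presumably now be called without a pool, falling back to whatever allocation NewTensor performs on the given device.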
@@ -391,8 +391,9 @@ XTensor * NewTensor(const int myOrder, const int * myDimSize, const TENSOR_DATA_
                    const float myDenseRatio = 1.0F, const int myDevID = -1, XMem * myMem = NULL);
 /* generate a XTensor which allocates data on the buffer */
-XTensor * NewTensorBuf(const int myOrder, const int * myDimSize, int devID, XMem * myMem,
-                       const TENSOR_DATA_TYPE myDataType = X_FLOAT, const float myDenseRatio = 1.0F);
+XTensor * NewTensorBuf(const int myOrder, const int * myDimSize,
+                       const TENSOR_DATA_TYPE myDataType = X_FLOAT, const float myDenseRatio = 1.0F,
+                       const int myDevID = -1, XMem * myMem = NULL);
 /* generate a XTensor which allocates data on the buffer */
 XTensor * NewTensorBuf(const XTensor * reference, int devID, XMem * myMem);
...
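For callers of the new header: NewTensorBuf now takes (order, dimSize, dataType, denseRatio, devID, mem), with the device and memory-pool arguments last and defaulted. A minimal usage sketch against that declaration (the shape, devID and mem values below are illustrative placeholders, not taken from this commit):

    int dims[2] = {8, 16};            /* placeholder shape */
    int devID = -1;                   /* run on CPU; placeholder value */
    XMem * mem = NULL;                /* no pool; the NULL-pool case is now allowed */

    /* dataType and denseRatio precede devID and mem in the new order */
    XTensor * buf = NewTensorBuf(2, dims, X_FLOAT, 1.0F, devID, mem);

    /* ... use buf as temporary storage ... */

    DelTensorBuf(buf);                /* release it, as the LogSoftmax changes below do */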
@@ -38,6 +38,14 @@ log scale softmax y = log(e^x / \sum_{i} e^{x_i})
 */
 void _LogSoftmax(const XTensor * x, XTensor * y, int leadDim)
 {
+    CheckNTErrors(!x->isSparse && !y->isSparse, "TODO!");
+    CheckNTErrors(x && y, "Empty input tensors!");
+    if(y->dimSize[leadDim] == 1){
+        y->SetZeroAll();
+        return;
+    }
     int leadDimRDI = x->order - leadDim - 1;
     if (!x->isSparse && !y->isSparse &&
         x->dataType == DEFAULT_DTYPE && y->dataType == DEFAULT_DTYPE)
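The early return added here covers a degenerate case: with only one element along leadDim, y = log(e^x / e^x) = log(1) = 0 for every entry, so zero-filling y is exactly the log-softmax result. One nit: the x && y check runs after x->isSparse has already been dereferenced on the previous line, so it cannot actually catch a NULL input.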
@@ -63,30 +71,31 @@ void _LogSoftmax(const XTensor * x, XTensor * y, int leadDim)
     int blockSize = 1;
     int blockNum = 1;
-    for (int i = 0; i < leadDimRDI; i++)
-        stride *= y->dimSizeRDI[i];
-    blockSize = stride * dimensionSize;
-    blockNum = y->unitNum / blockSize;
-    max = NewTensor(x->order - 1, dimSize, x->dataType, x->denseRatio, x->devID, mem);
-    sum = NewTensor(x->order - 1, dimSize, x->dataType, x->denseRatio, x->devID, mem);
-    max->data = mem != NULL ? (char*)mem->AllocBuf(mem->devID, max->unitNum * max->unitSize) : XMemAlloc(max->devID, max->unitNum * max->unitSize);
-    sum->data = mem != NULL ? (char*)mem->AllocBuf(mem->devID, sum->unitNum * sum->unitSize) : XMemAlloc(sum->devID, sum->unitNum * sum->unitSize);
+    max = NewTensorBuf(x->order - 1, dimSize, x->dataType, x->denseRatio, x->devID, mem);
+    sum = NewTensorBuf(x->order - 1, dimSize, x->dataType, x->denseRatio, x->devID, mem);
     _ReduceMax(x, max, leadDim);
     _ReduceSum(x, sum, leadDim, max, 1.0F, true);
     if (x->devID >= 0) {
-        int dims[2];
-        dims[0] = -stride;
-        dims[1] = dimensionSize;
-        blockx = NewTensor(2, dims, x->dataType, x->denseRatio, x->devID, mem);
-        blocky = NewTensor(2, dims, x->dataType, x->denseRatio, x->devID, mem);
-        dims[0] = -stride;
-        dims[1] = 1;
-        blockMax = NewTensor(2, dims, x->dataType, x->denseRatio, x->devID, mem);
-        blockSum = NewTensor(2, dims, x->dataType, x->denseRatio, x->devID, mem);
+        if(leadDimRDI == 0){
+            blockSize = y->unitNum;
+            blockNum = 1;
+            blockx = NewTensor2D(blockSize/dimensionSize, -dimensionSize, x->dataType, x->devID, mem);
+            blocky = NewTensor2D(blockSize/dimensionSize, -dimensionSize, x->dataType, x->devID, mem);
+            blockMax = NewTensor2D(blockSize/dimensionSize, -1, x->dataType, x->devID, mem);
+            blockSum = NewTensor2D(blockSize/dimensionSize, -1, x->dataType, x->devID, mem);
+        }
+        else{
+            for (int i = 0; i < leadDimRDI; i++)
+                stride *= y->dimSizeRDI[i];
+            blockSize = stride * dimensionSize;
+            blockNum = y->unitNum / blockSize;
+            blockx = NewTensor2D(-stride, dimensionSize, x->dataType, x->devID, mem);
+            blocky = NewTensor2D(-stride, dimensionSize, x->dataType, x->devID, mem);
+            blockMax = NewTensor2D(-stride, 1, x->dataType, x->devID, mem);
+            blockSum = NewTensor2D(-stride, 1, x->dataType, x->devID, mem);
+        }
     }
     for (int k = 0; k < blockNum; k++) {
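The new leadDimRDI == 0 branch handles softmax along the innermost axis: stride stays 1, so the whole tensor is treated as a single (unitNum / dimensionSize) x dimensionSize block with blockNum = 1, while all other axes keep the stride-based blocking, now built with NewTensor2D rather than a reused dims[2] array. This is also why the hunk below passes dimension 1 of that 2D view to _CudaLogSoftmaxSumMax in the leadDimRDI == 0 case.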
@@ -123,7 +132,10 @@ void _LogSoftmax(const XTensor * x, XTensor * y, int leadDim)
         blockMax->data = mp;
         blockSum->data = sp;
 #ifdef USE_CUDA
-        _CudaLogSoftmaxSumMax(blockx, blocky, leadDim, blockSum, blockMax);
+        if(leadDimRDI == 0)
+            _CudaLogSoftmaxSumMax(blockx, blocky, 1, blockSum, blockMax);
+        else
+            _CudaLogSoftmaxSumMax(blockx, blocky, leadDim, blockSum, blockMax);
 #else
         ShowNTErrors("Please specify USE_CUDA and recompile the code!");
 #endif
@@ -135,18 +147,8 @@ void _LogSoftmax(const XTensor * x, XTensor * y, int leadDim)
     }
     if (x->devID < 0) {
-        if (mem != NULL) {
-            mem->ReleaseBuf(mem->devID, max->unitNum * max->unitSize);
-            mem->ReleaseBuf(mem->devID, sum->unitNum * sum->unitSize);
-        }
-        else {
-            XMemFree(max->devID, max->data);
-            XMemFree(sum->devID, sum->data);
-            max->data = NULL;
-            sum->data = NULL;
-        }
-        delete max;
-        delete sum;
+        DelTensorBuf(max);
+        DelTensorBuf(sum);
     }
     else {
         delete blockx;
...
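Since max and sum are now taken from the buffer via NewTensorBuf, the CPU-side cleanup above collapses to DelTensorBuf(max) / DelTensorBuf(sum), which presumably performs the equivalent of the removed ReleaseBuf / XMemFree-plus-delete sequence.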