Commit a52ba88e by xiaotong

code cleaning and CPU code update

parent 314f4370
@@ -187,7 +187,7 @@ void XNet::Backward(TensorList &roots, TensorList &golds, TensorList &paddings,
         node->visitMark = NODE_UNFINISHED;
     }
-    XLossGrad lossGrad;
+    //XLossGrad lossGrad;
     /* we start with the gradient with respect to the loss for output layers */
     /*for(int i = 0; i < roots.count; i++){
......
@@ -298,7 +298,7 @@ void T2TSearch::Generate(T2TStateBundle * beam)
        row means a previous state. The column number is size-of-beam \times vocab-size. We,
        therefore, divide entries of the top-k index by vocab-size to compute the id of the
        previous state for each hypothesis in the top-k list. */
-    Descale(preID, sizeVocab);
+    _DescaleMe(preID, sizeVocab);
     /* Then, we do something similar to "preID". For the top-k predictions, we need
        to know their indices in the vocabulary. We compute the offset of each prediction
......
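As a side note, a minimal sketch (hypothetical values, not code from this commit) of the index arithmetic the comment above describes, where a flattened top-k index over a row of size beam-size × vocab-size is split into a previous-state id and a word id:

    int sizeVocab = 32000;                     // hypothetical vocabulary size
    int flatIndex = 70123;                     // one hypothetical entry of the top-k index
    int prevStateId = flatIndex / sizeVocab;   // what the in-place _DescaleMe(preID, sizeVocab) computes
    int wordId      = flatIndex % sizeVocab;   // what the following modulo-style step recovers
    // prevStateId == 2, wordId == 6123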
@@ -1508,16 +1508,27 @@ XMemManager::~XMemManager()
 MTYPE XMemManager::GetAvailableMemory()
 {
     unsigned long freeMem = 0;
-#ifndef WIN32
-    long pages = sysconf(_SC_AVPHYS_PAGES);
-    long page_size = sysconf(_SC_PAGE_SIZE);
-    freeMem = pages * page_size;
-#else
+#if __APPLE__
+    int mib[2] = {CTL_HW, HW_MEMSIZE};
+    unsigned int namelen = sizeof(mib) / sizeof(mib[0]);
+    unsigned long long size;
+    size_t len = sizeof(size);
+    if (sysctl(mib, namelen, &size, &len, NULL, 0) < 0){
+        ShowNTErrors("Cannot get memory size on Mac!");
+    }
+    else{
+        return size;
+    }
+#elif _WIN32
     MEMORYSTATUSEX memoryStatus;
     memoryStatus.dwLength = sizeof(memoryStatus);
     if (GlobalMemoryStatusEx(&memoryStatus)){
         freeMem = memoryStatus.ullAvailPhys;
     }
+#else
+    long pages = sysconf(_SC_AVPHYS_PAGES);
+    long page_size = sysconf(_SC_PAGE_SIZE);
+    freeMem = pages * page_size;
 #endif
     return (MTYPE)freeMem;
 }
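For reference, the new macOS branch can be exercised in isolation with a small standalone program (a sketch, not library code); note that CTL_HW/HW_MEMSIZE reports the physical memory size in bytes:

    #include <sys/types.h>
    #include <sys/sysctl.h>
    #include <cstdio>

    int main()
    {
        int mib[2] = {CTL_HW, HW_MEMSIZE};
        unsigned long long size = 0;
        size_t len = sizeof(size);
        // sysctl fills 'size' with the amount of physical memory, in bytes
        if (sysctl(mib, 2, &size, &len, NULL, 0) < 0)
            std::printf("sysctl failed\n");
        else
            std::printf("physical memory: %llu bytes\n", size);
        return 0;
    }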
@@ -1526,8 +1537,9 @@ MTYPE XMemManager::GetAvailableMemory()
 MTYPE XMemManager::GetAvailableGPUMemory(int devID)
 {
     size_t freeMem = 0;
+    size_t totalMem = 0;
 #ifdef USE_CUDA
-    size_t totalMem = 0;
     cudaSetDevice(devID);
     if (cudaMemGetInfo(&freeMem, &totalMem) != cudaSuccess){
         XPRINT(0, stderr, "cannot get GPU memory information.");
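For context, cudaMemGetInfo reports the free and total memory of the currently selected device; a minimal standalone sketch (device 0 assumed):

    #include <cuda_runtime.h>
    #include <cstdio>

    int main()
    {
        size_t freeMem = 0, totalMem = 0;
        cudaSetDevice(0);                         // hypothetical device id
        if (cudaMemGetInfo(&freeMem, &totalMem) == cudaSuccess)
            std::printf("free: %zu bytes, total: %zu bytes\n", freeMem, totalMem);
        else
            std::printf("cannot get GPU memory information\n");
        return 0;
    }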
@@ -1638,12 +1650,12 @@ void XMemManager::ShowMemInfo()
     int myBlockNum;
     for(int i = 0; i < nCPUMem; i++){
         GetMemSize(-1, &myBlockSize, &myBlockNum, &myBufSize);
-        XPRINT3(1, stderr, " - id:-1 CPU, blockSize:%d, blockNum:%d, bufSize:%d\n", myBlockSize, myBlockNum, myBufSize);
+        XPRINT3(1, stderr, " - id:-1 CPU, blockSize:%lld, blockNum:%d, bufSize:%lld\n", myBlockSize, myBlockNum, myBufSize);
     }
     for(int i = 0; i < nGPUMem; i++){
         GetMemSize(i, &myBlockSize, &myBlockNum, &myBufSize);
-        XPRINT4(1, stderr, " - id:%2d GPU, blockSize:%d, blockNum:%d, bufSize:%d\n", i, myBlockSize, myBlockNum, myBufSize);
+        XPRINT4(1, stderr, " - id:%2d GPU, blockSize:%lld, blockNum:%d, bufSize:%lld\n", i, myBlockSize, myBlockNum, myBufSize);
     }
 }
......
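The %d-to-%lld change matters because the size arguments are (presumably) 64-bit values; passing a 64-bit argument where %d expects an int is undefined behavior. A small illustration outside the library:

    #include <cstdio>
    #include <cinttypes>

    int main()
    {
        long long blockSize = 256LL * 1024 * 1024;                    // a hypothetical 256 MB block
        std::printf("blockSize:%lld\n", blockSize);                   // matches a long long argument
        std::printf("blockSize:%" PRId64 "\n", (int64_t)blockSize);   // fully portable alternative
        return 0;
    }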
@@ -39,10 +39,13 @@
 #include <curand.h>
 #endif
-#ifndef WIN32
-#include <unistd.h>
-#else
+#ifdef __APPLE__
+#include <sys/types.h>
+#include <sys/sysctl.h>
+#elif WIN32
 #include <windows.h>
+#else
+#include <unistd.h>
 #endif
 /* the nts (NiuTrans.Tensor) namespace */
......
@@ -111,10 +111,9 @@ void _IndexToOnehot(XTensor * index, XTensor * onehot, int size, float labelSmoo
     onehot->SetZeroAll();
-#ifdef USE_CUDA
     float confidence = 1 - labelSmoothingP;
     float lowconfidence = labelSmoothingP / size;
+#ifdef USE_CUDA
     if(onehot->devID >= 0 && index->devID >= 0) {
         _CudaIndexToOnehot(index, onehot, size, confidence, lowconfidence);
         return;
......
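A minimal standalone sketch of the label smoothing these two lines set up (hypothetical values, mirroring confidence = 1 - labelSmoothingP and lowconfidence = labelSmoothingP / size):

    #include <vector>
    #include <cstdio>

    int main()
    {
        int size = 4;                                  // hypothetical vocabulary size
        int goldIndex = 2;                             // hypothetical gold label
        float labelSmoothingP = 0.1F;
        float confidence = 1 - labelSmoothingP;        // weight on the gold entry
        float lowconfidence = labelSmoothingP / size;  // weight on every other entry

        std::vector<float> onehot(size, lowconfidence);
        onehot[goldIndex] = confidence;
        for (float v : onehot)
            std::printf("%.3f ", v);                   // prints: 0.025 0.025 0.900 0.025
        std::printf("\n");
        return 0;
    }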
@@ -170,14 +170,10 @@ SIMPLE_BINARY_FUNCTION_INT(Mod, _Mod)
 #else
 /* define three marco separately, specify the respective function names (CPU mode) */
-#define _SIMPLE_BINARY_FUNCTION_INT(_funcName, _cudaFuncName, origFunc)    \
+#define _SIMPLE_BINARY_FUNCTION_INT(_funcName, origFunc)                   \
 void _funcName(const XTensor * a, XTensor * b, int num)                    \
 {                                                                          \
-    /* run it on GPUs */                                                   \
-    if (a->devID >= 0) {                                                   \
-        _cudaFuncName(a, b, num);                                          \
-        return;                                                            \
-    }                                                                      \
+    CheckNTErrors(a->devID < 0, "No GPU code is supported");               \
     CheckNTErrors((XTensor::IsSameShaped(a, b)),                           \
                   "Input tensors should have the same data type!");        \
     CheckNTErrors((a->dataType == X_INT&&b->dataType == X_INT), "TODO!");  \
@@ -187,14 +183,10 @@ void _funcName(const XTensor * a, XTensor * b, int num) \
         db[i] = (int)origFunc(d[i], num);                                  \
 }                                                                          \
-#define _SIMPLE_BINARY_FUNCTION(_funcName, _cudaFuncName, origFunc)        \
+#define _SIMPLE_BINARY_FUNCTION(_funcName, origFunc)                       \
 void _funcName(const XTensor * a, XTensor * b, float num)                  \
 {                                                                          \
-    /* run it on GPUs */                                                   \
-    if (a->devID >= 0) {                                                   \
-        _cudaFuncName(a, b, num);                                          \
-        return;                                                            \
-    }                                                                      \
+    CheckNTErrors(a->devID < 0, "No GPU code is supported");               \
     CheckNTErrors((XTensor::IsSameShaped(a, b)),                           \
                   "Input tensors should have the same data type!");        \
     CheckNTErrors((a->dataType == X_FLOAT&&b->dataType == X_FLOAT), "TODO!");\
@@ -228,34 +220,36 @@ void funcName(const XTensor &a, XTensor &b, float num) \
     _funcName(&a, &b, num);                                                \
 }                                                                          \
-_SIMPLE_BINARY_FUNCTION_INT(_Scale, _CudaScale, scale)
-SIMPLE_BINARY_FUNCTION_ME_INT(Scale, _Scale)
+_SIMPLE_BINARY_FUNCTION_INT(_Scale, scale)
+SIMPLE_BINARY_FUNCTION_ME_INT(_ScaleMe, _Scale)
 SIMPLE_BINARY_FUNCTION_INT(Scale, _Scale)
-_SIMPLE_BINARY_FUNCTION(_Scale, _CudaScaleFloat, scale)
-SIMPLE_BINARY_FUNCTION_ME(Scale, _Scale)
+_SIMPLE_BINARY_FUNCTION(_Scale, scale)
+SIMPLE_BINARY_FUNCTION_ME(_ScaleMe, _Scale)
 SIMPLE_BINARY_FUNCTION(Scale, _Scale)
-_SIMPLE_BINARY_FUNCTION_INT(_Descale, _CudaDescale, descale)
-SIMPLE_BINARY_FUNCTION_ME_INT(Descale, _Descale)
+_SIMPLE_BINARY_FUNCTION_INT(_Descale, descale)
+SIMPLE_BINARY_FUNCTION_ME_INT(_DescaleMe, _Descale)
 SIMPLE_BINARY_FUNCTION_INT(Descale, _Descale)
-_SIMPLE_BINARY_FUNCTION(_Descale, _CudaDescaleFloat, descale)
-SIMPLE_BINARY_FUNCTION_ME(Descale, _Descale)
+_SIMPLE_BINARY_FUNCTION(_Descale, descale)
+SIMPLE_BINARY_FUNCTION_ME(_DescaleMe, _Descale)
 SIMPLE_BINARY_FUNCTION(Descale, _Descale)
-_SIMPLE_BINARY_FUNCTION_INT(_Shift, _CudaShift, shift)
-SIMPLE_BINARY_FUNCTION_ME_INT(Shift, _Shift)
+_SIMPLE_BINARY_FUNCTION_INT(_Shift, shift)
+SIMPLE_BINARY_FUNCTION_ME_INT(_Shift, _Shift)
 SIMPLE_BINARY_FUNCTION_INT(Shift, _Shift)
-_SIMPLE_BINARY_FUNCTION(_Shift, _CudaShiftFloat, shift)
-SIMPLE_BINARY_FUNCTION_ME(Shift, _Shift)
+_SIMPLE_BINARY_FUNCTION(_Shift, shift)
+SIMPLE_BINARY_FUNCTION_ME(_ShiftMe, _Shift)
 SIMPLE_BINARY_FUNCTION(Shift, _Shift)
-_SIMPLE_BINARY_FUNCTION_INT(_Mod, _CudaMod, mod)
-SIMPLE_BINARY_FUNCTION_ME_INT(Mod, _Mod)
+_SIMPLE_BINARY_FUNCTION_INT(_Mod, mod)
+SIMPLE_BINARY_FUNCTION_ME_INT(_ModMe, _Mod)
 SIMPLE_BINARY_FUNCTION_INT(Mod, _Mod)
 #endif
 } // namespace nts(NiuTrans.Tensor)
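For reference, a rough sketch of what _SIMPLE_BINARY_FUNCTION_INT(_Scale, scale) expands to in CPU-only mode after this change; the loop body is reconstructed from the visible fragment and assumes the usual XTensor data/unitNum members, so it is indicative only:

    void _Scale(const XTensor * a, XTensor * b, int num)
    {
        CheckNTErrors(a->devID < 0, "No GPU code is supported");
        CheckNTErrors((XTensor::IsSameShaped(a, b)),
                      "Input tensors should have the same data type!");
        CheckNTErrors((a->dataType == X_INT && b->dataType == X_INT), "TODO!");
        int * d  = (int*)a->data;                 // assumed raw data pointers
        int * db = (int*)b->data;
        for (int i = 0; i < a->unitNum; i++)      // element-wise loop, as in the macro body
            db[i] = (int)scale(d[i], num);        // 'scale' is the helper named by the macro argument
    }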
@@ -149,7 +149,6 @@ XTensor Dropout(const XTensor &x, DTYPE dropProb, int leadingDim, int leadingDim
     CheckNTErrors(dropProb >= 0.0 && dropProb <= 1.0, "The probability must be 0-1!");
     XTensor mask;
-    int * maskArrayInt = NULL;
     DTYPE * maskArray = NULL;
     DTYPE scaleFactor = (DTYPE)1.0 / ((DTYPE)1.0 - dropProb);
......
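The scale factor implements inverted dropout: activations that survive are rescaled by 1/(1 - dropProb) so the expected value of each unit is unchanged. A tiny numeric example:

    #include <cstdio>

    int main()
    {
        float dropProb = 0.25F;                        // hypothetical drop probability
        float scaleFactor = 1.0F / (1.0F - dropProb);  // = 1.333...
        float x = 3.0F;                                // a kept activation
        // E[kept * scaleFactor] = (1 - dropProb) * x * scaleFactor = x
        std::printf("scaled activation: %f\n", x * scaleFactor);   // 4.000000
        return 0;
    }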