Commit a52ba88e by xiaotong

code cleaning and CPU code update

parent 314f4370
......@@ -187,7 +187,7 @@ void XNet::Backward(TensorList &roots, TensorList &golds, TensorList &paddings,
node->visitMark = NODE_UNFINISHED;
}
XLossGrad lossGrad;
//XLossGrad lossGrad;
/* we start with the gradient with respect to the loss for output layers */
/*for(int i = 0; i < roots.count; i++){
......
......@@ -298,7 +298,7 @@ void T2TSearch::Generate(T2TStateBundle * beam)
row means a previous state. The column number is size-of-beam \times vocab-size. We,
therefore, divide entries of the top-k index by vocab-size to compute the id of the
previous state for each hypothesis in the top-k list. */
Descale(preID, sizeVocab);
_DescaleMe(preID, sizeVocab);
/* Then, we do something similar to "preID". For the top-k predictions, we need
to know their indices in the vocabulary. We compute the offset of each prediction
......
......@@ -1508,16 +1508,27 @@ XMemManager::~XMemManager()
MTYPE XMemManager::GetAvailableMemory()
{
unsigned long freeMem = 0;
#ifndef WIN32
long pages = sysconf(_SC_AVPHYS_PAGES);
long page_size = sysconf(_SC_PAGE_SIZE);
freeMem = pages * page_size;
#else
#if __APPLE__
int mib[2] = {CTL_HW, HW_MEMSIZE};
unsigned int namelen = sizeof(mib) / sizeof(mib[0]);
unsigned long long size;
size_t len = sizeof(size);
if (sysctl(mib, namelen, &size, &len, NULL, 0) < 0){
ShowNTErrors("Cannot get memory size on Mac!");
}
else{
return size;
}
#elif _WIN32
MEMORYSTATUSEX memoryStatus;
memoryStatus.dwLength = sizeof(memoryStatus);
if (GlobalMemoryStatusEx(&memoryStatus)){
freeMem = memoryStatus.ullAvailPhys;
}
#else
long pages = sysconf(_SC_AVPHYS_PAGES);
long page_size = sysconf(_SC_PAGE_SIZE);
freeMem = pages * page_size;
#endif
return (MTYPE)freeMem;
}
......@@ -1526,8 +1537,9 @@ MTYPE XMemManager::GetAvailableMemory()
MTYPE XMemManager::GetAvailableGPUMemory(int devID)
{
size_t freeMem = 0;
size_t totalMem = 0;
#ifdef USE_CUDA
size_t totalMem = 0;
cudaSetDevice(devID);
if (cudaMemGetInfo(&freeMem, &totalMem) != cudaSuccess){
XPRINT(0, stderr, "cannot get GPU memory information.");
......@@ -1638,12 +1650,12 @@ void XMemManager::ShowMemInfo()
int myBlockNum;
for(int i = 0; i < nCPUMem; i++){
GetMemSize(-1, &myBlockSize, &myBlockNum, &myBufSize);
XPRINT3(1, stderr, " - id:-1 CPU, blockSize:%d, blockNum:%d, bufSize:%d\n", myBlockSize, myBlockNum, myBufSize);
XPRINT3(1, stderr, " - id:-1 CPU, blockSize:%lld, blockNum:%d, bufSize:%lld\n", myBlockSize, myBlockNum, myBufSize);
}
for(int i = 0; i < nGPUMem; i++){
GetMemSize(i, &myBlockSize, &myBlockNum, &myBufSize);
XPRINT4(1, stderr, " - id:%2d GPU, blockSize:%d, blockNum:%d, bufSize:%d\n", i, myBlockSize, myBlockNum, myBufSize);
XPRINT4(1, stderr, " - id:%2d GPU, blockSize:%lld, blockNum:%d, bufSize:%lld\n", i, myBlockSize, myBlockNum, myBufSize);
}
}
......
......@@ -39,10 +39,13 @@
#include <curand.h>
#endif
#ifndef WIN32
#include <unistd.h>
#else
#ifdef __APPLE__
#include <sys/types.h>
#include <sys/sysctl.h>
#elif WIN32
#include <windows.h>
#else
#include <unistd.h>
#endif
/* the nts (NiuTrans.Tensor) namespace */
......
......@@ -111,10 +111,9 @@ void _IndexToOnehot(XTensor * index, XTensor * onehot, int size, float labelSmoo
onehot->SetZeroAll();
#ifdef USE_CUDA
float confidence = 1 - labelSmoothingP;
float lowconfidence = labelSmoothingP / size;
#ifdef USE_CUDA
if(onehot->devID >= 0 && index->devID >= 0) {
_CudaIndexToOnehot(index, onehot, size, confidence, lowconfidence);
return;
......
......@@ -170,14 +170,10 @@ SIMPLE_BINARY_FUNCTION_INT(Mod, _Mod)
#else
/* define three marco separately, specify the respective function names (CPU mode) */
#define _SIMPLE_BINARY_FUNCTION_INT(_funcName, _cudaFuncName, origFunc) \
#define _SIMPLE_BINARY_FUNCTION_INT(_funcName, origFunc) \
void _funcName(const XTensor * a, XTensor * b, int num) \
{ \
/* run it on GPUs */ \
if (a->devID >= 0) { \
_cudaFuncName(a, b, num); \
return; \
} \
CheckNTErrors(a->devID < 0, "No GPU code is supported"); \
CheckNTErrors((XTensor::IsSameShaped(a, b)), \
"Input tensors should have the same data type!"); \
CheckNTErrors((a->dataType == X_INT&&b->dataType == X_INT), "TODO!"); \
......@@ -187,14 +183,10 @@ void _funcName(const XTensor * a, XTensor * b, int num) \
db[i] = (int)origFunc(d[i], num); \
} \
#define _SIMPLE_BINARY_FUNCTION(_funcName, _cudaFuncName, origFunc) \
#define _SIMPLE_BINARY_FUNCTION(_funcName, origFunc) \
void _funcName(const XTensor * a, XTensor * b, float num) \
{ \
/* run it on GPUs */ \
if (a->devID >= 0) { \
_cudaFuncName(a, b, num); \
return; \
} \
CheckNTErrors(a->devID < 0, "No GPU code is supported"); \
CheckNTErrors((XTensor::IsSameShaped(a, b)), \
"Input tensors should have the same data type!"); \
CheckNTErrors((a->dataType == X_FLOAT&&b->dataType == X_FLOAT), "TODO!");\
......@@ -228,34 +220,36 @@ void funcName(const XTensor &a, XTensor &b, float num) \
_funcName(&a, &b, num); \
} \
_SIMPLE_BINARY_FUNCTION_INT(_Scale, _CudaScale, scale)
SIMPLE_BINARY_FUNCTION_ME_INT(Scale, _Scale)
_SIMPLE_BINARY_FUNCTION_INT(_Scale, scale)
SIMPLE_BINARY_FUNCTION_ME_INT(_ScaleMe, _Scale)
SIMPLE_BINARY_FUNCTION_INT(Scale, _Scale)
_SIMPLE_BINARY_FUNCTION(_Scale, _CudaScaleFloat, scale)
SIMPLE_BINARY_FUNCTION_ME(Scale, _Scale)
_SIMPLE_BINARY_FUNCTION(_Scale, scale)
SIMPLE_BINARY_FUNCTION_ME(_ScaleMe, _Scale)
SIMPLE_BINARY_FUNCTION(Scale, _Scale)
_SIMPLE_BINARY_FUNCTION_INT(_Descale, _CudaDescale, descale)
SIMPLE_BINARY_FUNCTION_ME_INT(Descale, _Descale)
_SIMPLE_BINARY_FUNCTION_INT(_Descale, descale)
SIMPLE_BINARY_FUNCTION_ME_INT(_DescaleMe, _Descale)
SIMPLE_BINARY_FUNCTION_INT(Descale, _Descale)
_SIMPLE_BINARY_FUNCTION(_Descale, _CudaDescaleFloat, descale)
SIMPLE_BINARY_FUNCTION_ME(Descale, _Descale)
_SIMPLE_BINARY_FUNCTION(_Descale, descale)
SIMPLE_BINARY_FUNCTION_ME(_DescaleMe, _Descale)
SIMPLE_BINARY_FUNCTION(Descale, _Descale)
_SIMPLE_BINARY_FUNCTION_INT(_Shift, _CudaShift, shift)
SIMPLE_BINARY_FUNCTION_ME_INT(Shift, _Shift)
_SIMPLE_BINARY_FUNCTION_INT(_Shift, shift)
SIMPLE_BINARY_FUNCTION_ME_INT(_Shift, _Shift)
SIMPLE_BINARY_FUNCTION_INT(Shift, _Shift)
_SIMPLE_BINARY_FUNCTION(_Shift, _CudaShiftFloat, shift)
SIMPLE_BINARY_FUNCTION_ME(Shift, _Shift)
_SIMPLE_BINARY_FUNCTION(_Shift, shift)
SIMPLE_BINARY_FUNCTION_ME(_ShiftMe, _Shift)
SIMPLE_BINARY_FUNCTION(Shift, _Shift)
_SIMPLE_BINARY_FUNCTION_INT(_Mod, _CudaMod, mod)
SIMPLE_BINARY_FUNCTION_ME_INT(Mod, _Mod)
_SIMPLE_BINARY_FUNCTION_INT(_Mod, mod)
SIMPLE_BINARY_FUNCTION_ME_INT(_ModMe, _Mod)
SIMPLE_BINARY_FUNCTION_INT(Mod, _Mod)
#endif
} // namespace nts(NiuTrans.Tensor)
......@@ -149,7 +149,6 @@ XTensor Dropout(const XTensor &x, DTYPE dropProb, int leadingDim, int leadingDim
CheckNTErrors(dropProb >= 0.0 && dropProb <= 1.0, "The probability must be 0-1!");
XTensor mask;
int * maskArrayInt = NULL;
DTYPE * maskArray = NULL;
DTYPE scaleFactor = (DTYPE)1.0 / ((DTYPE)1.0 - dropProb);
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论