Commit 39eef110 by xiaotong

bug fixes

parent e9d68683
...@@ -42,7 +42,7 @@ float LengthPenalizer::GNMT(float length, float alpha) ...@@ -42,7 +42,7 @@ float LengthPenalizer::GNMT(float length, float alpha)
base = (length + 5.0F) / (1.0F + 5.0F); base = (length + 5.0F) / (1.0F + 5.0F);
lp = pow(base, alpha); lp = (float)pow(base, alpha);
return lp; return lp;
} }
......
...@@ -155,13 +155,13 @@ void XMemSet(int devID, void * p, int value, size_t size) ...@@ -155,13 +155,13 @@ void XMemSet(int devID, void * p, int value, size_t size)
cudaMemcpyKind GetMemcpyKind(int devIDFrom, int devIDTo) cudaMemcpyKind GetMemcpyKind(int devIDFrom, int devIDTo)
{ {
if(devIDFrom < 0 && devIDTo < 0) if(devIDFrom < 0 && devIDTo < 0)
return cudaMemcpyHostToHost; return cudaMemcpyKind::cudaMemcpyHostToHost;
else if(devIDFrom < 0 && devIDTo >= 0) else if(devIDFrom < 0 && devIDTo >= 0)
return cudaMemcpyHostToDevice; return cudaMemcpyKind::cudaMemcpyHostToDevice;
else if(devIDFrom >= 0 && devIDTo < 0) else if(devIDFrom >= 0 && devIDTo < 0)
return cudaMemcpyDeviceToHost; return cudaMemcpyKind::cudaMemcpyDeviceToHost;
else else
return cudaMemcpyDeviceToDevice; return cudaMemcpyKind::cudaMemcpyDeviceToDevice;
} }
#endif #endif
...@@ -553,9 +553,9 @@ void XQSort(void * data, void * index, int num, int width, int stride, int (*com ...@@ -553,9 +553,9 @@ void XQSort(void * data, void * index, int num, int width, int stride, int (*com
stackptr = 0; stackptr = 0;
lo = (char*)data; lo = (char*)data;
hi = (char*)data + realStride * (num - 1); hi = (char*)data + (long)realStride * (num - 1);
indexlo = (int*)index; indexlo = (int*)index;
indexhi = index != NULL ? (int*)index + stride * (num - 1) : NULL; indexhi = index != NULL ? (int*)index + (long)stride * (num - 1) : NULL;
recurse: recurse:
...@@ -565,8 +565,8 @@ recurse: ...@@ -565,8 +565,8 @@ recurse:
if(size <= MIN_QSORT_NUM) if(size <= MIN_QSORT_NUM)
XShortSort(lo, hi, indexlo, indexhi, width, stride, comp); XShortSort(lo, hi, indexlo, indexhi, width, stride, comp);
else { else {
mid = lo + (size/2) * realStride; mid = lo + (long)(size/2) * realStride;
indexmid = indexlo + (size/2) * stride; indexmid = indexlo + (long)(size/2) * stride;
/* sort the first, last and middle elements into order */ /* sort the first, last and middle elements into order */
if(comp(lo, mid) > 0) if(comp(lo, mid) > 0)
...@@ -834,8 +834,7 @@ int SplitALine(char* inputString, const char* seperator, StrList* items) ...@@ -834,8 +834,7 @@ int SplitALine(char* inputString, const char* seperator, StrList* items)
return 0; return 0;
if (sepLen == 0) { if (sepLen == 0) {
char* item = new char[(long)inputLen + 1];
char* item = new char[inputLen + 1];
strcpy(item, inputString); strcpy(item, inputString);
items->Add(item); items->Add(item);
} }
......
...@@ -122,12 +122,12 @@ void TTDataLoader::SetBatchSize(int myBatchSize) ...@@ -122,12 +122,12 @@ void TTDataLoader::SetBatchSize(int myBatchSize)
/* start the process */ /* start the process */
bool TTDataLoader::Start() bool TTDataLoader::Start()
{ {
file = fopen(fileName, "wb"); file = fopen(fileName, "rb");
CheckNTErrors(file, "Cannot open the file"); CheckNTErrors(file != NULL, "Cannot open the file");
/* skip the first line */ /* skip the first line */
char * line = new char[MAX_SAMPLE_LINE_LENGTH]; char * line = new char[MAX_SAMPLE_LINE_LENGTH];
fgets(line, MAX_SAMPLE_LINE_LENGTH - 1, file); fgets(line, MAX_SAMPLE_LINE_LENGTH, file);
delete[] line; delete[] line;
return true; return true;
...@@ -148,6 +148,7 @@ get a batch of samples ...@@ -148,6 +148,7 @@ get a batch of samples
*/ */
bool TTDataLoader::GetBatchSimple(XList * inputs, XList * golds) bool TTDataLoader::GetBatchSimple(XList * inputs, XList * golds)
{ {
fprintf(stderr, "get batch 0\n");
CheckNTErrors(file != NULL, "No input file specificed!"); CheckNTErrors(file != NULL, "No input file specificed!");
CheckNTErrors(inputs != NULL && inputs->count >= 1, "Wrong argument!"); CheckNTErrors(inputs != NULL && inputs->count >= 1, "Wrong argument!");
CheckNTErrors(golds != NULL && golds->count >= 1, "Wrong argument!"); CheckNTErrors(golds != NULL && golds->count >= 1, "Wrong argument!");
...@@ -162,9 +163,9 @@ bool TTDataLoader::GetBatchSimple(XList * inputs, XList * golds) ...@@ -162,9 +163,9 @@ bool TTDataLoader::GetBatchSimple(XList * inputs, XList * golds)
int * goldBatch = new int[batchSize]; int * goldBatch = new int[batchSize];
int A, B, C, D; int A, B, C, D;
while (fgets(line, MAX_SAMPLE_LINE_LENGTH - 1, file)) { while (fgets(line, MAX_SAMPLE_LINE_LENGTH, file)) {
if (count++ == batchSize) if (count == batchSize)
break; break;
if (sscanf(line, "%d %d %d %d", &A, &B, &C, &D) < 4) { if (sscanf(line, "%d %d %d %d", &A, &B, &C, &D) < 4) {
...@@ -175,19 +176,28 @@ bool TTDataLoader::GetBatchSimple(XList * inputs, XList * golds) ...@@ -175,19 +176,28 @@ bool TTDataLoader::GetBatchSimple(XList * inputs, XList * golds)
inputBatch[count * 3 + 1] = B; inputBatch[count * 3 + 1] = B;
inputBatch[count * 3 + 2] = C; inputBatch[count * 3 + 2] = C;
goldBatch[count] = D; goldBatch[count] = D;
count++;
} }
if (count > 0) {
InitTensor2D(input, count, 3, X_INT); InitTensor2D(input, count, 3, X_INT);
InitTensor2D(gold, count, 1, X_INT); InitTensor2D(gold, count, 1, X_INT);
input->SetData(input, count * 3); input->SetData(input, count * 3);
gold->SetData(gold, count); gold->SetData(gold, count);
}
delete[] line; delete[] line;
delete[] inputBatch; delete[] inputBatch;
delete[] goldBatch; delete[] goldBatch;
fprintf(stderr, "get batch 1\n");
if (count > 0)
return true; return true;
else
return false;
} }
/***************************** /*****************************
...@@ -233,6 +243,8 @@ void TTModel::Forward(int devID, XTensor * input, XTensor * output) ...@@ -233,6 +243,8 @@ void TTModel::Forward(int devID, XTensor * input, XTensor * output)
XTensor embeddingCat; XTensor embeddingCat;
XTensor hidden; XTensor hidden;
fprintf(stderr, "forward 0\n");
/* [e_0, e_1, e_2] = w_e * input(one-hot) */ /* [e_0, e_1, e_2] = w_e * input(one-hot) */
embedding = Gather(embeddingW, *input); embedding = Gather(embeddingW, *input);
...@@ -244,6 +256,8 @@ void TTModel::Forward(int devID, XTensor * input, XTensor * output) ...@@ -244,6 +256,8 @@ void TTModel::Forward(int devID, XTensor * input, XTensor * output)
/* output = Softmax(h) */ /* output = Softmax(h) */
*output = Softmax(hidden, 0); *output = Softmax(hidden, 0);
fprintf(stderr, "forward 1\n");
} }
/* clear the model */ /* clear the model */
......
...@@ -264,7 +264,57 @@ bool XLeader::Run(XConfig * config, DataDistributeBase * dataDistributor, ...@@ -264,7 +264,57 @@ bool XLeader::Run(XConfig * config, DataDistributeBase * dataDistributor,
ShowNTErrors("No data-broadcasting workers!"); ShowNTErrors("No data-broadcasting workers!");
} }
WaitForFinishing();
return isDataOK; return isDataOK;
} }
/* wait until all workers finish their job */
void XLeader::WaitForFinishing(int sleepTime)
{
while (1) {
bool finished = true;
if (finished) {
for (int i = 0; i < jworkers.count; i++) {
XWorkerJob* worker = (XWorkerJob*)jworkers[i];
if (worker->GetJobNum() > 0) {
finished = false;
break;
}
}
}
if (finished) {
for (int i = 0; i < cworkers.count; i++) {
XWorkerJob* worker = (XWorkerJob*)cworkers[i];
if (worker->GetJobNum() > 0) {
finished = false;
break;
}
}
}
if (finished) {
for (int i = 0; i < uworkers.count; i++) {
XWorkerJob* worker = (XWorkerJob*)uworkers[i];
if (worker->GetJobNum() > 0) {
finished = false;
break;
}
}
}
if (finished)
break;
#ifdef _WIN32
Sleep((DWORD)sleepTime);
#else
sleep((unsigned)sleepTime / 1000);
#endif
}
}
} /* end of the nts (NiuTrans.Tensor) namespace */ } /* end of the nts (NiuTrans.Tensor) namespace */
...@@ -48,6 +48,7 @@ ...@@ -48,6 +48,7 @@
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
#define MAX_NUM_OF_WORKERS 1024 #define MAX_NUM_OF_WORKERS 1024
#define SLEEP_TIME_IN_WAITING_FOR_JOBS 10
/* /*
communication mode of a leader. This offers a way of organizing a hierarchy of the work communication mode of a leader. This offers a way of organizing a hierarchy of the work
...@@ -126,6 +127,9 @@ public: ...@@ -126,6 +127,9 @@ public:
/* run the model (for one time) */ /* run the model (for one time) */
bool Run(XConfig * config, DataDistributeBase * dataDistributor, bool Run(XConfig * config, DataDistributeBase * dataDistributor,
XModel * model, XOptimizer * optimizer); XModel * model, XOptimizer * optimizer);
/* wait until all workers finish their job */
void WaitForFinishing(int sleepTime = SLEEP_TIME_IN_WAITING_FOR_JOBS);
}; };
} }
......
...@@ -124,8 +124,10 @@ bool XModel::Run(XList * args) ...@@ -124,8 +124,10 @@ bool XModel::Run(XList * args)
XModel * model = (XModel*)args->GetItem(0); XModel * model = (XModel*)args->GetItem(0);
XList newArgs; XList newArgs;
for (int i = 1; i < args->count; i++) for (int i = 1; i < args->count; i++) {
newArgs.Add(args->GetItem(i)); void * arg = args->GetItem(i);
newArgs.Add(arg);
}
return model->RunMe(&newArgs); return model->RunMe(&newArgs);
} }
......
...@@ -105,6 +105,7 @@ add a new job of model refreshment ...@@ -105,6 +105,7 @@ add a new job of model refreshment
*/ */
bool XWorkerJob::AddJobRefresh(XModel * myModel) bool XWorkerJob::AddJobRefresh(XModel * myModel)
{ {
fprintf(stderr, "refresh 0\n");
CheckNTErrors(myModel != NULL, "no parameter keeper!"); CheckNTErrors(myModel != NULL, "no parameter keeper!");
XList args(1); XList args(1);
...@@ -112,6 +113,8 @@ bool XWorkerJob::AddJobRefresh(XModel * myModel) ...@@ -112,6 +113,8 @@ bool XWorkerJob::AddJobRefresh(XModel * myModel)
queue.EnqueueJob((void*)(char*)XModel::Refresh, &args); queue.EnqueueJob((void*)(char*)XModel::Refresh, &args);
fprintf(stderr, "refresh 1\n");
return true; return true;
} }
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论