Commit 1da50ae2 by ltb

Use CPU float16 and measure FNN and T2T training times

parent 29d2352b
......@@ -415,7 +415,19 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
XNet autoDiffer;
double startT = GetClockSec();
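/* accumulators for timing each phase of training */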
double mkinput = 0.0;
double mkgold = 0.0;
double train_time = 0.0;
double clearModel = 0.0;
double forward = 0.0;
double backward = 0.0;
double update = 0.0;
double end = 0.0;
double start = 0.0;
double time = 0.0;
/* iterate for a number of epochs */
for(epoch = 0; epoch < nEpoch; epoch++){
......@@ -426,7 +438,6 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
wordCount = 0;
loss = 0;
ngramNum = 1;
while(ngramNum > 0){
/* load a minibatch of ngrams */
......@@ -447,20 +458,25 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
/* the loss tensor */
XTensor lossTensor;
start = GetClockSec();
/* make the input tensor for position i */
for(int i = 0; i < model.n - 1; i++)
MakeWordBatch(inputs[i], ngrams, ngramNum, i, model.vSize, model.devID);
mkinput += GetClockSec() - start;
start = GetClockSec();
/* make the gold tensor */
MakeWordBatch(gold, ngrams, ngramNum, model.n - 1, model.vSize, model.devID);
mkgold += GetClockSec() - start;
time = GetClockSec();
if(!autoDiff){
/* prepare an empty network for building the fnn */
FNNNet net;
/* gradient = 0 */
Clear(grad, false);
/* forward computation */
Forward(inputs, output, model, net);
......@@ -475,40 +491,60 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
loss -= prob;
}
else{
start = GetClockSec();
/* gradient = 0 */
Clear(model, true);
clearModel += GetClockSec() - start;
start = GetClockSec();
/* forward + backward process */
/* this is implemented by gather function */
ForwardAutoDiff(ngrams, ngramNum, output, model);
forward += GetClockSec() - start;
start = GetClockSec();
/* this is implemented by multiply function */
lossTensor = CrossEntropy(output, gold);
/* automatic differentiation */
autoDiffer.Backward(lossTensor);
backward += GetClockSec() - start;
start = GetClockSec();
/* update model parameters */
Update(model, grad, learningRate, true);
update += GetClockSec() - start;
start = GetClockSec();
/* get probabilities */
float prob = ReduceSumAll(lossTensor);
loss += prob;
end += GetClockSec() - start;
}
train_time += GetClockSec() - time;
wordCount += ngramNum;
wordCountTotal += ngramNum;
if(++step >= nStep){
isEnd = true;
break;
}
if (step % 100 == 0) {
double elapsed = GetClockSec() - startT;
startT = GetClockSec();
XPRINT8(0, stderr, "[Time] mkinput=%.5lfs, mkgold=%.5lfs, train_time=%.5lfs, clearModel=%.5lfs, forward=%.5lfs, backward=%.5lfs, update=%.5lfs, end=%.5lfs\n",
mkinput, mkgold, train_time, clearModel, forward, backward, update, end);
XPRINT5(0, stderr, "[INFO] elapsed=%.1fs, step=%d, epoch=%d, ngram=%d, ppl=%.3f\n",
elapsed, step, epoch + 1, wordCountTotal, exp(loss / wordCount));
mkinput = 0.0;
mkgold = 0.0;
train_time = 0.0;
clearModel = 0.0;
forward = 0.0;
backward = 0.0;
update = 0.0;
end = 0.0;
}
}
......
......@@ -148,6 +148,14 @@ void T2TTrainer::Train(const char * fn, const char * validFN, const char * model
double startT = GetClockSec();
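/* accumulators for timing batch loading and each training phase */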
double mkinput = 0.0;
double train_time = 0.0;
double forward = 0.0;
double backward = 0.0;
double update = 0.0;
double start = 0.0;
double time = 0.0;
for(epoch = 1; epoch <= nepoch; epoch++){
#ifndef WIN32
if(isShuffled)
......@@ -176,18 +184,31 @@ void T2TTrainer::Train(const char * fn, const char * validFN, const char * model
/* label smoothed gold standard (if needed) */
XTensor goldSmoothed;
while (batchLoader.LoadBatch(file, model->isLM,
&batchEnc, &paddingEnc, &batchDec, &paddingDec, &gold, &label,
NULL, vSize, vSizeTgt,
sBatchSize, wBatchSize, isLenSorted, ws, wc, devID, true))
{
//while (batchLoader.LoadBatch(file, model->isLM,
// &batchEnc, &paddingEnc, &batchDec, &paddingDec, &gold, &label,
// NULL, vSize, vSizeTgt,
// sBatchSize, wBatchSize, isLenSorted, ws, wc, devID, true))
while (true)
{
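/* time batch loading separately from the training step */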
start = GetClockSec();
int batch = batchLoader.LoadBatch(file, model->isLM,
&batchEnc, &paddingEnc, &batchDec, &paddingDec, &gold, &label,
NULL, vSize, vSizeTgt,
sBatchSize, wBatchSize, isLenSorted, ws, wc, devID, true);
mkinput += GetClockSec() - start;
if (!batch) {
break;
}
time = GetClockSec();
CheckNTErrors(batchEnc.order == 2, "wrong tensor order of the sequence batch");
/* output probabilities */
XTensor output;
start = GetClockSec();
/* make the network */
if(model->isLM)
model->MakeLM(batchEnc, output, paddingEnc, true);
......@@ -196,11 +217,12 @@ void T2TTrainer::Train(const char * fn, const char * validFN, const char * model
else{
ShowNTErrors("Illegal model type!");
}
forward += GetClockSec() - start;
/* back-propagation for obtaining gradients */
//if (labelSmoothingP > 0)
// LabelSmooth(&gold, &goldSmoothed, labelSmoothingP);
start = GetClockSec();
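/* convert the gold labels to a (label-smoothed) one-hot distribution */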
XTensor labelOnehot;
labelOnehot = IndexToOnehot(label, vSizeTgt, labelSmoothingP);
......@@ -229,7 +251,9 @@ void T2TTrainer::Train(const char * fn, const char * validFN, const char * model
net.Backward(lossTensor);
//net.Backward(output, labelOnehot, paddingDec, CROSSENTROPY);
//net.Backward(output, label, labelSmoothingP, CROSSENTROPY);
backward += GetClockSec() - start;
start = GetClockSec();
gradStep += 1;
loss += prob;
wordCount += wc;
......@@ -248,11 +272,13 @@ void T2TTrainer::Train(const char * fn, const char * validFN, const char * model
gradStep = 0;
validStep++;
update += GetClockSec() - start;
}
}
else
nSkipped++;
train_time += GetClockSec() - time;
if(++step >= nstep){
isEnd = true;
break;
......@@ -260,11 +286,19 @@ void T2TTrainer::Train(const char * fn, const char * validFN, const char * model
if (step % 100 == 0) {
double elapsed = GetClockSec() - startT;
startT = GetClockSec();
XPRINT6(0, stderr, "[Time] elapsed=%.5lfs, mkinput=%.5lfs, train_time=%.5lfs, forward=%.5lfs, backward=%.5lfs, update=%.5lfs\n",
elapsed, mkinput, train_time, forward, backward, update);
XPRINT8(0, stderr, "[INFO] elapsed=%.1fs, step=%d, epoch=%d, tword=%d, sword=%d, loss=%.3f, ppl=%.3f, sppl=%.3f",
elapsed, step, epoch, wordCountTotal, wordCountBatch, loss/wordCount, exp(loss/wordCount), exp(prob/wc));
if (!doUpdate)
XPRINT(0, stderr, " (no update)");
XPRINT(0, stderr, "\n");
mkinput = 0.0;
train_time = 0.0;
forward = 0.0;
backward = 0.0;
update = 0.0;
}
if(nStepCheckpoint > 0 && ++nStepCheck >= nStepCheckpoint){
......
......@@ -25,6 +25,7 @@
* $Update by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2017-11-18 bug fixes
*
*/
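/* CPU-side half-precision (float16) support */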
#include "halfLib/half/half.hpp"
#include <stdio.h>
#include <stdlib.h>
......@@ -50,6 +51,11 @@
#include "function/Identity.h"
#include "core/CHeader.h"
//#include "halfLib/HalfFloat/umHalf.h"
#ifdef USE_CUDA
// the CUDA stuff
......@@ -376,6 +382,7 @@ XTensor& XTensor::operator= (const XTensor& tensor)
XMemCopy(data, devID, tensor.data, tensor.devID, size);
if(dataHost != NULL && tensor.dataHost != NULL)
XMemCopy(dataHost, -1, tensor.dataHost, tensor.devID, size);
}
else{
DestroyData();
......@@ -1854,6 +1861,16 @@ void XTensor::Dump(FILE * file, const char * label, const int n, const int beg,
}
}
else if (dataType == X_FLOAT16) {
int end = MIN(n > 0 ? beg + n : beg + unitNum, unitNum);
for (int i = beg; i < end; i++) {
/* dump the half value as its raw 16-bit hexadecimal pattern */
unsigned short f = ((unsigned short*)d)[i];
if (i == beg)
fprintf(file, "%hx", f);
else
fprintf(file, " %hx", f);
}
}
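/* for example, halfCPU(1.0) has the IEEE-754 half bit pattern 0x3c00,
so it is dumped as "3c00" and restored bit-for-bit by Read() */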
else if (dataType == X_INT) {
int end = MIN(n > 0 ? beg + n : beg + unitNum, unitNum);
for(int i = beg; i < end; i++){
......@@ -1900,9 +1917,22 @@ dump data to a file
*/
void XTensor::Dump(const XTensor * tensor, FILE * file, const char * label, const int n, const int beg, const int verbose)
{
XTensor a(tensor->order, tensor->dimSize, tensor->dataType, tensor->denseRatio, tensor->devID, tensor->mem);
_CopyValues(tensor, &a);
a.Dump(file, label, n, beg, verbose);
if (tensor->dataType == X_FLOAT)
{
XTensor a(tensor->order, tensor->dimSize, tensor->dataType, tensor->denseRatio, tensor->devID, tensor->mem);
_CopyValues(tensor, &a);
a.Dump(file, label, n, beg, verbose);
}
else if (tensor->dataType == X_FLOAT16)
{
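/* convert the float16 tensor to a float32 copy so the values are dumped in readable form */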
XTensor a(tensor->order, tensor->dimSize, X_FLOAT, tensor->denseRatio, tensor->devID, tensor->mem);
_ConvertDataType(tensor, &a);
a.Dump(file, label, n, beg, verbose);
}
else
{
ShowNTErrors("TODO!");
}
}
/*
......@@ -1980,6 +2010,14 @@ void XTensor::Read(FILE * file, const char * label)
}
}
}
else if (dataType == X_FLOAT16) {
for (int i = 0; i < unitNum; i++) {
/* read the 16-bit pattern directly into the half's storage */
unsigned short * f = ((unsigned short*)data) + i;
if (fscanf(file, "%hx", f) < 1) {
ShowNTErrors("Incorrect tensor format!");
}
}
}
else {
ShowNTErrors("TODO!");
}
......@@ -2006,15 +2044,13 @@ void XTensor::Read(FILE * file, const char * label)
}
}
do {
c = fgetc(file);
} while (c != '\n' && c != EOF);
XMemCopy(dataBackup, devID, data, -1, GetDataSizeInChar());
data = dataBackup;
delete[](char*)dataBuf;
}
/*
......
......@@ -97,7 +97,7 @@ void CudaCPUToGPUFlush(TensorList * mList, int devID, XMem * GPUMem)
/* copy the data from GPU memory to CPU memory */
void CudaGPUToCPUFlush(XTensor * tensor)
{
CheckNTErrors((sizeof(DTYPE) == tensor->unitSize), "Unsupported data type.");
//CheckNTErrors((sizeof(DTYPE) == tensor->unitSize), "Unsupported data type.");
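/* the unitSize check is disabled so that tensors whose unit is not DTYPE
(e.g., float16) can also be flushed to CPU memory */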
if (tensor->dataHost != NULL)
delete[](char*)tensor->dataHost;
......