Commit 6946149c by liyinqiao

Bug fixes (debugging for Transformer)

1. Global memory mode updated;
2. XTensor functions updated to the v2 interface;
3. Transformer model updated to the v2 interface;
4. FNNLM bug fixed;
5. Other minor bugs fixed.
parent 95612306
......@@ -71,11 +71,11 @@ void T2TAttention::InitModel(int argc, char ** argv,
LoadParamFloat(argc, argv, "attminmax", &minmax, 0.1F);
LoadParamFloat(argc, argv, "dropoutatt", &dropoutP, 0);
InitTensor2D(&wk, d, dk, X_FLOAT, devID, mem);
InitTensor2D(&wq, d, dk, X_FLOAT, devID, mem);
InitTensor2D(&wv, d, dv, X_FLOAT, devID, mem);
InitTensor2D(&wa, d, d, X_FLOAT, devID, mem);
InitTensor2D(&wbig, d, 3 * d, X_FLOAT, devID, mem);
InitTensor2DV2(&wk, d, dk, X_FLOAT, devID);
InitTensor2DV2(&wq, d, dk, X_FLOAT, devID);
InitTensor2DV2(&wv, d, dv, X_FLOAT, devID);
InitTensor2DV2(&wa, d, d, X_FLOAT, devID);
InitTensor2DV2(&wbig, d, 3 * d, X_FLOAT, devID);
float scale = 1.0F;
float finfoutk = (float)sqrt(6.0F * scale/(d + dk));
......@@ -135,9 +135,9 @@ XTensor T2TAttention::MakeBig(XTensor &kqv, XTensor &mask, bool isTraining)
int d2 = kqv2.GetDim(1);
int d3 = kqv2.GetDim(2) / 3;
InitTensor3D(&k2, d1, d2, d3, X_FLOAT, devID, mem);
InitTensor3D(&q2, d1, d2, d3, X_FLOAT, devID, mem);
InitTensor3D(&v2, d1, d2, d3, X_FLOAT, devID, mem);
InitTensor3DV2(&k2, d1, d2, d3, X_FLOAT, devID);
InitTensor3DV2(&q2, d1, d2, d3, X_FLOAT, devID);
InitTensor3DV2(&v2, d1, d2, d3, X_FLOAT, devID);
split.Add(&q2);
split.Add(&k2);
......
......@@ -369,11 +369,11 @@ int T2TBatchLoader::LoadBatchLM(FILE * file,
dims[1] = max;
dims[2] = vSize;
InitTensor2D(batchEnc, sc, max, X_INT, devID, mem);
InitTensor2D(label, sc, max, X_INT, devID, mem);
InitTensor(gold, 3, dims, X_FLOAT, 1.0F, devID, mem);
InitTensor2D(paddingEnc, sc, max, X_FLOAT, devID, mem);
InitTensor2D(paddingDec, sc, max, X_FLOAT, devID, mem);
InitTensor2DV2(batchEnc, sc, max, X_INT, devID);
InitTensor2DV2(label, sc, max, X_INT, devID);
InitTensorV2(gold, 3, dims, X_FLOAT, devID);
InitTensor2DV2(paddingEnc, sc, max, X_FLOAT, devID);
InitTensor2DV2(paddingDec, sc, max, X_FLOAT, devID);
batchEnc->SetZeroAll();
label->SetZeroAll();
......@@ -437,12 +437,12 @@ int T2TBatchLoader::LoadBatchLM(FILE * file,
paddingEnc->SetDataBatched(paddingEncOffsets, 1.0F, wCount);
paddingDec->SetDataBatched(paddingDecOffsets, 1.0F, wCount);
/*XTensor * tmp = NewTensorBuf(paddingEnc, devID, mem);
/*XTensor * tmp = NewTensorBufV2(paddingEnc, devID);
_ConvertDataType(batchEnc, tmp);
_NotEqual(tmp, paddingEnc, 0);
DelTensorBuf(tmp);
XTensor * tmp2 = NewTensorBuf(paddingDec, devID, mem);
XTensor * tmp2 = NewTensorBufV2(paddingDec, devID);
_ConvertDataType(batchEnc, tmp2);
_NotEqual(tmp2, paddingDec, 0);
DelTensorBuf(tmp2);*/
......@@ -569,12 +569,12 @@ int T2TBatchLoader::LoadBatchMT(FILE * file,
int sCount = sc/2;
int seqSize = 0;
InitTensor2D(batchEnc, sCount, maxEnc, X_INT, devID, mem);
InitTensor2D(paddingEnc, sCount, maxEnc, X_FLOAT, devID, mem);
InitTensor2D(batchDec, sCount, maxDec, X_INT, devID, mem);
InitTensor2D(paddingDec, sCount, maxDec, X_FLOAT, devID, mem);
InitTensor2D(label, sCount, maxDec, X_INT, devID, mem);
//InitTensor(gold, 3, dimsDec, X_FLOAT, 1.0F, devID, mem);
InitTensor2DV2(batchEnc, sCount, maxEnc, X_INT, devID);
InitTensor2DV2(paddingEnc, sCount, maxEnc, X_FLOAT, devID);
InitTensor2DV2(batchDec, sCount, maxDec, X_INT, devID);
InitTensor2DV2(paddingDec, sCount, maxDec, X_FLOAT, devID);
InitTensor2DV2(label, sCount, maxDec, X_INT, devID);
//InitTensorV2(gold, 3, dimsDec, X_FLOAT, devID);
batchEnc->SetZeroAll();
paddingEnc->SetZeroAll();
......@@ -613,7 +613,7 @@ int T2TBatchLoader::LoadBatchMT(FILE * file,
ws = wCountEnc;
batchEnc->SetData(batchEncValues, batchEnc->unitNum);
paddingEnc->SetDataBatched(paddingEncOffsets, 1.0F, wCountEnc);
//XTensor * tmp = NewTensorBuf(paddingEnc, devID, mem);
//XTensor * tmp = NewTensorBufV2(paddingEnc, devID);
//_ConvertDataType(batchEnc, tmp);
//tmp->Dump(stderr, "tmp:");
//_NotEqual(tmp, paddingEnc, 0);
......@@ -662,7 +662,7 @@ int T2TBatchLoader::LoadBatchMT(FILE * file,
label->SetData(labelValues, label->unitNum);
paddingDec->SetDataBatched(paddingDecOffsets, 1.0F, wCountPad);
//XTensor * tmp2 = NewTensorBuf(paddingDec, devID, mem);
//XTensor * tmp2 = NewTensorBufV2(paddingDec, devID);
//_ConvertDataType(batchDec, tmp2);
//_NotEqual(tmp2, paddingDec, 0);
//DelTensorBuf(tmp2);
......
......@@ -64,7 +64,7 @@ void T2TEmbedder::InitModel(int argc, char ** argv, int myDevID, XMem * myMem, b
LoadParamInt(argc, argv, "d", &eSize, DEFAULT_EMBEDDING_SIZE);
LoadParamInt(argc, argv, "d", &d, DEFAULT_EMBEDDING_SIZE);
InitTensor2D(&w, vSize, eSize, X_FLOAT, devID, mem);
InitTensor2DV2(&w, vSize, eSize, X_FLOAT, devID);
DTYPE v = 1.0F/(float)sqrt((float)eSize);
w.SetDataRandn(0, v);
......@@ -81,7 +81,7 @@ make positional embeddings (of size eSize * length)
*/
void T2TEmbedder::MakePosEmbedding(int eSize, int d, int length)
{
InitTensor2D(&posEmbeddingBase, length, eSize, X_FLOAT, devID, mem);
InitTensor2DV2(&posEmbeddingBase, length, eSize, X_FLOAT, devID);
float * data = new float[posEmbeddingBase.unitNum];
......@@ -145,9 +145,9 @@ XTensor T2TEmbedder::Make(XTensor &input)
/* we make positional embeddings first */
//if(!match){
if(true){
InitTensor(&posEmbedding, input.order + 1, dims, X_FLOAT, 1.0F, devID, mem);
InitTensorV2(&posEmbedding, input.order + 1, dims, X_FLOAT, devID);
XTensor * posTMP = NewTensorBuf(2, dims + 1, X_FLOAT, 1.0F, devID, mem);
XTensor * posTMP = NewTensorBufV2(2, dims + 1, X_FLOAT, devID);
_CopyValues(&posEmbeddingBase, 0, posTMP->unitNum, posTMP, 0);
_Unsqueeze(posTMP, &posEmbedding, 0, dims[0]);
......
......@@ -62,10 +62,10 @@ void T2TFNN::InitModel(int argc, char ** argv, int myDevID, XMem * myMem)
LoadParamFloat(argc, argv, "fnnminmax", &minmax, 0.1F);
LoadParamFloat(argc, argv, "dropoutfnn", &dropoutP, 0);
InitTensor2D(&w1, inSize, hSize, X_FLOAT, devID, mem);
InitTensor2DV2(&w1, inSize, hSize, X_FLOAT, devID);
InitTensor1D(&b1, hSize, X_FLOAT, devID, mem);
InitTensor2D(&w2, hSize, outSize, X_FLOAT, devID, mem);
InitTensor2DV2(&w2, hSize, outSize, X_FLOAT, devID);
InitTensor1D(&b2, outSize, X_FLOAT, devID, mem);
float scale = 1.0F;
......
......@@ -56,8 +56,8 @@ void T2TLN::InitModel(int argc, char ** argv, int myDevID, XMem * myMem)
d = 0;
LoadParamInt(argc, argv, "d", &d, DEFAULT_EMBEDDING_SIZE);
InitTensor1D(&w, d, X_FLOAT, devID, mem);
InitTensor1D(&b, d, X_FLOAT, devID, mem);
InitTensor1DV2(&w, d, X_FLOAT, devID);
InitTensor1DV2(&b, d, X_FLOAT, devID);
w.SetDataRand(1.0F, 1.0F);
b.SetZeroAll();
......
......@@ -163,15 +163,15 @@ void T2TModel::MakeLM(XTensor &input, XTensor &output, XTensor &padding, bool is
dimsPadding[padding.order - 1] = padding.GetDim(-1);
dimsPadding[padding.order] = padding.GetDim(-1);
XTensor * padding2 = NewTensorBuf(padding.order + 1, dimsPadding, padding.dataType,
padding.denseRatio, padding.devID, padding.mem);
XTensor * padding2 = NewTensorBufV2(padding.order + 1, dimsPadding, padding.dataType,
padding.devID);
for(int i = 0; i < padding2->order; i++)
dimsPadding[i + 1] = padding2->GetDim(i);
dimsPadding[0] = nhead;
//XTensor * padding3 = NewTensorBuf(padding.order + 2, dimsPadding, padding.dataType,
// padding.denseRatio, padding.devID, padding.mem);
//XTensor * padding3 = NewTensorBufV2(padding.order + 2, dimsPadding, padding.dataType,
// padding.devID);
//
///* mask of the padding */
//_Unsqueeze(&padding, padding2, padding.order - 1, padding.GetDim(-1));
......@@ -241,7 +241,7 @@ void T2TModel::MakeMTMask(XTensor &inputEnc, XTensor &inputDec,
dims[i + 1] = inputDec.GetDim(i);
dims[0] = nhead;
dims[inputDec.order + 1] = len;
InitTensor(&maskDec, inputDec.order + 2, dims, X_FLOAT, 1.0F, paddingDec.devID, paddingDec.mem);
InitTensorV2(&maskDec, inputDec.order + 2, dims, X_FLOAT, paddingDec.devID);
/* an upper triangular matrix where the cells of the upper triangular are set to -1e9.
this matrix can be used to prevent the attention to current or following words in
......@@ -251,11 +251,11 @@ void T2TModel::MakeMTMask(XTensor &inputEnc, XTensor &inputDec,
/* encoder-decoder mask that prevents the attention to padding dummy words */
dims[inputDec.order + 1] = inputEnc.GetDim(inputEnc.order - 1);
InitTensor(&maskEncDec, inputDec.order + 2, dims, X_FLOAT, 1.0F, paddingEnc.devID, paddingEnc.mem);
InitTensorV2(&maskEncDec, inputDec.order + 2, dims, X_FLOAT, paddingEnc.devID);
XTensor * maskEncDecTMPEnc = NewTensorBuf(paddingEnc.order + 1, dims + 1, paddingEnc.dataType,
paddingEnc.denseRatio, paddingEnc.devID, paddingEnc.mem);
XTensor * maskEncDecTMPDec = NewTensorBuf(maskEncDecTMPEnc, paddingEnc.devID, paddingEnc.mem);
XTensor * maskEncDecTMPEnc = NewTensorBufV2(paddingEnc.order + 1, dims + 1, paddingEnc.dataType,
paddingEnc.devID);
XTensor * maskEncDecTMPDec = NewTensorBufV2(maskEncDecTMPEnc, paddingEnc.devID);
_Unsqueeze(&paddingEnc, maskEncDecTMPEnc, paddingEnc.order - 1, paddingDec.GetDim(-1));
_ScaleAndShiftMe(maskEncDecTMPEnc, 1e9F, -1e9F);
......@@ -271,15 +271,15 @@ void T2TModel::MakeMTMask(XTensor &inputEnc, XTensor &inputDec,
dimsPadding[paddingEnc.order - 1] = paddingEnc.GetDim(-1);
dimsPadding[paddingEnc.order] = paddingEnc.GetDim(-1);
XTensor * padding2 = NewTensorBuf(paddingEnc.order + 1, dimsPadding, paddingEnc.dataType,
paddingEnc.denseRatio, paddingEnc.devID, paddingEnc.mem);
XTensor * padding2 = NewTensorBufV2(paddingEnc.order + 1, dimsPadding, paddingEnc.dataType,
paddingEnc.devID);
for (int i = 0; i < padding2->order; i++)
dimsPadding[i + 1] = padding2->GetDim(i);
dimsPadding[0] = nhead;
XTensor * padding3 = NewTensorBuf(paddingEnc.order + 2, dimsPadding, paddingEnc.dataType,
paddingEnc.denseRatio, paddingEnc.devID, paddingEnc.mem);
XTensor * padding3 = NewTensorBufV2(paddingEnc.order + 2, dimsPadding, paddingEnc.dataType,
paddingEnc.devID);
/* mask of the padding */
_Unsqueeze(&paddingEnc, padding2, paddingEnc.order - 1, paddingEnc.GetDim(-1));
......@@ -287,7 +287,7 @@ void T2TModel::MakeMTMask(XTensor &inputEnc, XTensor &inputDec,
_ScaleAndShiftMe(padding3, 1e9F, -1e9F);
InitTensor(&maskEnc, padding3);
InitTensorV2(&maskEnc, padding3);
maskEnc.SetZeroAll();
/* generate the mask on the source language side (for padding) */
......@@ -315,15 +315,15 @@ void T2TModel::MakeMTMaskEnc(XTensor &inputEnc, XTensor &paddingEnc, XTensor &ma
dimsPadding[paddingEnc.order - 1] = paddingEnc.GetDim(-1);
dimsPadding[paddingEnc.order] = paddingEnc.GetDim(-1);
XTensor * padding2 = NewTensorBuf(paddingEnc.order + 1, dimsPadding, paddingEnc.dataType,
paddingEnc.denseRatio, paddingEnc.devID, paddingEnc.mem);
XTensor * padding2 = NewTensorBufV2(paddingEnc.order + 1, dimsPadding, paddingEnc.dataType,
paddingEnc.devID);
for (int i = 0; i < padding2->order; i++)
dimsPadding[i + 1] = padding2->GetDim(i);
dimsPadding[0] = nhead;
XTensor * padding3 = NewTensorBuf(paddingEnc.order + 2, dimsPadding, paddingEnc.dataType,
paddingEnc.denseRatio, paddingEnc.devID, paddingEnc.mem);
XTensor * padding3 = NewTensorBufV2(paddingEnc.order + 2, dimsPadding, paddingEnc.dataType,
paddingEnc.devID);
/* mask of the padding */
_Unsqueeze(&paddingEnc, padding2, paddingEnc.order - 1, paddingEnc.GetDim(-1));
......@@ -331,7 +331,7 @@ void T2TModel::MakeMTMaskEnc(XTensor &inputEnc, XTensor &paddingEnc, XTensor &ma
_ScaleAndShiftMe(padding3, 1e9F, -1e9F);
InitTensor(&maskEnc, padding3);
InitTensorV2(&maskEnc, padding3);
maskEnc.SetZeroAll();
/* generate the mask on the source language side (for padding) */
......@@ -361,7 +361,7 @@ void T2TModel::MakeMTMaskDec(XTensor &inputEnc, XTensor &inputDec,
dims[i + 1] = inputDec.GetDim(i);
dims[0] = nhead;
dims[inputDec.order + 1] = len;
InitTensor(&maskDec, inputDec.order + 2, dims, X_FLOAT, 1.0F, paddingDec.devID, paddingDec.mem);
InitTensorV2(&maskDec, inputDec.order + 2, dims, X_FLOAT, paddingDec.devID);
/* An upper triangular matrix where the cells of the upper triangular are set to -1e9.
This matrix can be used to block the attention to current or following words in
......@@ -376,11 +376,11 @@ void T2TModel::MakeMTMaskDec(XTensor &inputEnc, XTensor &inputDec,
/* encoder-decoder mask that prevents the attention to padding dummy words */
dims[inputDec.order + 1] = inputEnc.GetDim(inputEnc.order - 1);
InitTensor(&maskEncDec, inputDec.order + 2, dims, X_FLOAT, 1.0F, paddingEnc.devID, paddingEnc.mem);
InitTensorV2(&maskEncDec, inputDec.order + 2, dims, X_FLOAT, paddingEnc.devID);
XTensor * maskEncDecTMPEnc = NewTensorBuf(paddingEnc.order + 1, dims + 1, paddingEnc.dataType,
paddingEnc.denseRatio, paddingEnc.devID, paddingEnc.mem);
XTensor * maskEncDecTMPDec = NewTensorBuf(maskEncDecTMPEnc, paddingEnc.devID, paddingEnc.mem);
XTensor * maskEncDecTMPEnc = NewTensorBufV2(paddingEnc.order + 1, dims + 1, paddingEnc.dataType,
paddingEnc.devID);
XTensor * maskEncDecTMPDec = NewTensorBufV2(maskEncDecTMPEnc, paddingEnc.devID);
_Unsqueeze(&paddingEnc, maskEncDecTMPEnc, paddingEnc.order - 1, paddingDec.GetDim(-1));
......
......@@ -61,7 +61,7 @@ void T2TOutput::InitModel(int argc, char ** argv, int myDevID, XMem * myMem)
LoadParamInt(argc, argv, "d", &hSize, DEFAULT_EMBEDDING_SIZE);
LoadParamFloat(argc, argv, "outputminmax", &minmax, 0.08F);
InitTensor2D(&w, hSize, vSize, X_FLOAT, devID, mem);
InitTensor2DV2(&w, hSize, vSize, X_FLOAT, devID);
float scale = 1.0F;
float finfout = (float)sqrt(6.0F * scale/(hSize + vSize));
......
......@@ -105,9 +105,9 @@ void T2TPredictor::Create(T2TModel * model, XTensor * top, const XTensor * input
dims[i] = input->GetDim(i);
dims[input->order - 1] = beamSize;
InitTensor(&state->probPath, input->order, dims, X_FLOAT, 1.0F, input->devID, input->mem);
InitTensor(&state->nstep, input->order, dims, X_FLOAT, 1.0F, input->devID, input->mem);
InitTensor(&state->endMark, input->order, dims, X_INT, 1.0F, input->devID, input->mem);
InitTensorV2(&state->probPath, input->order, dims, X_FLOAT, input->devID);
InitTensorV2(&state->nstep, input->order, dims, X_FLOAT, input->devID);
InitTensorV2(&state->endMark, input->order, dims, X_INT, input->devID);
state->probPath.SetZeroAll();
state->nstep.SetZeroAll();
......@@ -170,7 +170,7 @@ void T2TPredictor::Predict(T2TStateBundle * next, XTensor * encoding,
dims[i] = inputEnc->GetDim(i);
dims[inputEnc->order - 1] = 1;
InitTensor(&first, inputEnc->order, dims, X_INT, 1.0F, inputEnc->devID, inputEnc->mem);
InitTensorV2(&first, inputEnc->order, dims, X_INT, inputEnc->devID);
_SetDataFixedInt(&first, startSymbol);
/* add a new word into the input sequence of the decoder side */
......@@ -194,7 +194,7 @@ void T2TPredictor::Predict(T2TStateBundle * next, XTensor * encoding,
dims[inputDec.order - 1] = inputDec.GetDim(-1);
XTensor paddingDec;
InitTensor(&paddingDec, inputDec.order, dims, X_INT, 1.0F, paddingEnc->devID, paddingEnc->mem);
InitTensorV2(&paddingDec, inputDec.order, dims, X_INT, paddingEnc->devID);
SetDataFixedInt(paddingDec, 1);
XTensor maskDec;
......@@ -213,8 +213,8 @@ void T2TPredictor::Predict(T2TStateBundle * next, XTensor * encoding,
int stride = decoding.GetDim(decoding.order - 2);
InitTensor1D(&selectSrc, 1, X_INT);
InitTensor1D(&selectTgt, 1, X_INT);
InitTensor1DV2(&selectSrc, 1, X_INT);
InitTensor1DV2(&selectTgt, 1, X_INT);
selectSrc.SetInt(stride - 1, 0);
selectTgt.SetInt(0, 0);
......@@ -257,7 +257,7 @@ XTensor T2TPredictor::GeneratePaths(T2TStateBundle * state)
}
XTensor path;
InitTensor2D(&path, state->stateNum, distance, X_INT);
InitTensor2DV2(&path, state->stateNum, distance, X_INT);
path.SetZeroAll();
for(int i = 0; i < state->stateNum; i++){
......
......@@ -192,8 +192,8 @@ void T2TSearch::Score(T2TStateBundle * prev, T2TStateBundle * beam)
for(int i = 0; i < order; i++)
dims[i] = prob.GetDim(i);
InitTensor(&score, &prob);
InitTensor(&probPath, &prob);
InitTensorV2(&score, &prob);
InitTensorV2(&probPath, &prob);
prob.Reshape(prob.unitNum/outputSize, outputSize);
score.Reshape(score.unitNum/outputSize, outputSize);
......@@ -204,8 +204,8 @@ void T2TSearch::Score(T2TStateBundle * prev, T2TStateBundle * beam)
_SumDim(&prob, &probPathPrev, &probPath, 0);
InitTensor(&len, &lenPrev);
InitTensor(&lp, &lenPrev);
InitTensorV2(&len, &lenPrev);
InitTensorV2(&lp, &lenPrev);
_ScaleAndShift(&lenPrev, &len, 1.0F, 1.0F);
......@@ -225,9 +225,9 @@ void T2TSearch::Score(T2TStateBundle * prev, T2TStateBundle * beam)
_SumDim(&score, &firstMask, &score, 0);
}
InitTensor(&mask,
prev->endMark.order, prev->endMark.dimSize, X_FLOAT, 1.0F,
prev->endMark.devID, prev->endMark.mem);
InitTensorV2(&mask,
prev->endMark.order, prev->endMark.dimSize, X_FLOAT,
prev->endMark.devID);
_SetDataFixedCond(&mask, &prev->endMark, -1e9F);
mask.Reshape(mask.unitNum);
......@@ -279,12 +279,11 @@ void T2TSearch::Generate(T2TStateBundle * beam)
dimsTopK[order - 3] = dimsBeam[order - 3];
dimsTopK[order - 1] = beamSize;
InitTensor(&scoreTopK, order, dimsTopK, score.dataType,
1.0F, score.devID, score.mem);
InitTensor(&index, order, dimsTopK, X_INT,
1.0F, score.devID, score.mem);
InitTensor(&preID, order, dimsTopK, X_INT,
1.0F, -1);
InitTensorV2(&scoreTopK, order, dimsTopK, score.dataType,
score.devID);
InitTensorV2(&index, order, dimsTopK, X_INT,
score.devID);
InitTensorV2(&preID, order, dimsTopK, X_INT, -1);
score.Reshape(order, dimsBeam);
......@@ -308,12 +307,12 @@ void T2TSearch::Generate(T2TStateBundle * beam)
score.Reshape(order, dims);
/* we keep the top-k scores */
InitTensor(&score, &scoreTopK);
InitTensorV2(&score, &scoreTopK);
CopyValues(scoreTopK, score);
/* CPU data (TODO: remove GPU->CPU data copy!!!) */
XTensor indexCPU;
InitTensor(&indexCPU, index.order, index.dimSize, index.dataType, index.denseRatio, -1);
InitTensorV2(&indexCPU, index.order, index.dimSize, index.dataType, -1);
CopyValues(index, indexCPU);
......@@ -324,9 +323,9 @@ void T2TSearch::Generate(T2TStateBundle * beam)
/* sequence probability of top-k candidates */
XTensor probPathTopK;
InitTensor(&probPathTopK, &scoreTopK);
InitTensorV2(&probPathTopK, &scoreTopK);
XTensor probTopK;
InitTensor(&probTopK, &scoreTopK);
InitTensorV2(&probTopK, &scoreTopK);
for (int i = 0; i < probPath.order; i++) {
dims[i] = probPath.GetDim(i);
......@@ -382,7 +381,7 @@ void T2TSearch::Expand(T2TStateBundle * prev, T2TStateBundle * beam)
InitTensorOnCPU(&probPath, &probPathRef);
InitTensorOnCPU(&prediction, &predictionRef);
InitTensorOnCPU(&endMarkCPU, &predictionRef);
InitTensor(&endMark, &predictionRef);
InitTensorV2(&endMark, &predictionRef);
/* we copy the data to CPU because the frequent access to GPU is slow
and we can speed-up the process by doing the job on CPU. */
......@@ -503,7 +502,7 @@ void T2TSearch::Dump(XTensor * output)
int dims[3] = {batchSize, beamSize, maxLength};
int * words = new int[maxLength];
InitTensor(output, 3, dims, X_INT);
InitTensorV2(output, 3, dims, X_INT);
SetDataFixedInt(*output, -1);
/* heap for an input sentence in the batch */
......@@ -588,7 +587,7 @@ XTensor T2TSearch::MakeFirstMask(T2TStateBundle * beam)
for (int i = 0; i < order - 1; i++)
dims[i] = prob.GetDim(i);
InitTensor(&mask, order - 1, dims, X_FLOAT);
InitTensorV2(&mask, order - 1, dims, X_FLOAT);
mask.SetZeroAll();
for (int i = 0; i < mask.unitNum; i++) {
......
......@@ -372,7 +372,7 @@ void T2TTrainer::Test(const char * fn, const char * ofn, T2TModel * model)
/* prediction probabilities */
XTensor probs;
InitTensor1D(&probs, bSize * length);
InitTensor1DV2(&probs, bSize * length);
XTensor labelOnehot;
......@@ -463,7 +463,7 @@ float T2TTrainer::GetProb(XTensor * output, XTensor * gold, XTensor * wordProbs)
/* probability of each word */
XTensor wprobs;
InitTensor1D(&wprobs, output->unitNum/output->GetDim(-1), X_FLOAT, output->devID, output->mem);
InitTensor1DV2(&wprobs, output->unitNum/output->GetDim(-1), X_FLOAT, output->devID);
int dims[2] = {output->unitNum/output->GetDim(-1), output->GetDim(-1)};
probs.Reshape(2, dims);
......@@ -480,7 +480,7 @@ float T2TTrainer::GetProb(XTensor * output, XTensor * gold, XTensor * wordProbs)
/* probability for the batch */
XTensor result;
InitTensor1D(&result, 1, X_FLOAT, output->devID, output->mem);
InitTensor1DV2(&result, 1, X_FLOAT, output->devID);
_ReduceSum(&probs, &result, 1);
return result.Get1D(0);
......@@ -527,7 +527,7 @@ void T2TTrainer::Update(T2TModel * model, const float lr)
_ScaleAndShiftMe(v, (1.0F - adamBeta2), 0);
/* v2 = m / (sqrt(v) + delta) */
XTensor * v2 = NewTensorBuf(v, v->devID, v->mem);
XTensor * v2 = NewTensorBufV2(v, v->devID);
_Power(v, v2, 0.5F);
_ScaleAndShiftMe(v2, 1.0F, d);
_Div(m, v2, v2);
......@@ -598,7 +598,7 @@ void T2TTrainer::PadOutput(XTensor * output, XTensor * gold, XTensor * padding)
output->Reshape(output->unitNum/dimso[output->order - 1], dimso[output->order - 1]);
XTensor * padding2 = NewTensorBuf(1, &padding->unitNum, X_FLOAT, 1.0F, padding->devID, padding->mem);
XTensor * padding2 = NewTensorBufV2(1, &padding->unitNum, X_FLOAT, padding->devID);
_CopyValues(padding, padding2);
_MultiplyDim(output, padding2, output, 0);
......@@ -652,7 +652,7 @@ void T2TTrainer::LabelSmooth(XTensor * gold, XTensor * smoothed, DTYPE p)
DTYPE q = 1.0F - p;
DTYPE gift = p / n;
InitTensor(smoothed, gold);
InitTensorV2(smoothed, gold);
_CopyValues(gold, smoothed);
if(p == 0)
......
......@@ -1582,7 +1582,7 @@ void XMemManager::Initialize()
MTYPE freeMem = GetAvailableMemory();
MTYPE myBufSize = 0;
GetBufferSize(freeMem, &myBufSize);
CPUMems[0].Initialize(-1, UNI_FREE, MIN_BLOCK_SIZE_FOR_MEMPOOL, MIN_BLOCK_NUM_FOR_MEMPOOL, myBufSize);
CPUMems[0].Initialize(-1, FREE_ON_THE_FLY, MIN_BLOCK_SIZE_FOR_MEMPOOL, MIN_BLOCK_NUM_FOR_MEMPOOL, myBufSize);
/* GPUs */
nGPUMem = 0;
......@@ -1597,7 +1597,7 @@ void XMemManager::Initialize()
MTYPE freeMem = GetAvailableGPUMemory(i);
MTYPE myBufSize = 0;
GetBufferSize(freeMem, &myBufSize);
GPUMems[i].Initialize(i, UNI_FREE, MIN_BLOCK_SIZE_FOR_MEMPOOL, MIN_BLOCK_NUM_FOR_MEMPOOL, myBufSize);
GPUMems[i].Initialize(i, FREE_ON_THE_FLY, MIN_BLOCK_SIZE_FOR_MEMPOOL, MIN_BLOCK_NUM_FOR_MEMPOOL, myBufSize);
}
#endif
......
......@@ -60,7 +60,7 @@ typedef long long INT_64;
#define CUDA_HOST_MALLOC 1
#define MY_PITCH CUDA_PITCH
#define BUF_PITCH 256
#define MIN_BLOCK_SIZE_FOR_MEMPOOL 128 * 1024 * 1024
#define MIN_BLOCK_SIZE_FOR_MEMPOOL 256 * 1024 * 1024
#define MIN_BLOCK_NUM_FOR_MEMPOOL 1024
#define MAX_CPU_NUM 16
#define MAX_GPU_NUM 16
......
......@@ -101,7 +101,7 @@ XTensor::XTensor(const XTensor * reference)
SetDataPointer();
id = MakeTensorID();
InitTensor(this, reference);
InitTensorV2(this, reference);
}
/*
......@@ -173,7 +173,7 @@ XTensor::XTensor(const XTensor &reference)
else{
devID = reference.devID;
mem = reference.mem;
InitTensor(this, &reference);
InitTensorV2(this, &reference);
_CopyValues(&reference, this);
}
......@@ -2168,6 +2168,11 @@ void InitTensorV2(XTensor * tensor,
const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType,
const int myDevID)
{
if (tensor->mem == NULL) {
XMem * myMem = GMems.GetMem(myDevID);
tensor->mem = myMem;
tensor->devID = myMem->devID;
}
if(tensor->mem != NULL){
tensor->Resize(myOrder, myDimSize, myDataType, 1.0F);
}
......@@ -2469,9 +2474,8 @@ void InitTensorOnCPU(XTensor * tensor, const XTensor * reference)
if(reference->order < 0)
return;
InitTensor(tensor, reference->order, reference->dimSize,
reference->dataType, reference->denseRatio,
-1);
InitTensorV2(tensor, reference->order, reference->dimSize,
reference->dataType, -1);
}
/* generate a XTensor with no initialization */
......@@ -2574,7 +2578,7 @@ XTensor * NewTensorBufV2(const int myOrder, const int * myDimSize,
dims[0] = -abs(dims[0]);
XTensor * tensor = NewTensor(myOrder, dims, myDataType, 1.0F, devID);
XTensor * tensor = NewTensorV2(myOrder, dims, myDataType, devID);
if (tensor->unitNum * tensor->unitSize == 176657664) {
tensor->Dump(stderr, "", 200);
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论