Commit 6946149c by liyinqiao

Bugs Fixed (Debugging for Transformer)

1. Global mem mode updated;
2. XTensor function updated to v2 interface;
3. Transformer model updated to v2 interface;
4. FNNLM bug fixed;
5. Other little bugs fixed.
parent 95612306
...@@ -71,11 +71,11 @@ void T2TAttention::InitModel(int argc, char ** argv, ...@@ -71,11 +71,11 @@ void T2TAttention::InitModel(int argc, char ** argv,
LoadParamFloat(argc, argv, "attminmax", &minmax, 0.1F); LoadParamFloat(argc, argv, "attminmax", &minmax, 0.1F);
LoadParamFloat(argc, argv, "dropoutatt", &dropoutP, 0); LoadParamFloat(argc, argv, "dropoutatt", &dropoutP, 0);
InitTensor2D(&wk, d, dk, X_FLOAT, devID, mem); InitTensor2DV2(&wk, d, dk, X_FLOAT, devID);
InitTensor2D(&wq, d, dk, X_FLOAT, devID, mem); InitTensor2DV2(&wq, d, dk, X_FLOAT, devID);
InitTensor2D(&wv, d, dv, X_FLOAT, devID, mem); InitTensor2DV2(&wv, d, dv, X_FLOAT, devID);
InitTensor2D(&wa, d, d, X_FLOAT, devID, mem); InitTensor2DV2(&wa, d, d, X_FLOAT, devID);
InitTensor2D(&wbig, d, 3 * d, X_FLOAT, devID, mem); InitTensor2DV2(&wbig, d, 3 * d, X_FLOAT, devID);
float scale = 1.0F; float scale = 1.0F;
float finfoutk = (float)sqrt(6.0F * scale/(d + dk)); float finfoutk = (float)sqrt(6.0F * scale/(d + dk));
...@@ -135,9 +135,9 @@ XTensor T2TAttention::MakeBig(XTensor &kqv, XTensor &mask, bool isTraining) ...@@ -135,9 +135,9 @@ XTensor T2TAttention::MakeBig(XTensor &kqv, XTensor &mask, bool isTraining)
int d2 = kqv2.GetDim(1); int d2 = kqv2.GetDim(1);
int d3 = kqv2.GetDim(2) / 3; int d3 = kqv2.GetDim(2) / 3;
InitTensor3D(&k2, d1, d2, d3, X_FLOAT, devID, mem); InitTensor3DV2(&k2, d1, d2, d3, X_FLOAT, devID);
InitTensor3D(&q2, d1, d2, d3, X_FLOAT, devID, mem); InitTensor3DV2(&q2, d1, d2, d3, X_FLOAT, devID);
InitTensor3D(&v2, d1, d2, d3, X_FLOAT, devID, mem); InitTensor3DV2(&v2, d1, d2, d3, X_FLOAT, devID);
split.Add(&q2); split.Add(&q2);
split.Add(&k2); split.Add(&k2);
......
...@@ -369,11 +369,11 @@ int T2TBatchLoader::LoadBatchLM(FILE * file, ...@@ -369,11 +369,11 @@ int T2TBatchLoader::LoadBatchLM(FILE * file,
dims[1] = max; dims[1] = max;
dims[2] = vSize; dims[2] = vSize;
InitTensor2D(batchEnc, sc, max, X_INT, devID, mem); InitTensor2DV2(batchEnc, sc, max, X_INT, devID);
InitTensor2D(label, sc, max, X_INT, devID, mem); InitTensor2DV2(label, sc, max, X_INT, devID);
InitTensor(gold, 3, dims, X_FLOAT, 1.0F, devID, mem); InitTensorV2(gold, 3, dims, X_FLOAT, devID);
InitTensor2D(paddingEnc, sc, max, X_FLOAT, devID, mem); InitTensor2DV2(paddingEnc, sc, max, X_FLOAT, devID);
InitTensor2D(paddingDec, sc, max, X_FLOAT, devID, mem); InitTensor2DV2(paddingDec, sc, max, X_FLOAT, devID);
batchEnc->SetZeroAll(); batchEnc->SetZeroAll();
label->SetZeroAll(); label->SetZeroAll();
...@@ -437,12 +437,12 @@ int T2TBatchLoader::LoadBatchLM(FILE * file, ...@@ -437,12 +437,12 @@ int T2TBatchLoader::LoadBatchLM(FILE * file,
paddingEnc->SetDataBatched(paddingEncOffsets, 1.0F, wCount); paddingEnc->SetDataBatched(paddingEncOffsets, 1.0F, wCount);
paddingDec->SetDataBatched(paddingDecOffsets, 1.0F, wCount); paddingDec->SetDataBatched(paddingDecOffsets, 1.0F, wCount);
/*XTensor * tmp = NewTensorBuf(paddingEnc, devID, mem); /*XTensor * tmp = NewTensorBufV2(paddingEnc, devID);
_ConvertDataType(batchEnc, tmp); _ConvertDataType(batchEnc, tmp);
_NotEqual(tmp, paddingEnc, 0); _NotEqual(tmp, paddingEnc, 0);
DelTensorBuf(tmp); DelTensorBuf(tmp);
XTensor * tmp2 = NewTensorBuf(paddingDec, devID, mem); XTensor * tmp2 = NewTensorBufV2(paddingDec, devID);
_ConvertDataType(batchEnc, tmp2); _ConvertDataType(batchEnc, tmp2);
_NotEqual(tmp2, paddingDec, 0); _NotEqual(tmp2, paddingDec, 0);
DelTensorBuf(tmp2);*/ DelTensorBuf(tmp2);*/
...@@ -569,12 +569,12 @@ int T2TBatchLoader::LoadBatchMT(FILE * file, ...@@ -569,12 +569,12 @@ int T2TBatchLoader::LoadBatchMT(FILE * file,
int sCount = sc/2; int sCount = sc/2;
int seqSize = 0; int seqSize = 0;
InitTensor2D(batchEnc, sCount, maxEnc, X_INT, devID, mem); InitTensor2DV2(batchEnc, sCount, maxEnc, X_INT, devID);
InitTensor2D(paddingEnc, sCount, maxEnc, X_FLOAT, devID, mem); InitTensor2DV2(paddingEnc, sCount, maxEnc, X_FLOAT, devID);
InitTensor2D(batchDec, sCount, maxDec, X_INT, devID, mem); InitTensor2DV2(batchDec, sCount, maxDec, X_INT, devID);
InitTensor2D(paddingDec, sCount, maxDec, X_FLOAT, devID, mem); InitTensor2DV2(paddingDec, sCount, maxDec, X_FLOAT, devID);
InitTensor2D(label, sCount, maxDec, X_INT, devID, mem); InitTensor2DV2(label, sCount, maxDec, X_INT, devID);
//InitTensor(gold, 3, dimsDec, X_FLOAT, 1.0F, devID, mem); //InitTensorV2(gold, 3, dimsDec, X_FLOAT, devID);
batchEnc->SetZeroAll(); batchEnc->SetZeroAll();
paddingEnc->SetZeroAll(); paddingEnc->SetZeroAll();
...@@ -613,7 +613,7 @@ int T2TBatchLoader::LoadBatchMT(FILE * file, ...@@ -613,7 +613,7 @@ int T2TBatchLoader::LoadBatchMT(FILE * file,
ws = wCountEnc; ws = wCountEnc;
batchEnc->SetData(batchEncValues, batchEnc->unitNum); batchEnc->SetData(batchEncValues, batchEnc->unitNum);
paddingEnc->SetDataBatched(paddingEncOffsets, 1.0F, wCountEnc); paddingEnc->SetDataBatched(paddingEncOffsets, 1.0F, wCountEnc);
//XTensor * tmp = NewTensorBuf(paddingEnc, devID, mem); //XTensor * tmp = NewTensorBufV2(paddingEnc, devID);
//_ConvertDataType(batchEnc, tmp); //_ConvertDataType(batchEnc, tmp);
//tmp->Dump(stderr, "tmp:"); //tmp->Dump(stderr, "tmp:");
//_NotEqual(tmp, paddingEnc, 0); //_NotEqual(tmp, paddingEnc, 0);
...@@ -662,7 +662,7 @@ int T2TBatchLoader::LoadBatchMT(FILE * file, ...@@ -662,7 +662,7 @@ int T2TBatchLoader::LoadBatchMT(FILE * file,
label->SetData(labelValues, label->unitNum); label->SetData(labelValues, label->unitNum);
paddingDec->SetDataBatched(paddingDecOffsets, 1.0F, wCountPad); paddingDec->SetDataBatched(paddingDecOffsets, 1.0F, wCountPad);
//XTensor * tmp2 = NewTensorBuf(paddingDec, devID, mem); //XTensor * tmp2 = NewTensorBufV2(paddingDec, devID);
//_ConvertDataType(batchDec, tmp2); //_ConvertDataType(batchDec, tmp2);
//_NotEqual(tmp2, paddingDec, 0); //_NotEqual(tmp2, paddingDec, 0);
//DelTensorBuf(tmp2); //DelTensorBuf(tmp2);
......
...@@ -64,7 +64,7 @@ void T2TEmbedder::InitModel(int argc, char ** argv, int myDevID, XMem * myMem, b ...@@ -64,7 +64,7 @@ void T2TEmbedder::InitModel(int argc, char ** argv, int myDevID, XMem * myMem, b
LoadParamInt(argc, argv, "d", &eSize, DEFAULT_EMBEDDING_SIZE); LoadParamInt(argc, argv, "d", &eSize, DEFAULT_EMBEDDING_SIZE);
LoadParamInt(argc, argv, "d", &d, DEFAULT_EMBEDDING_SIZE); LoadParamInt(argc, argv, "d", &d, DEFAULT_EMBEDDING_SIZE);
InitTensor2D(&w, vSize, eSize, X_FLOAT, devID, mem); InitTensor2DV2(&w, vSize, eSize, X_FLOAT, devID);
DTYPE v = 1.0F/(float)sqrt((float)eSize); DTYPE v = 1.0F/(float)sqrt((float)eSize);
w.SetDataRandn(0, v); w.SetDataRandn(0, v);
...@@ -81,7 +81,7 @@ make positional embeddings (of size eSize * length) ...@@ -81,7 +81,7 @@ make positional embeddings (of size eSize * length)
*/ */
void T2TEmbedder::MakePosEmbedding(int eSize, int d, int length) void T2TEmbedder::MakePosEmbedding(int eSize, int d, int length)
{ {
InitTensor2D(&posEmbeddingBase, length, eSize, X_FLOAT, devID, mem); InitTensor2DV2(&posEmbeddingBase, length, eSize, X_FLOAT, devID);
float * data = new float[posEmbeddingBase.unitNum]; float * data = new float[posEmbeddingBase.unitNum];
...@@ -145,9 +145,9 @@ XTensor T2TEmbedder::Make(XTensor &input) ...@@ -145,9 +145,9 @@ XTensor T2TEmbedder::Make(XTensor &input)
/* we make positional embeddings first */ /* we make positional embeddings first */
//if(!match){ //if(!match){
if(true){ if(true){
InitTensor(&posEmbedding, input.order + 1, dims, X_FLOAT, 1.0F, devID, mem); InitTensorV2(&posEmbedding, input.order + 1, dims, X_FLOAT, devID);
XTensor * posTMP = NewTensorBuf(2, dims + 1, X_FLOAT, 1.0F, devID, mem); XTensor * posTMP = NewTensorBufV2(2, dims + 1, X_FLOAT, devID);
_CopyValues(&posEmbeddingBase, 0, posTMP->unitNum, posTMP, 0); _CopyValues(&posEmbeddingBase, 0, posTMP->unitNum, posTMP, 0);
_Unsqueeze(posTMP, &posEmbedding, 0, dims[0]); _Unsqueeze(posTMP, &posEmbedding, 0, dims[0]);
......
...@@ -62,10 +62,10 @@ void T2TFNN::InitModel(int argc, char ** argv, int myDevID, XMem * myMem) ...@@ -62,10 +62,10 @@ void T2TFNN::InitModel(int argc, char ** argv, int myDevID, XMem * myMem)
LoadParamFloat(argc, argv, "fnnminmax", &minmax, 0.1F); LoadParamFloat(argc, argv, "fnnminmax", &minmax, 0.1F);
LoadParamFloat(argc, argv, "dropoutfnn", &dropoutP, 0); LoadParamFloat(argc, argv, "dropoutfnn", &dropoutP, 0);
InitTensor2D(&w1, inSize, hSize, X_FLOAT, devID, mem); InitTensor2DV2(&w1, inSize, hSize, X_FLOAT, devID);
InitTensor1D(&b1, hSize, X_FLOAT, devID, mem); InitTensor1D(&b1, hSize, X_FLOAT, devID, mem);
InitTensor2D(&w2, hSize, outSize, X_FLOAT, devID, mem); InitTensor2DV2(&w2, hSize, outSize, X_FLOAT, devID);
InitTensor1D(&b2, outSize, X_FLOAT, devID, mem); InitTensor1D(&b2, outSize, X_FLOAT, devID, mem);
float scale = 1.0F; float scale = 1.0F;
......
...@@ -56,8 +56,8 @@ void T2TLN::InitModel(int argc, char ** argv, int myDevID, XMem * myMem) ...@@ -56,8 +56,8 @@ void T2TLN::InitModel(int argc, char ** argv, int myDevID, XMem * myMem)
d = 0; d = 0;
LoadParamInt(argc, argv, "d", &d, DEFAULT_EMBEDDING_SIZE); LoadParamInt(argc, argv, "d", &d, DEFAULT_EMBEDDING_SIZE);
InitTensor1D(&w, d, X_FLOAT, devID, mem); InitTensor1DV2(&w, d, X_FLOAT, devID);
InitTensor1D(&b, d, X_FLOAT, devID, mem); InitTensor1DV2(&b, d, X_FLOAT, devID);
w.SetDataRand(1.0F, 1.0F); w.SetDataRand(1.0F, 1.0F);
b.SetZeroAll(); b.SetZeroAll();
......
...@@ -163,15 +163,15 @@ void T2TModel::MakeLM(XTensor &input, XTensor &output, XTensor &padding, bool is ...@@ -163,15 +163,15 @@ void T2TModel::MakeLM(XTensor &input, XTensor &output, XTensor &padding, bool is
dimsPadding[padding.order - 1] = padding.GetDim(-1); dimsPadding[padding.order - 1] = padding.GetDim(-1);
dimsPadding[padding.order] = padding.GetDim(-1); dimsPadding[padding.order] = padding.GetDim(-1);
XTensor * padding2 = NewTensorBuf(padding.order + 1, dimsPadding, padding.dataType, XTensor * padding2 = NewTensorBufV2(padding.order + 1, dimsPadding, padding.dataType,
padding.denseRatio, padding.devID, padding.mem); padding.devID);
for(int i = 0; i < padding2->order; i++) for(int i = 0; i < padding2->order; i++)
dimsPadding[i + 1] = padding2->GetDim(i); dimsPadding[i + 1] = padding2->GetDim(i);
dimsPadding[0] = nhead; dimsPadding[0] = nhead;
//XTensor * padding3 = NewTensorBuf(padding.order + 2, dimsPadding, padding.dataType, //XTensor * padding3 = NewTensorBufV2(padding.order + 2, dimsPadding, padding.dataType,
// padding.denseRatio, padding.devID, padding.mem); // padding.devID);
// //
///* mask of the padding */ ///* mask of the padding */
//_Unsqueeze(&padding, padding2, padding.order - 1, padding.GetDim(-1)); //_Unsqueeze(&padding, padding2, padding.order - 1, padding.GetDim(-1));
...@@ -241,7 +241,7 @@ void T2TModel::MakeMTMask(XTensor &inputEnc, XTensor &inputDec, ...@@ -241,7 +241,7 @@ void T2TModel::MakeMTMask(XTensor &inputEnc, XTensor &inputDec,
dims[i + 1] = inputDec.GetDim(i); dims[i + 1] = inputDec.GetDim(i);
dims[0] = nhead; dims[0] = nhead;
dims[inputDec.order + 1] = len; dims[inputDec.order + 1] = len;
InitTensor(&maskDec, inputDec.order + 2, dims, X_FLOAT, 1.0F, paddingDec.devID, paddingDec.mem); InitTensorV2(&maskDec, inputDec.order + 2, dims, X_FLOAT, paddingDec.devID);
/* an upper triangular matrix where the cells of the upper triangular are set to -1e-9. /* an upper triangular matrix where the cells of the upper triangular are set to -1e-9.
this matrix can be used to prevent the attention to current or following words in this matrix can be used to prevent the attention to current or following words in
...@@ -251,11 +251,11 @@ void T2TModel::MakeMTMask(XTensor &inputEnc, XTensor &inputDec, ...@@ -251,11 +251,11 @@ void T2TModel::MakeMTMask(XTensor &inputEnc, XTensor &inputDec,
/* encoder-decoder mask that prevents the attention to padding dummy words */ /* encoder-decoder mask that prevents the attention to padding dummy words */
dims[inputDec.order + 1] = inputEnc.GetDim(inputEnc.order - 1); dims[inputDec.order + 1] = inputEnc.GetDim(inputEnc.order - 1);
InitTensor(&maskEncDec, inputDec.order + 2, dims, X_FLOAT, 1.0F, paddingEnc.devID, paddingEnc.mem); InitTensorV2(&maskEncDec, inputDec.order + 2, dims, X_FLOAT, paddingEnc.devID);
XTensor * maskEncDecTMPEnc = NewTensorBuf(paddingEnc.order + 1, dims + 1, paddingEnc.dataType, XTensor * maskEncDecTMPEnc = NewTensorBufV2(paddingEnc.order + 1, dims + 1, paddingEnc.dataType,
paddingEnc.denseRatio, paddingEnc.devID, paddingEnc.mem); paddingEnc.devID);
XTensor * maskEncDecTMPDec = NewTensorBuf(maskEncDecTMPEnc, paddingEnc.devID, paddingEnc.mem); XTensor * maskEncDecTMPDec = NewTensorBufV2(maskEncDecTMPEnc, paddingEnc.devID);
_Unsqueeze(&paddingEnc, maskEncDecTMPEnc, paddingEnc.order - 1, paddingDec.GetDim(-1)); _Unsqueeze(&paddingEnc, maskEncDecTMPEnc, paddingEnc.order - 1, paddingDec.GetDim(-1));
_ScaleAndShiftMe(maskEncDecTMPEnc, 1e9F, -1e9F); _ScaleAndShiftMe(maskEncDecTMPEnc, 1e9F, -1e9F);
...@@ -271,15 +271,15 @@ void T2TModel::MakeMTMask(XTensor &inputEnc, XTensor &inputDec, ...@@ -271,15 +271,15 @@ void T2TModel::MakeMTMask(XTensor &inputEnc, XTensor &inputDec,
dimsPadding[paddingEnc.order - 1] = paddingEnc.GetDim(-1); dimsPadding[paddingEnc.order - 1] = paddingEnc.GetDim(-1);
dimsPadding[paddingEnc.order] = paddingEnc.GetDim(-1); dimsPadding[paddingEnc.order] = paddingEnc.GetDim(-1);
XTensor * padding2 = NewTensorBuf(paddingEnc.order + 1, dimsPadding, paddingEnc.dataType, XTensor * padding2 = NewTensorBufV2(paddingEnc.order + 1, dimsPadding, paddingEnc.dataType,
paddingEnc.denseRatio, paddingEnc.devID, paddingEnc.mem); paddingEnc.devID);
for (int i = 0; i < padding2->order; i++) for (int i = 0; i < padding2->order; i++)
dimsPadding[i + 1] = padding2->GetDim(i); dimsPadding[i + 1] = padding2->GetDim(i);
dimsPadding[0] = nhead; dimsPadding[0] = nhead;
XTensor * padding3 = NewTensorBuf(paddingEnc.order + 2, dimsPadding, paddingEnc.dataType, XTensor * padding3 = NewTensorBufV2(paddingEnc.order + 2, dimsPadding, paddingEnc.dataType,
paddingEnc.denseRatio, paddingEnc.devID, paddingEnc.mem); paddingEnc.devID);
/* mask of the padding */ /* mask of the padding */
_Unsqueeze(&paddingEnc, padding2, paddingEnc.order - 1, paddingEnc.GetDim(-1)); _Unsqueeze(&paddingEnc, padding2, paddingEnc.order - 1, paddingEnc.GetDim(-1));
...@@ -287,7 +287,7 @@ void T2TModel::MakeMTMask(XTensor &inputEnc, XTensor &inputDec, ...@@ -287,7 +287,7 @@ void T2TModel::MakeMTMask(XTensor &inputEnc, XTensor &inputDec,
_ScaleAndShiftMe(padding3, 1e9F, -1e9F); _ScaleAndShiftMe(padding3, 1e9F, -1e9F);
InitTensor(&maskEnc, padding3); InitTensorV2(&maskEnc, padding3);
maskEnc.SetZeroAll(); maskEnc.SetZeroAll();
/* generate the mask on the source language side (for padding) */ /* generate the mask on the source language side (for padding) */
...@@ -315,15 +315,15 @@ void T2TModel::MakeMTMaskEnc(XTensor &inputEnc, XTensor &paddingEnc, XTensor &ma ...@@ -315,15 +315,15 @@ void T2TModel::MakeMTMaskEnc(XTensor &inputEnc, XTensor &paddingEnc, XTensor &ma
dimsPadding[paddingEnc.order - 1] = paddingEnc.GetDim(-1); dimsPadding[paddingEnc.order - 1] = paddingEnc.GetDim(-1);
dimsPadding[paddingEnc.order] = paddingEnc.GetDim(-1); dimsPadding[paddingEnc.order] = paddingEnc.GetDim(-1);
XTensor * padding2 = NewTensorBuf(paddingEnc.order + 1, dimsPadding, paddingEnc.dataType, XTensor * padding2 = NewTensorBufV2(paddingEnc.order + 1, dimsPadding, paddingEnc.dataType,
paddingEnc.denseRatio, paddingEnc.devID, paddingEnc.mem); paddingEnc.devID);
for (int i = 0; i < padding2->order; i++) for (int i = 0; i < padding2->order; i++)
dimsPadding[i + 1] = padding2->GetDim(i); dimsPadding[i + 1] = padding2->GetDim(i);
dimsPadding[0] = nhead; dimsPadding[0] = nhead;
XTensor * padding3 = NewTensorBuf(paddingEnc.order + 2, dimsPadding, paddingEnc.dataType, XTensor * padding3 = NewTensorBufV2(paddingEnc.order + 2, dimsPadding, paddingEnc.dataType,
paddingEnc.denseRatio, paddingEnc.devID, paddingEnc.mem); paddingEnc.devID);
/* mask of the padding */ /* mask of the padding */
_Unsqueeze(&paddingEnc, padding2, paddingEnc.order - 1, paddingEnc.GetDim(-1)); _Unsqueeze(&paddingEnc, padding2, paddingEnc.order - 1, paddingEnc.GetDim(-1));
...@@ -331,7 +331,7 @@ void T2TModel::MakeMTMaskEnc(XTensor &inputEnc, XTensor &paddingEnc, XTensor &ma ...@@ -331,7 +331,7 @@ void T2TModel::MakeMTMaskEnc(XTensor &inputEnc, XTensor &paddingEnc, XTensor &ma
_ScaleAndShiftMe(padding3, 1e9F, -1e9F); _ScaleAndShiftMe(padding3, 1e9F, -1e9F);
InitTensor(&maskEnc, padding3); InitTensorV2(&maskEnc, padding3);
maskEnc.SetZeroAll(); maskEnc.SetZeroAll();
/* generate the mask on the source language side (for padding) */ /* generate the mask on the source language side (for padding) */
...@@ -361,7 +361,7 @@ void T2TModel::MakeMTMaskDec(XTensor &inputEnc, XTensor &inputDec, ...@@ -361,7 +361,7 @@ void T2TModel::MakeMTMaskDec(XTensor &inputEnc, XTensor &inputDec,
dims[i + 1] = inputDec.GetDim(i); dims[i + 1] = inputDec.GetDim(i);
dims[0] = nhead; dims[0] = nhead;
dims[inputDec.order + 1] = len; dims[inputDec.order + 1] = len;
InitTensor(&maskDec, inputDec.order + 2, dims, X_FLOAT, 1.0F, paddingDec.devID, paddingDec.mem); InitTensorV2(&maskDec, inputDec.order + 2, dims, X_FLOAT, paddingDec.devID);
/* An upper triangular matrix where the cells of the upper triangular are set to -1e-9. /* An upper triangular matrix where the cells of the upper triangular are set to -1e-9.
This matrix can be used to block the attention to current or following words in This matrix can be used to block the attention to current or following words in
...@@ -376,11 +376,11 @@ void T2TModel::MakeMTMaskDec(XTensor &inputEnc, XTensor &inputDec, ...@@ -376,11 +376,11 @@ void T2TModel::MakeMTMaskDec(XTensor &inputEnc, XTensor &inputDec,
/* encoder-decoder mask that prevents the attention to padding dummy words */ /* encoder-decoder mask that prevents the attention to padding dummy words */
dims[inputDec.order + 1] = inputEnc.GetDim(inputEnc.order - 1); dims[inputDec.order + 1] = inputEnc.GetDim(inputEnc.order - 1);
InitTensor(&maskEncDec, inputDec.order + 2, dims, X_FLOAT, 1.0F, paddingEnc.devID, paddingEnc.mem); InitTensorV2(&maskEncDec, inputDec.order + 2, dims, X_FLOAT, paddingEnc.devID);
XTensor * maskEncDecTMPEnc = NewTensorBuf(paddingEnc.order + 1, dims + 1, paddingEnc.dataType, XTensor * maskEncDecTMPEnc = NewTensorBufV2(paddingEnc.order + 1, dims + 1, paddingEnc.dataType,
paddingEnc.denseRatio, paddingEnc.devID, paddingEnc.mem); paddingEnc.devID);
XTensor * maskEncDecTMPDec = NewTensorBuf(maskEncDecTMPEnc, paddingEnc.devID, paddingEnc.mem); XTensor * maskEncDecTMPDec = NewTensorBufV2(maskEncDecTMPEnc, paddingEnc.devID);
_Unsqueeze(&paddingEnc, maskEncDecTMPEnc, paddingEnc.order - 1, paddingDec.GetDim(-1)); _Unsqueeze(&paddingEnc, maskEncDecTMPEnc, paddingEnc.order - 1, paddingDec.GetDim(-1));
......
...@@ -61,7 +61,7 @@ void T2TOutput::InitModel(int argc, char ** argv, int myDevID, XMem * myMem) ...@@ -61,7 +61,7 @@ void T2TOutput::InitModel(int argc, char ** argv, int myDevID, XMem * myMem)
LoadParamInt(argc, argv, "d", &hSize, DEFAULT_EMBEDDING_SIZE); LoadParamInt(argc, argv, "d", &hSize, DEFAULT_EMBEDDING_SIZE);
LoadParamFloat(argc, argv, "outputminmax", &minmax, 0.08F); LoadParamFloat(argc, argv, "outputminmax", &minmax, 0.08F);
InitTensor2D(&w, hSize, vSize, X_FLOAT, devID, mem); InitTensor2DV2(&w, hSize, vSize, X_FLOAT, devID);
float scale = 1.0F; float scale = 1.0F;
float finfout = (float)sqrt(6.0F * scale/(hSize + vSize)); float finfout = (float)sqrt(6.0F * scale/(hSize + vSize));
......
...@@ -105,9 +105,9 @@ void T2TPredictor::Create(T2TModel * model, XTensor * top, const XTensor * input ...@@ -105,9 +105,9 @@ void T2TPredictor::Create(T2TModel * model, XTensor * top, const XTensor * input
dims[i] = input->GetDim(i); dims[i] = input->GetDim(i);
dims[input->order - 1] = beamSize; dims[input->order - 1] = beamSize;
InitTensor(&state->probPath, input->order, dims, X_FLOAT, 1.0F, input->devID, input->mem); InitTensorV2(&state->probPath, input->order, dims, X_FLOAT, input->devID);
InitTensor(&state->nstep, input->order, dims, X_FLOAT, 1.0F, input->devID, input->mem); InitTensorV2(&state->nstep, input->order, dims, X_FLOAT, input->devID);
InitTensor(&state->endMark, input->order, dims, X_INT, 1.0F, input->devID, input->mem); InitTensorV2(&state->endMark, input->order, dims, X_INT, input->devID);
state->probPath.SetZeroAll(); state->probPath.SetZeroAll();
state->nstep.SetZeroAll(); state->nstep.SetZeroAll();
...@@ -170,7 +170,7 @@ void T2TPredictor::Predict(T2TStateBundle * next, XTensor * encoding, ...@@ -170,7 +170,7 @@ void T2TPredictor::Predict(T2TStateBundle * next, XTensor * encoding,
dims[i] = inputEnc->GetDim(i); dims[i] = inputEnc->GetDim(i);
dims[inputEnc->order - 1] = 1; dims[inputEnc->order - 1] = 1;
InitTensor(&first, inputEnc->order, dims, X_INT, 1.0F, inputEnc->devID, inputEnc->mem); InitTensorV2(&first, inputEnc->order, dims, X_INT, inputEnc->devID);
_SetDataFixedInt(&first, startSymbol); _SetDataFixedInt(&first, startSymbol);
/* add a new word into the input sequence of the decoder side */ /* add a new word into the input sequence of the decoder side */
...@@ -194,7 +194,7 @@ void T2TPredictor::Predict(T2TStateBundle * next, XTensor * encoding, ...@@ -194,7 +194,7 @@ void T2TPredictor::Predict(T2TStateBundle * next, XTensor * encoding,
dims[inputDec.order - 1] = inputDec.GetDim(-1); dims[inputDec.order - 1] = inputDec.GetDim(-1);
XTensor paddingDec; XTensor paddingDec;
InitTensor(&paddingDec, inputDec.order, dims, X_INT, 1.0F, paddingEnc->devID, paddingEnc->mem); InitTensorV2(&paddingDec, inputDec.order, dims, X_INT, paddingEnc->devID);
SetDataFixedInt(paddingDec, 1); SetDataFixedInt(paddingDec, 1);
XTensor maskDec; XTensor maskDec;
...@@ -213,8 +213,8 @@ void T2TPredictor::Predict(T2TStateBundle * next, XTensor * encoding, ...@@ -213,8 +213,8 @@ void T2TPredictor::Predict(T2TStateBundle * next, XTensor * encoding,
int stride = decoding.GetDim(decoding.order - 2); int stride = decoding.GetDim(decoding.order - 2);
InitTensor1D(&selectSrc, 1, X_INT); InitTensor1DV2(&selectSrc, 1, X_INT);
InitTensor1D(&selectTgt, 1, X_INT); InitTensor1DV2(&selectTgt, 1, X_INT);
selectSrc.SetInt(stride - 1, 0); selectSrc.SetInt(stride - 1, 0);
selectTgt.SetInt(0, 0); selectTgt.SetInt(0, 0);
...@@ -257,7 +257,7 @@ XTensor T2TPredictor::GeneratePaths(T2TStateBundle * state) ...@@ -257,7 +257,7 @@ XTensor T2TPredictor::GeneratePaths(T2TStateBundle * state)
} }
XTensor path; XTensor path;
InitTensor2D(&path, state->stateNum, distance, X_INT); InitTensor2DV2(&path, state->stateNum, distance, X_INT);
path.SetZeroAll(); path.SetZeroAll();
for(int i = 0; i < state->stateNum; i++){ for(int i = 0; i < state->stateNum; i++){
......
...@@ -192,8 +192,8 @@ void T2TSearch::Score(T2TStateBundle * prev, T2TStateBundle * beam) ...@@ -192,8 +192,8 @@ void T2TSearch::Score(T2TStateBundle * prev, T2TStateBundle * beam)
for(int i = 0; i < order; i++) for(int i = 0; i < order; i++)
dims[i] = prob.GetDim(i); dims[i] = prob.GetDim(i);
InitTensor(&score, &prob); InitTensorV2(&score, &prob);
InitTensor(&probPath, &prob); InitTensorV2(&probPath, &prob);
prob.Reshape(prob.unitNum/outputSize, outputSize); prob.Reshape(prob.unitNum/outputSize, outputSize);
score.Reshape(score.unitNum/outputSize, outputSize); score.Reshape(score.unitNum/outputSize, outputSize);
...@@ -204,8 +204,8 @@ void T2TSearch::Score(T2TStateBundle * prev, T2TStateBundle * beam) ...@@ -204,8 +204,8 @@ void T2TSearch::Score(T2TStateBundle * prev, T2TStateBundle * beam)
_SumDim(&prob, &probPathPrev, &probPath, 0); _SumDim(&prob, &probPathPrev, &probPath, 0);
InitTensor(&len, &lenPrev); InitTensorV2(&len, &lenPrev);
InitTensor(&lp, &lenPrev); InitTensorV2(&lp, &lenPrev);
_ScaleAndShift(&lenPrev, &len, 1.0F, 1.0F); _ScaleAndShift(&lenPrev, &len, 1.0F, 1.0F);
...@@ -225,9 +225,9 @@ void T2TSearch::Score(T2TStateBundle * prev, T2TStateBundle * beam) ...@@ -225,9 +225,9 @@ void T2TSearch::Score(T2TStateBundle * prev, T2TStateBundle * beam)
_SumDim(&score, &firstMask, &score, 0); _SumDim(&score, &firstMask, &score, 0);
} }
InitTensor(&mask, InitTensorV2(&mask,
prev->endMark.order, prev->endMark.dimSize, X_FLOAT, 1.0F, prev->endMark.order, prev->endMark.dimSize, X_FLOAT,
prev->endMark.devID, prev->endMark.mem); prev->endMark.devID);
_SetDataFixedCond(&mask, &prev->endMark, -1e9F); _SetDataFixedCond(&mask, &prev->endMark, -1e9F);
mask.Reshape(mask.unitNum); mask.Reshape(mask.unitNum);
...@@ -279,12 +279,11 @@ void T2TSearch::Generate(T2TStateBundle * beam) ...@@ -279,12 +279,11 @@ void T2TSearch::Generate(T2TStateBundle * beam)
dimsTopK[order - 3] = dimsBeam[order - 3]; dimsTopK[order - 3] = dimsBeam[order - 3];
dimsTopK[order - 1] = beamSize; dimsTopK[order - 1] = beamSize;
InitTensor(&scoreTopK, order, dimsTopK, score.dataType, InitTensorV2(&scoreTopK, order, dimsTopK, score.dataType,
1.0F, score.devID, score.mem); score.devID);
InitTensor(&index, order, dimsTopK, X_INT, InitTensorV2(&index, order, dimsTopK, X_INT,
1.0F, score.devID, score.mem); score.devID);
InitTensor(&preID, order, dimsTopK, X_INT, InitTensorV2(&preID, order, dimsTopK, X_INT, -1);
1.0F, -1);
score.Reshape(order, dimsBeam); score.Reshape(order, dimsBeam);
...@@ -308,12 +307,12 @@ void T2TSearch::Generate(T2TStateBundle * beam) ...@@ -308,12 +307,12 @@ void T2TSearch::Generate(T2TStateBundle * beam)
score.Reshape(order, dims); score.Reshape(order, dims);
/* we keep the top-k scores */ /* we keep the top-k scores */
InitTensor(&score, &scoreTopK); InitTensorV2(&score, &scoreTopK);
CopyValues(scoreTopK, score); CopyValues(scoreTopK, score);
/* CPU data (TODO: remove GPU->CPU data copy!!!) */ /* CPU data (TODO: remove GPU->CPU data copy!!!) */
XTensor indexCPU; XTensor indexCPU;
InitTensor(&indexCPU, index.order, index.dimSize, index.dataType, index.denseRatio, -1); InitTensorV2(&indexCPU, index.order, index.dimSize, index.dataType, -1);
CopyValues(index, indexCPU); CopyValues(index, indexCPU);
...@@ -324,9 +323,9 @@ void T2TSearch::Generate(T2TStateBundle * beam) ...@@ -324,9 +323,9 @@ void T2TSearch::Generate(T2TStateBundle * beam)
/* sequence probability of top-k candidates */ /* sequence probability of top-k candidates */
XTensor probPathTopK; XTensor probPathTopK;
InitTensor(&probPathTopK, &scoreTopK); InitTensorV2(&probPathTopK, &scoreTopK);
XTensor probTopK; XTensor probTopK;
InitTensor(&probTopK, &scoreTopK); InitTensorV2(&probTopK, &scoreTopK);
for (int i = 0; i < probPath.order; i++) { for (int i = 0; i < probPath.order; i++) {
dims[i] = probPath.GetDim(i); dims[i] = probPath.GetDim(i);
...@@ -382,7 +381,7 @@ void T2TSearch::Expand(T2TStateBundle * prev, T2TStateBundle * beam) ...@@ -382,7 +381,7 @@ void T2TSearch::Expand(T2TStateBundle * prev, T2TStateBundle * beam)
InitTensorOnCPU(&probPath, &probPathRef); InitTensorOnCPU(&probPath, &probPathRef);
InitTensorOnCPU(&prediction, &predictionRef); InitTensorOnCPU(&prediction, &predictionRef);
InitTensorOnCPU(&endMarkCPU, &predictionRef); InitTensorOnCPU(&endMarkCPU, &predictionRef);
InitTensor(&endMark, &predictionRef); InitTensorV2(&endMark, &predictionRef);
/* we copy the data to CPU because the frequent access to GPU is slow /* we copy the data to CPU because the frequent access to GPU is slow
and we can speed-up the process by doing the job on CPU. */ and we can speed-up the process by doing the job on CPU. */
...@@ -503,7 +502,7 @@ void T2TSearch::Dump(XTensor * output) ...@@ -503,7 +502,7 @@ void T2TSearch::Dump(XTensor * output)
int dims[3] = {batchSize, beamSize, maxLength}; int dims[3] = {batchSize, beamSize, maxLength};
int * words = new int[maxLength]; int * words = new int[maxLength];
InitTensor(output, 3, dims, X_INT); InitTensorV2(output, 3, dims, X_INT);
SetDataFixedInt(*output, -1); SetDataFixedInt(*output, -1);
/* heap for an input sentence in the batch */ /* heap for an input sentence in the batch */
...@@ -588,7 +587,7 @@ XTensor T2TSearch::MakeFirstMask(T2TStateBundle * beam) ...@@ -588,7 +587,7 @@ XTensor T2TSearch::MakeFirstMask(T2TStateBundle * beam)
for (int i = 0; i < order - 1; i++) for (int i = 0; i < order - 1; i++)
dims[i] = prob.GetDim(i); dims[i] = prob.GetDim(i);
InitTensor(&mask, order - 1, dims, X_FLOAT); InitTensorV2(&mask, order - 1, dims, X_FLOAT);
mask.SetZeroAll(); mask.SetZeroAll();
for (int i = 0; i < mask.unitNum; i++) { for (int i = 0; i < mask.unitNum; i++) {
......
...@@ -372,7 +372,7 @@ void T2TTrainer::Test(const char * fn, const char * ofn, T2TModel * model) ...@@ -372,7 +372,7 @@ void T2TTrainer::Test(const char * fn, const char * ofn, T2TModel * model)
/* prediction probabilities */ /* prediction probabilities */
XTensor probs; XTensor probs;
InitTensor1D(&probs, bSize * length); InitTensor1DV2(&probs, bSize * length);
XTensor labelOnehot; XTensor labelOnehot;
...@@ -463,7 +463,7 @@ float T2TTrainer::GetProb(XTensor * output, XTensor * gold, XTensor * wordProbs) ...@@ -463,7 +463,7 @@ float T2TTrainer::GetProb(XTensor * output, XTensor * gold, XTensor * wordProbs)
/* probability of each word */ /* probability of each word */
XTensor wprobs; XTensor wprobs;
InitTensor1D(&wprobs, output->unitNum/output->GetDim(-1), X_FLOAT, output->devID, output->mem); InitTensor1DV2(&wprobs, output->unitNum/output->GetDim(-1), X_FLOAT, output->devID);
int dims[2] = {output->unitNum/output->GetDim(-1), output->GetDim(-1)}; int dims[2] = {output->unitNum/output->GetDim(-1), output->GetDim(-1)};
probs.Reshape(2, dims); probs.Reshape(2, dims);
...@@ -480,7 +480,7 @@ float T2TTrainer::GetProb(XTensor * output, XTensor * gold, XTensor * wordProbs) ...@@ -480,7 +480,7 @@ float T2TTrainer::GetProb(XTensor * output, XTensor * gold, XTensor * wordProbs)
/* probability for the batch */ /* probability for the batch */
XTensor result; XTensor result;
InitTensor1D(&result, 1, X_FLOAT, output->devID, output->mem); InitTensor1DV2(&result, 1, X_FLOAT, output->devID);
_ReduceSum(&probs, &result, 1); _ReduceSum(&probs, &result, 1);
return result.Get1D(0); return result.Get1D(0);
...@@ -527,7 +527,7 @@ void T2TTrainer::Update(T2TModel * model, const float lr) ...@@ -527,7 +527,7 @@ void T2TTrainer::Update(T2TModel * model, const float lr)
_ScaleAndShiftMe(v, (1.0F - adamBeta2), 0); _ScaleAndShiftMe(v, (1.0F - adamBeta2), 0);
/* v2 = m / (sqrt(v) + delta) */ /* v2 = m / (sqrt(v) + delta) */
XTensor * v2 = NewTensorBuf(v, v->devID, v->mem); XTensor * v2 = NewTensorBufV2(v, v->devID);
_Power(v, v2, 0.5F); _Power(v, v2, 0.5F);
_ScaleAndShiftMe(v2, 1.0F, d); _ScaleAndShiftMe(v2, 1.0F, d);
_Div(m, v2, v2); _Div(m, v2, v2);
...@@ -598,7 +598,7 @@ void T2TTrainer::PadOutput(XTensor * output, XTensor * gold, XTensor * padding) ...@@ -598,7 +598,7 @@ void T2TTrainer::PadOutput(XTensor * output, XTensor * gold, XTensor * padding)
output->Reshape(output->unitNum/dimso[output->order - 1], dimso[output->order - 1]); output->Reshape(output->unitNum/dimso[output->order - 1], dimso[output->order - 1]);
XTensor * padding2 = NewTensorBuf(1, &padding->unitNum, X_FLOAT, 1.0F, padding->devID, padding->mem); XTensor * padding2 = NewTensorBufV2(1, &padding->unitNum, X_FLOAT, padding->devID);
_CopyValues(padding, padding2); _CopyValues(padding, padding2);
_MultiplyDim(output, padding2, output, 0); _MultiplyDim(output, padding2, output, 0);
...@@ -652,7 +652,7 @@ void T2TTrainer::LabelSmooth(XTensor * gold, XTensor * smoothed, DTYPE p) ...@@ -652,7 +652,7 @@ void T2TTrainer::LabelSmooth(XTensor * gold, XTensor * smoothed, DTYPE p)
DTYPE q = 1.0F - p; DTYPE q = 1.0F - p;
DTYPE gift = p / n; DTYPE gift = p / n;
InitTensor(smoothed, gold); InitTensorV2(smoothed, gold);
_CopyValues(gold, smoothed); _CopyValues(gold, smoothed);
if(p == 0) if(p == 0)
......
...@@ -1582,7 +1582,7 @@ void XMemManager::Initialize() ...@@ -1582,7 +1582,7 @@ void XMemManager::Initialize()
MTYPE freeMem = GetAvailableMemory(); MTYPE freeMem = GetAvailableMemory();
MTYPE myBufSize = 0; MTYPE myBufSize = 0;
GetBufferSize(freeMem, &myBufSize); GetBufferSize(freeMem, &myBufSize);
CPUMems[0].Initialize(-1, UNI_FREE, MIN_BLOCK_SIZE_FOR_MEMPOOL, MIN_BLOCK_NUM_FOR_MEMPOOL, myBufSize); CPUMems[0].Initialize(-1, FREE_ON_THE_FLY, MIN_BLOCK_SIZE_FOR_MEMPOOL, MIN_BLOCK_NUM_FOR_MEMPOOL, myBufSize);
/* GPUs */ /* GPUs */
nGPUMem = 0; nGPUMem = 0;
...@@ -1597,7 +1597,7 @@ void XMemManager::Initialize() ...@@ -1597,7 +1597,7 @@ void XMemManager::Initialize()
MTYPE freeMem = GetAvailableGPUMemory(i); MTYPE freeMem = GetAvailableGPUMemory(i);
MTYPE myBufSize = 0; MTYPE myBufSize = 0;
GetBufferSize(freeMem, &myBufSize); GetBufferSize(freeMem, &myBufSize);
GPUMems[i].Initialize(i, UNI_FREE, MIN_BLOCK_SIZE_FOR_MEMPOOL, MIN_BLOCK_NUM_FOR_MEMPOOL, myBufSize); GPUMems[i].Initialize(i, FREE_ON_THE_FLY, MIN_BLOCK_SIZE_FOR_MEMPOOL, MIN_BLOCK_NUM_FOR_MEMPOOL, myBufSize);
} }
#endif #endif
......
...@@ -60,7 +60,7 @@ typedef long long INT_64; ...@@ -60,7 +60,7 @@ typedef long long INT_64;
#define CUDA_HOST_MALLOC 1 #define CUDA_HOST_MALLOC 1
#define MY_PITCH CUDA_PITCH #define MY_PITCH CUDA_PITCH
#define BUF_PITCH 256 #define BUF_PITCH 256
#define MIN_BLOCK_SIZE_FOR_MEMPOOL 128 * 1024 * 1024 #define MIN_BLOCK_SIZE_FOR_MEMPOOL 256 * 1024 * 1024
#define MIN_BLOCK_NUM_FOR_MEMPOOL 1024 #define MIN_BLOCK_NUM_FOR_MEMPOOL 1024
#define MAX_CPU_NUM 16 #define MAX_CPU_NUM 16
#define MAX_GPU_NUM 16 #define MAX_GPU_NUM 16
......
...@@ -101,7 +101,7 @@ XTensor::XTensor(const XTensor * reference) ...@@ -101,7 +101,7 @@ XTensor::XTensor(const XTensor * reference)
SetDataPointer(); SetDataPointer();
id = MakeTensorID(); id = MakeTensorID();
InitTensor(this, reference); InitTensorV2(this, reference);
} }
/* /*
...@@ -173,7 +173,7 @@ XTensor::XTensor(const XTensor &reference) ...@@ -173,7 +173,7 @@ XTensor::XTensor(const XTensor &reference)
else{ else{
devID = reference.devID; devID = reference.devID;
mem = reference.mem; mem = reference.mem;
InitTensor(this, &reference); InitTensorV2(this, &reference);
_CopyValues(&reference, this); _CopyValues(&reference, this);
} }
...@@ -2168,6 +2168,11 @@ void InitTensorV2(XTensor * tensor, ...@@ -2168,6 +2168,11 @@ void InitTensorV2(XTensor * tensor,
const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType, const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType,
const int myDevID) const int myDevID)
{ {
if (tensor->mem == NULL) {
XMem * myMem = GMems.GetMem(myDevID);
tensor->mem = myMem;
tensor->devID = myMem->devID;
}
if(tensor->mem != NULL){ if(tensor->mem != NULL){
tensor->Resize(myOrder, myDimSize, myDataType, 1.0F); tensor->Resize(myOrder, myDimSize, myDataType, 1.0F);
} }
...@@ -2469,9 +2474,8 @@ void InitTensorOnCPU(XTensor * tensor, const XTensor * reference) ...@@ -2469,9 +2474,8 @@ void InitTensorOnCPU(XTensor * tensor, const XTensor * reference)
if(reference->order < 0) if(reference->order < 0)
return; return;
InitTensor(tensor, reference->order, reference->dimSize, InitTensorV2(tensor, reference->order, reference->dimSize,
reference->dataType, reference->denseRatio, reference->dataType, -1);
-1);
} }
/* generate a XTensor with no initialization */ /* generate a XTensor with no initialization */
...@@ -2574,7 +2578,7 @@ XTensor * NewTensorBufV2(const int myOrder, const int * myDimSize, ...@@ -2574,7 +2578,7 @@ XTensor * NewTensorBufV2(const int myOrder, const int * myDimSize,
dims[0] = -abs(dims[0]); dims[0] = -abs(dims[0]);
XTensor * tensor = NewTensor(myOrder, dims, myDataType, 1.0F, devID); XTensor * tensor = NewTensorV2(myOrder, dims, myDataType, devID);
if (tensor->unitNum * tensor->unitSize == 176657664) { if (tensor->unitNum * tensor->unitSize == 176657664) {
tensor->Dump(stderr, "", 200); tensor->Dump(stderr, "", 200);
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论