Commit 35e084b0 by xiaotong

add padding on the encoder side for t2t MT

parent 6f90577d
...@@ -174,10 +174,10 @@ make the network for machine translation (with the output softmax layer) ...@@ -174,10 +174,10 @@ make the network for machine translation (with the output softmax layer)
>> inputEnc - input tensor of the encoder >> inputEnc - input tensor of the encoder
>> inputDec - input tensor of the decoder >> inputDec - input tensor of the decoder
>> output - output tensor (distribution) >> output - output tensor (distribution)
>> padding - padding of the sequences >> paddingEnc - padding of the sequences (on the encoder side)
>> isTraining - indicates whether the model is for training >> isTraining - indicates whether the model is for training
*/ */
void T2TModel::MakeMT(XTensor &inputEnc, XTensor &inputDec, XTensor &output, XTensor &padding, bool isTraining) void T2TModel::MakeMT(XTensor &inputEnc, XTensor &inputDec, XTensor &output, XTensor &paddingEnc, bool isTraining)
{ {
XTensor encoding; XTensor encoding;
XTensor decoding; XTensor decoding;
...@@ -199,11 +199,44 @@ void T2TModel::MakeMT(XTensor &inputEnc, XTensor &inputDec, XTensor &output, XTe ...@@ -199,11 +199,44 @@ void T2TModel::MakeMT(XTensor &inputEnc, XTensor &inputDec, XTensor &output, XTe
_SetDataLowTri(&maskDec, 1e9F, 0); _SetDataLowTri(&maskDec, 1e9F, 0);
_ScaleAndShiftMe(&maskDec, 1.0F, -1e9F); _ScaleAndShiftMe(&maskDec, 1.0F, -1e9F);
/* padding on the source side */
int * dimsPadding = new int[paddingEnc.order + 2];
for (int i = 0; i < paddingEnc.order - 1; i++)
dimsPadding[i] = paddingEnc.GetDim(i);
dimsPadding[paddingEnc.order - 1] = paddingEnc.GetDim(-1);
dimsPadding[paddingEnc.order] = paddingEnc.GetDim(-1);
XTensor * padding2 = NewTensorBuf(paddingEnc.order + 1, dimsPadding, paddingEnc.dataType,
paddingEnc.denseRatio, paddingEnc.devID, paddingEnc.mem);
for (int i = 0; i < padding2->order; i++)
dimsPadding[i + 1] = padding2->GetDim(i);
dimsPadding[0] = nhead;
XTensor * padding3 = NewTensorBuf(paddingEnc.order + 2, dimsPadding, paddingEnc.dataType,
paddingEnc.denseRatio, paddingEnc.devID, paddingEnc.mem);
/* mask of the padding */
_Unsqueeze(&paddingEnc, padding2, paddingEnc.order - 1, paddingEnc.GetDim(-1));
_Unsqueeze(padding2, padding3, 0, nhead);
_ScaleAndShiftMe(padding3, 1e9F, -1e9F);
InitTensor(&maskEnc, padding3);
maskEnc.SetZeroAll();
/* generate the mask on the source language side (for padding) */
_Sum(&maskEnc, padding3, &maskEnc);
encoding = MakeEncoder(inputEnc, maskEnc, isTraining); encoding = MakeEncoder(inputEnc, maskEnc, isTraining);
decoding = MakeDecoder(inputDec, encoding, maskDec, isTraining); decoding = MakeDecoder(inputDec, encoding, maskDec, isTraining);
outputLayer.Make(decoding, output); outputLayer.Make(decoding, output);
delete[] dims; delete[] dims;
delete[] dimsPadding;
DelTensorBuf(padding2);
DelTensorBuf(padding3);
} }
/* /*
......
...@@ -78,7 +78,7 @@ public: ...@@ -78,7 +78,7 @@ public:
void MakeLM(XTensor &input, XTensor &output, XTensor &padding, bool isTraining); void MakeLM(XTensor &input, XTensor &output, XTensor &padding, bool isTraining);
/* make the network for machine translation (with the output softmax layer) */ /* make the network for machine translation (with the output softmax layer) */
void MakeMT(XTensor &inputEnc, XTensor &inputDec, XTensor &output, XTensor &padding, bool isTraining); void MakeMT(XTensor &inputEnc, XTensor &inputDec, XTensor &output, XTensor &paddingEnc, bool isTraining);
/* get parameter matrics */ /* get parameter matrics */
void GetParams(XList &list); void GetParams(XList &list);
......
...@@ -166,7 +166,8 @@ public: ...@@ -166,7 +166,8 @@ public:
/* load a batch of sequences */ /* load a batch of sequences */
int LoadBatch(FILE * file, bool isLM, int LoadBatch(FILE * file, bool isLM,
XTensor * batch, XTensor * padding, XTensor * output, XTensor * batchEnc, XTensor * paddingEnc,
XTensor * batchDec, XTensor * paddingDec,
int * seqs, int * seqs,
int vsEnc, int vsDec, int sBatch, int wBatch, int vsEnc, int vsDec, int sBatch, int wBatch,
bool isSorted, int &wCount, bool isSorted, int &wCount,
...@@ -174,14 +175,16 @@ public: ...@@ -174,14 +175,16 @@ public:
/* load a batch of sequences (for language modeling) */ /* load a batch of sequences (for language modeling) */
int LoadBatchLM(FILE * file, int LoadBatchLM(FILE * file,
XTensor * batch, XTensor * padding, XTensor * output, XTensor * batchEnc, XTensor * paddingEnc,
XTensor * batchDec, XTensor * paddingDec,
int * seqs, int vs, int sBatch, int wBatch, int * seqs, int vs, int sBatch, int wBatch,
bool isSorted, int &wCount, bool isSorted, int &wCount,
int devID, XMem * mem); int devID, XMem * mem);
/* load a batch of sequences (for machine translation) */ /* load a batch of sequences (for machine translation) */
int LoadBatchMT(FILE * file, int LoadBatchMT(FILE * file,
XTensor * batch, XTensor * padding, XTensor * output, XTensor * batchEnc, XTensor * paddingEnc,
XTensor * batchDec, XTensor * paddingDec,
int * seqs, int vsEnc, int vsDec, int sBatch, int wBatch, int * seqs, int vsEnc, int vsDec, int sBatch, int wBatch,
bool isSorted, int &wCount, bool isSorted, int &wCount,
int devID, XMem * mem); int devID, XMem * mem);
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论