Commit 117d5109 by xiaotong

safer padding code

parent 7382a0ec
...@@ -86,8 +86,9 @@ XTensor T2TModel::MakeEncoding(XTensor &input, XTensor &mask, bool skipInputRes) ...@@ -86,8 +86,9 @@ XTensor T2TModel::MakeEncoding(XTensor &input, XTensor &mask, bool skipInputRes)
make the entire network (with the output softmax layer) make the entire network (with the output softmax layer)
>> input - input tensor >> input - input tensor
>> output - output tensor (distribution) >> output - output tensor (distribution)
>> padding - padding of the sequences
*/ */
void T2TModel::Make(XTensor &input, XTensor &output) void T2TModel::Make(XTensor &input, XTensor &output, XTensor &padding)
{ {
XTensor encoding; XTensor encoding;
...@@ -107,10 +108,38 @@ void T2TModel::Make(XTensor &input, XTensor &output) ...@@ -107,10 +108,38 @@ void T2TModel::Make(XTensor &input, XTensor &output)
_SetDataLowTri(&mask, 1e9F, 0); _SetDataLowTri(&mask, 1e9F, 0);
_ScaleAndShiftMe(&mask, 1.0F, -1e9F); _ScaleAndShiftMe(&mask, 1.0F, -1e9F);
int * dimsPadding = new int[padding.order + 2];
for(int i = 0; i < padding.order - 1; i++)
dimsPadding[i] = padding.GetDim(i);
dimsPadding[padding.order - 1] = padding.GetDim(-1);
dimsPadding[padding.order] = padding.GetDim(-1);
XTensor * padding2 = NewTensorBuf(padding.order + 1, dimsPadding, padding.dataType,
padding.denseRatio, padding.devID, padding.mem);
for(int i = 0; i < padding2->order; i++)
dimsPadding[i + 1] = padding2->GetDim(i);
dimsPadding[0] = nhead;
XTensor * padding3 = NewTensorBuf(padding.order + 2, dimsPadding, padding.dataType,
padding.denseRatio, padding.devID, padding.mem);
/* mask of the padding */
_Unsqueeze(&padding, padding2, padding.order - 1, padding.GetDim(-1));
_Unsqueeze(padding2, padding3, 0, nhead);
_ScaleAndShiftMe(padding3, 1e9F, -1e9F);
_Sum(&mask, padding3, &mask);
encoding = MakeEncoding(input, mask, true); encoding = MakeEncoding(input, mask, true);
outputLayer.Make(encoding, output); outputLayer.Make(encoding, output);
delete[] dims; delete[] dims;
delete[] dimsPadding;
DelTensorBuf(padding2);
DelTensorBuf(padding3);
} }
else{ else{
ShowNTErrors("TODO!"); ShowNTErrors("TODO!");
......
...@@ -72,7 +72,7 @@ public: ...@@ -72,7 +72,7 @@ public:
XTensor MakeEncoding(XTensor &input, XTensor &mask, bool skipInputRes); XTensor MakeEncoding(XTensor &input, XTensor &mask, bool skipInputRes);
/* make the entire network (with the output softmax layer) */ /* make the entire network (with the output softmax layer) */
void Make(XTensor &input, XTensor &output); void Make(XTensor &input, XTensor &output, XTensor &padding);
/* get parameter matrices */ /* get parameter matrices */
void GetParams(XList &list); void GetParams(XList &list);
......
...@@ -150,7 +150,7 @@ void T2TTrainer::Train(const char * fn, T2TModel * model) ...@@ -150,7 +150,7 @@ void T2TTrainer::Train(const char * fn, T2TModel * model)
XTensor output; XTensor output;
/* make the network */ /* make the network */
model->Make(batch, output); model->Make(batch, output, padding);
/* make paddings for the output */ /* make paddings for the output */
if(output.GetDim(0) > 1) if(output.GetDim(0) > 1)
...@@ -214,11 +214,9 @@ test the model ...@@ -214,11 +214,9 @@ test the model
*/ */
void T2TTrainer::Test(const char * fn, const char * ofn, T2TModel * model) void T2TTrainer::Test(const char * fn, const char * ofn, T2TModel * model)
{ {
int step = 0;
int wc = 0; int wc = 0;
int wordCount = 0; int wordCount = 0;
int wordCountTotal = 0; int wordCountTotal = 0;
bool isEnd = false;
float loss = 0; float loss = 0;
/* data files */ /* data files */
...@@ -267,7 +265,7 @@ void T2TTrainer::Test(const char * fn, const char * ofn, T2TModel * model) ...@@ -267,7 +265,7 @@ void T2TTrainer::Test(const char * fn, const char * ofn, T2TModel * model)
XTensor output; XTensor output;
/* make the network */ /* make the network */
model->Make(batch, output); model->Make(batch, output, padding);
int bSize = batch.GetDim(0); int bSize = batch.GetDim(0);
int length = batch.GetDim(1); int length = batch.GetDim(1);
...@@ -675,7 +673,6 @@ void T2TTrainer::PadOutput(XTensor * output, XTensor * padding) ...@@ -675,7 +673,6 @@ void T2TTrainer::PadOutput(XTensor * output, XTensor * padding)
output->Reshape(output->unitNum/dimso[output->order - 1], dimso[output->order - 1]); output->Reshape(output->unitNum/dimso[output->order - 1], dimso[output->order - 1]);
XTensor * padding2 = NewTensorBuf(1, &padding->unitNum, X_FLOAT, 1.0F, padding->devID, padding->mem); XTensor * padding2 = NewTensorBuf(1, &padding->unitNum, X_FLOAT, 1.0F, padding->devID, padding->mem);
_CopyValues(padding, padding2); _CopyValues(padding, padding2);
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论