bug fixes

7180a4a3 · xiaotong · 2e7d5663 · 7180a4a3 · 7180a4a3 · 7180a4a3
Commit 7180a4a3 authored Jun 20, 2019 by xiaotong
--- a/source/sample/transformer/T2TPredictor.cpp
+++ b/source/sample/transformer/T2TPredictor.cpp
@@ -31,6 +31,7 @@ namespace transformer
 T2TStateBundle::T2TStateBundle()
 {
    states = NULL;
+    /* a newly constructed bundle is not flagged as the start; T2TSearch::Search 
+       sets the flag on the first bundle after creating it */
+    isStart = false;
 }
 /* de-constructor */
@@ -56,7 +57,8 @@ void T2TStateBundle::MakeStates(int num)
    for(int i = 0; i < num; i++){
        states[i].prediction = -1;
        states[i].pid = T2T_PID_EMPTY;
-        states[i].isEnd = 0;
+        states[i].isEnd = false;
+        states[i].isStart = false;
        states[i].prob = 0;
        states[i].probPath = 0;
        states[i].modelScore = 0;
@@ -103,7 +105,7 @@ void T2TPredictor::Create(T2TModel * model, XTensor * top, const XTensor * input
    InitTensor(&state->probPath, input->order, dims, X_FLOAT, 1.0F, input->devID, input->mem);
    InitTensor(&state->nstep, input->order, dims, X_FLOAT, 1.0F, input->devID, input->mem);
-    InitTensor(&state->endMark, input->order, dims, X_FLOAT, 1.0F, input->devID, input->mem);
+    InitTensor(&state->endMark, input->order, dims, X_INT, 1.0F, input->devID, input->mem);
    state->probPath.SetZeroAll();
    state->nstep.SetZeroAll();
@@ -160,8 +162,14 @@ void T2TPredictor::Predict(T2TStateBundle * next, XTensor * encoding, XTensor * 
    if(inputLast == NULL)
        inputDec = Identity(dummy);
    else{
-        XTensor inputDecSlide = SelectRange(*inputLast, inputLast->order - 1, 0, inputLast->GetDim(-1) - 2);
+        inputDec = GeneratePaths(s);
-        inputDec = Concatenate(inputDecSlide, dummy, inputDecSlide.order - 1);
+        for(int i = 0; i < inputEnc->order - 1; i++)
+            dims[i] = inputEnc->GetDim(i);
+        dims[inputEnc->order - 1] = inputDec.GetDim(-1);
+        inputDec.Resize(inputEnc->order, dims, X_INT);
+        inputDec.SetDevice(inputEnc->devID, inputEnc->mem);
+        inputDec = Concatenate(inputDec, dummy, inputDec.order - 1);
    }
    /* prediction probabilities */
@@ -213,5 +221,46 @@ void T2TPredictor::Predict(T2TStateBundle * next, XTensor * encoding, XTensor * 
    next->layersDec.Add(&output);
 }
+/* 
+generate paths up to the states of the current step. Every state is traced 
+back along its "last" links and the predictions found on the way fill one
+row of the returned matrix
+>> state - state bundle of the current step
+<< return - a 2D X_INT tensor of size (stateNum, distance), where distance is 
+            the number of states on the longest back-trace. Row i is 
+            right-aligned: the prediction of state i is in the last column 
+            and cells in front of a shorter path stay 0
+*/
+XTensor T2TPredictor::GeneratePaths(T2TStateBundle * state)
+{
+    CheckNTErrors(state != NULL, "Illegal state!");
+    CheckNTErrors(state->stateNum >= 0, "Illegal state!");
+    /* pass 1: the longest chain decides the number of columns */
+    int distance = -1;
+    for(int i = 0; i < state->stateNum; i++){
+        T2TState * cur = state->states + i;
+        int nsteps = 0;
+        while(cur != NULL){
+            nsteps++;
+            cur = cur->last;
+        }
+        if(nsteps > distance)
+            distance = nsteps;
+    }
+    /* distance is still -1 if the bundle holds no state. Stop here instead
+       of creating a tensor with a negative dimension */
+    CheckNTErrors(distance > 0, "No path to generate!");
+    XTensor path;
+    InitTensor2D(&path, state->stateNum, distance, X_INT);
+    path.SetZeroAll();
+    /* pass 2: walk each chain again and write the predictions from right to left */
+    for(int i = 0; i < state->stateNum; i++){
+        T2TState * cur = state->states + i;
+        int nsteps = 0;
+        while(cur != NULL){
+            nsteps++;
+            /* the nsteps-th state counted from the end goes to column distance - nsteps */
+            path.Set2DInt(cur->prediction, i, distance - nsteps);
+            cur = cur->last;
+        }
+    }
+    return path;
+}
 }
--- a/source/sample/transformer/T2TPredictor.h
+++ b/source/sample/transformer/T2TPredictor.h
@@ -39,13 +39,16 @@ public:
    /* we assume that the prediction is an integer */
    int prediction;
-    /* id of the problem. One can regard as the sentence id when we 
+    /* id of the problem. One can regard it as the sentence id when we 
-       translated a number of sentences in the batched manner. It is 
+       translate a number of sentences in the batched manner. The hypothesis 
-       an empty hypothesis if id = -1 */
+       is empty if id = -1 */
    int pid;
    /* indicates whether the state is an end */
-    int isEnd;
+    bool isEnd;
+    /* indicates whether the state is the start */
+    bool isStart;
    /* probability of every prediction (last state of the path) */
    float prob;
@@ -53,7 +56,7 @@ public:
    /* probability of every path */
    float probPath;
-    /* model score of every path */
+    /* model score of every path. A model score = path probability + some other stuff */
    float modelScore;
    /* nubmer of steps we go over so far */
@@ -101,6 +104,9 @@ public:
    /* number of states */
    int stateNum;
+    /* indicates whether it is the first state */
+    bool isStart;
 public:
    /* constructor */
    T2TStateBundle();

--- a/source/sample/transformer/T2TSearch.cpp
+++ b/source/sample/transformer/T2TSearch.cpp
@@ -32,7 +32,7 @@ namespace transformer
 T2TSearch::T2TSearch()
 {
    fullHypos = NULL;
-    endSymbols = NULL;
+    /* buffer of end-symbol ids; Init() loads the "endid" option into its first slot */
+    endSymbols = new int[32];
+    /* no end symbol is registered yet. Init() only sets the counter when a 
+       non-negative "endid" is found, so it must start from a defined value */
+    endSymbolNum = 0;
 }
 /* de-constructor */
@@ -53,6 +53,10 @@ void T2TSearch::Init(int argc, char ** argv)
 {
    LoadParamInt(argc, argv, "beamsize", &beamSize, 1);
    LoadParamFloat(argc, argv, "lenalpha", &alpha, 0.2F);
+    /* "endid" is loaded into the first slot of endSymbols (the trailing -1 is 
+       presumably the value used when the option is absent - TODO confirm) */
+    LoadParamInt(argc, argv, "endid", endSymbols, -1);
+    /* only a non-negative id counts as an end symbol; endSymbolNum is left 
+       untouched otherwise */
+    if(endSymbols[0] >= 0)
+        endSymbolNum = 1;
 }
 /* 
@@ -85,6 +89,8 @@ void T2TSearch::Search(T2TModel * model, XTensor * input, XTensor * padding, XTe
    /* create the first state */
    predictor.Create(model, &encoding, input, beamSize, first);
+    first->isStart = true;
    /* generate the sequence from left to right */
    for(int i = 0 ; i < maxLength; i++){
        T2TStateBundle * cur = states + i;
@@ -101,6 +107,9 @@ void T2TSearch::Search(T2TModel * model, XTensor * input, XTensor * padding, XTe
        /* beam pruning */
        Generate(next);
+        /* expand the search graph */
+        Expand(cur, next);
    }
    delete[] states;
@@ -170,9 +179,10 @@ void T2TSearch::Score(T2TStateBundle * prev, T2TStateBundle * beam)
    /* score = log-prob/lp */
    _DivDim(&score, &lp, &score, 0);
-    InitTensor(&mask, &prev->endMark);
+    InitTensor(&mask, 
-    CopyValues(prev->endMark, mask);
+               prev->endMark.order, prev->endMark.dimSize, X_FLOAT, 1.0F, 
-    _ScaleAndShiftMe(&mask, -1e9F);
+               prev->endMark.devID, prev->endMark.mem);
+    _SetDataFixedCond(&mask, &prev->endMark, -1e9F);
    mask.Reshape(mask.unitNum);
@@ -228,9 +238,11 @@ void T2TSearch::Generate(T2TStateBundle * beam)
    score.Reshape(order, dimsBeam);
    /* keep the most promissing candidates in the beam */
-    //TopK(score, scoreTopK, index, -1, beamSize);
+    TopK(score, scoreTopK, index, -1, beamSize);
    CopyValues(index, preID);
+    preID.Dump(stderr, "preid:");
    int sizeVocab = score.GetDim(-1);
@@ -246,6 +258,8 @@ void T2TSearch::Generate(T2TStateBundle * beam)
       in the vocabulary by dividing it with vocab-size and computing the remainder. */
    Mod(index, sizeVocab);
+    preID.Dump(stderr, "preid:");
    score.Reshape(order, dims);
    /* we keep the top-k scores */
@@ -292,25 +306,35 @@ void T2TSearch::Expand(T2TStateBundle * prev, T2TStateBundle * beam)
    InitTensorOnCPU(&probPath, &probPathRef);
    InitTensorOnCPU(&prediction, &predictionRef);
    InitTensorOnCPU(&endMarkCPU, &predictionRef);
+    InitTensor(&endMark, &predictionRef);
    /* we copy the data to CPU because the frequent access to GPU is slow
       and we can speed-up the process by doing the job on CPU. */
    CopyValues(idRef, id);
    CopyValues(modelScoreRef, modelScore);
-    CopyValues(prob, probRef);
+    CopyValues(probRef, prob);
    CopyValues(probPathRef, probPath);
+    CopyValues(predictionRef, prediction);
+    idRef.Dump(stderr, "idref:");
    CheckNTErrors(beam->stateNum == id.unitNum, "Errors occur in counting!");
-    /* we keep information on the states of the graph. All these are maintained 
+    /* Related variables are kept on the states of the graph. All these are 
-       on CPUs to ease the implementation of requent access and modification of
+       maintained on CPUs to ease the implementation of frequent access and 
-       the states. An alternative is to do this on GPUs but it needs much more
+       modification of the states. An alternative is to do this on GPUs but 
-       coding work and the speed-up is not obvious. */
+       it needs much more coding work and the speed-up is not obvious. */
    for(int i = 0; i < beam->stateNum; i++){
        T2TState & state = states[i];
        /* pointer to the previous state */
-        state.last = prev->states + id.GetInt(i);
+        if(prev->isStart)
+            state.last =  NULL;
+        else{
+            int offset = id.GetInt(i);
+            state.last = prev->states + offset;
+            CheckNTErrors(offset >= 0 && offset < prev->stateNum, "Wrong state index!");
+        }
        /* scores */
        state.modelScore = modelScore.Get(i);
@@ -320,8 +344,10 @@ void T2TSearch::Expand(T2TStateBundle * prev, T2TStateBundle * beam)
        /* prediction */
        state.prediction = prediction.GetInt(i);
+        CheckNTErrors(state.prediction >= 0, "Illegal prediction!");
        /* check if it is the end of the sequence */
-        state.isEnd = IsEnd(state.prediction) ? 1 : 0;
+        state.isEnd = IsEnd(state.prediction);
        /* set the ending mark */
        endMarkCPU.SetInt(state.isEnd, i);