Commit 4c5776f0 by huchi

add positional embedding

parent ac6ed3a1
......@@ -116,7 +116,7 @@ XTensor AttDecoder::Make(XTensor &inputDec, XTensor &outputEnc, XTensor *mask, X
{
XTensor x;
x = embedder.Make(inputDec, inputDec.GetDim(1));
x = embedder.Make(inputDec, inputDec.GetDim(1), true);
/* dropout */
if(isTraining && dropoutP > 0)
......
......@@ -100,7 +100,7 @@ void T2TEmbedder::MakePosEmbedding(int eSize, int d, int length, int padIdx)
/* padding zeros */
int padStart = padIdx * eSize;
for (int i = padStart; i < padStart + eSize; ++i)
for (int i = padStart; i < padStart + eSize; i++)
data[i] = 0.F;
posEmbeddingBase.SetData(data, posEmbeddingBase.unitNum);
......@@ -111,55 +111,66 @@ void T2TEmbedder::MakePosEmbedding(int eSize, int d, int length, int padIdx)
/*
make the network: sum of word embedding and positional embedding
>> input - word indices, shape (batch, length)
>> prevLen - length of the previously processed sequence
              (offsets the starting position on the encoder side)
>> nstep - current decoding step; used only when isDec is true
>> isDec - whether we are embedding decoder input (one step at a time)
<< return - wordEmbedding * sqrt(eSize) + posEmbedding
*/
XTensor T2TEmbedder::Make(XTensor &input, int prevLen, int nstep, bool isDec)
{
    /* assert padding index is 1 */
    CheckNTErrors(input.order > 1, "Wrong input tensor size!");
    CheckNTErrors(input.dimSize[input.order - 1] < maxLength, "The sequence is too long!");
    CheckNTErrors(vSize > 0, "set vocabulary size by \"-vsize\"");
    CheckNTErrors(eSize > 0, "set embedding size by \"-esize\"");

    XTensor wordEmbedding, position, posEmbedding;
    InitTensorV2(&position, &input);

    if (!isDec) {
        /* encoder: padding tokens (index 1) keep position 1; real tokens
           are numbered 2 + prevLen, 3 + prevLen, ... along each row.
           The index computation runs on a CPU copy of the input. */
        XTensor inputCPU;
        InitTensorOnCPU(&inputCPU, &input);
        _CopyValues(&input, &inputCPU);

        /* allocate only on this path — the decoder branch never uses it */
        int* posData = new int[input.unitNum];
        for (int i = 0; i < inputCPU.GetDim(0); i++) {
            int startNoPad = 2 + prevLen;
            int* p = ((int*)inputCPU.data) + i * inputCPU.GetDim(1);
            for (int j = 0; j < inputCPU.GetDim(1); j++) {
                posData[i * inputCPU.GetDim(1) + j] = (p[j] == 1) ? 1 : startNoPad++;
            }
        }
        position.SetData(posData, position.unitNum);
        delete[] posData;
    }
    else {
        /* decoder: every entry of the current step shares position nstep + 2
           (positions 0 and 1 are reserved, matching the encoder numbering) */
        for (int i = 0; i < position.GetDim(0); i++) {
            for (int j = 0; j < position.GetDim(1); j++) {
                position.Set2DInt(nstep + 2, i, j);
            }
        }
    }

    /* we make positional embeddings first */
    posEmbedding = Gather(posEmbeddingBase, position);

    /* then we make word embeddings, scaled by sqrt(eSize) */
    wordEmbedding = Gather(w, input);
    wordEmbedding = Linear(wordEmbedding, (float)sqrt((float)eSize));

    /* we sum over the two embeddings */
    return Sum(wordEmbedding, posEmbedding);
}
}
......@@ -77,7 +77,7 @@ public:
void MakePosEmbedding(int eSize, int d, int length, int padIdx);
/* make the network */
XTensor Make(XTensor &input, int prevLen=0);
XTensor Make(XTensor &input, int prevLen=0, int nstep = -1, bool isDec = false);
};
}
......
......@@ -166,7 +166,7 @@ void T2TPredictor::Predict(T2TStateBundle* next, XTensor* encoding, XTensor* inp
inputDec = GetLastPrediction(s);
inputDec.SetDevice(inputEnc->devID);
}
inputDec.Dump(stderr, "inputDec");
/* prediction probabilities */
XTensor& output = next->prob;
......
......@@ -464,8 +464,7 @@ void T2TSearch::Collect(T2TStateBundle* beam)
for (int i = 0; i < beam->stateNum; i++) {
T2TState& state = states[i];
CheckNTErrors(state.pid >= 0 && state.pid < batchSize,
"Invalid sample id!");
CheckNTErrors(state.pid >= 0 && state.pid < batchSize, "Invalid sample id!");
/* check if this is the first end symbol. It is false
if there have been end symbols in previously generated words. */
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论