/* NiuTrans.Tensor - an open-source tensor library
 * Copyright (C) 2018, Natural Language Processing Lab, Northeastern University. 
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-31
 */


#include "T2TModel.h"

#include <vector>

#include "T2TUtility.h"
#include "../../tensor/core/CHeader.h"

namespace transformer
{

/* 
default constructor: puts every member set in this file into a neutral state.
the actual configuration (device, memory pool, task flags, head number) is
loaded later by InitModel().
*/
T2TModel::T2TModel()
{
    /* task flags: neither language modeling nor translation is selected yet */
    isLM = false;
    isMT = false;

    /* head number used until InitModel() loads the real setting */
    nhead = 1;

    /* device id is -1 and no memory pool is attached */
    devID = -1;
    mem = NULL;
}

/* 
destructor: releases the memory pool pointed to by "mem".
in this file "mem" is NULL unless InitModel() created a pool, and deleting
a NULL pointer is a no-op, so no check is needed here.
*/
T2TModel::~T2TModel()
{
    delete mem;
}

/* 
initialize the model from command-line style arguments
>> argc - number of arguments
>> argv - list of pointers to the arguments

options loaded here (the last argument of each LoadParam* call is presumably
the fallback value used when the option is not given - to be confirmed against
T2TUtility):
   "dev"   -> devID                             (-1)
   "mem"   -> whether a memory pool is created  (false)
   "lm"    -> isLM                              (true)
   "mt"    -> isMT                              (false)
   "nhead" -> nhead                             (8)
the same argument list is then passed on to the encoder and the output layer.
*/
void T2TModel::InitModel(int argc, const char ** argv)
{
    bool useMem = false;

    LoadParamInt(argc, argv, "dev", &devID, -1);
    LoadParamBool(argc, argv, "mem", &useMem, useMem);
    LoadParamBool(argc, argv, "lm", &isLM, true);
    LoadParamBool(argc, argv, "mt", &isMT, false);
    LoadParamInt(argc, argv, "nhead", &nhead, 8);

    if(useMem){
        /* release the previous pool (if any) and reset the pointer BEFORE
           allocating the new one. otherwise "mem" would keep the stale address
           if the construction of the new pool fails, and the destructor would
           delete it a second time. */
        delete mem;
        mem = NULL;
        mem = new XMem(devID, UNI_FREE, MILLION * 512, 1024, MILLION * 128);
    }

    /* both sub-networks receive the same device id and the same (possibly NULL) memory pool */
    encoder.InitModel(argc, argv, isLM, isLM ? 1 : 0, devID, mem);
    outputLayer.InitModel(argc, argv, devID, mem);
}

/* 
make the encoding network
(a thin wrapper: all arguments are forwarded unchanged to encoder.Make)
>> input - input tensor
>> mask - the mask indicating which positions are (or are not) involved in computation
>> skipInputRes - indicates whether we skip the residual connection of the first layer
<< return - encoding result, i.e., whatever encoder.Make returns
*/
XTensor T2TModel::MakeEncoding(XTensor &input, XTensor &mask, bool skipInputRes)
{
    return encoder.Make(input, mask, skipInputRes);
}

/* 
make the entire network (with the output softmax layer) 
>> input - input tensor
>> output - output tensor (distribution)

only the language-model setting (isLM == true) is implemented. in any other
case an error is raised via ShowNTErrors.
*/
void T2TModel::Make(XTensor &input, XTensor &output)
{
    XTensor encoding;
    
    if(isLM){
        /* generate mask to see "previous" words only */
        int len = input.GetDim(input.order - 2);

        /* dimension sizes of the mask: nhead is put in front of the dimensions
           of the input and the last dimension is replaced by len, i.e.,
           (nhead, d_0, ..., d_{order-2}, len) with len = d_{order-2}.
           the buffer is a std::vector so that it is released on every exit
           path, including an error raised while building the network. */
        std::vector<int> dims(input.order + 1);
        for(int i = 0; i < input.order; i++)
            dims[i + 1] = input.GetDim(i);
        dims[0] = nhead;
        dims[input.order] = len;
        XTensor mask(input.order + 1, &dims[0], X_FLOAT, 1.0F, input.devID, input.mem);
        
        /* fill the mask in two steps: _SetDataLowTri writes 1e9 into the
           lower-triangular cells (called with a shift of -1), and then every
           cell is shifted by -1e9. the cells written in the first step end up
           at 0, while the remaining (upper-triangular) cells end up at -1e9,
           assuming the mask starts zero-filled - to be confirmed against XTensor.
           this matrix can be used to prevent the attention to current or
           following words in a given sequence. */
        _SetDataLowTri(&mask, 1e9F, -1);
        _ScaleAndShiftMe(&mask, 1.0F, -1e9F);

        encoding = MakeEncoding(input, mask, true);
        outputLayer.Make(encoding, output);
    }
    else{
        ShowNTErrors("TODO!");
    }
}

}
