/* NiuTrans.Tensor - an open-source tensor library
 * Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/

#include "../../XTensor.h"
#include "../../XName.h"
#include "ReduceMax.h"
#include "ReduceMax.cuh"

namespace nts{ // namespace nts(NiuTrans.Tensor)

/* 
get the max value of the items along a dimension of the tensor

>> input - the input tensor
>> output - the output tensor
>> dim - the dimension where the reduction is performed on
*/
void _ReduceMax(const XTensor * input, XTensor * output, int dim)
{
    CheckNTErrors((input->devID == output->devID || (input->devID < 0 && output->devID < 0)), 
                  "This code must be run on the same device!");
    CheckNTErrors((input && output), "Empty input or output tensors!");
    CheckNTErrors((input->order == output->order + 1), "Incorrect tensor sizes!");
    CheckNTErrors((input->order > dim && dim >=0), "Illegal dimension to reduce!");
    CheckNTErrors((input->dataType == output->dataType), "Unmatched data types!");
	
	int dimRDI = input->order - dim - 1;
    for(int i = 0; i < input->order; i++){
        if(i < dimRDI){
            CheckNTErrors((input->dimSizeRDI[i] == output->dimSizeRDI[i]), 
                          "Unmatched tensors!");
        }
        else if(i > dimRDI){
            CheckNTErrors((input->dimSizeRDI[i] == output->dimSizeRDI[i - 1]), 
                          "Unmatched tensors!");
        }
    }

    if(input->devID >= 0){
#ifdef USE_CUDA
        _CudaReduceMax(input, output, dim);
#endif
    }
    else{
        CheckNTErrors((input->dataType == DEFAULT_DTYPE), "TODO!");

        int stride = 1;
        int strideNum = input->dimSizeRDI[dimRDI];
        int blockSize = 1;
        int blockNum = 1;
        for (int i = 0; i < input->order; i++) {
            if (i < dimRDI)
                stride *= input->dimSizeRDI[i];
            else if (i > dimRDI)
                blockNum *= input->dimSizeRDI[i];
        }
        blockSize = stride * strideNum;

        for(int k = 0; k < blockNum; k++){
            DTYPE * ip = (DTYPE*)input->data + blockSize * k;
            DTYPE * op = (DTYPE*)output->data + stride * k;
            for(int i = 0; i < stride; i++){
                DTYPE max = FLOAT_MIN;
                DTYPE * ipe = ip + blockSize;
                for(DTYPE * ipb = ip + i; ipb < ipe; ipb += stride){
                    DTYPE v = *ipb;
                    if(max < v)
                        max = v;
                }
                *(op + i) = max;
            }
        }
    }
}

/* 
get the max value of the items along a dimension of the tensor (return a XTensor structure).
a new tensor with dimension "dim" removed is created to hold the result

>> input - the input tensor
>> dim - the dimension where the reduction is performed on
<< return - a tensor of order (input.order - 1) keeping the max values
*/
XTensor ReduceMax(const XTensor &input, int dim)
{
    CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!");

    /* shape of the result: the input shape with entry "dim" skipped */
    const int outOrder = input.order - 1;
    int * outDims = new int[outOrder];
    for(int d = 0; d < outOrder; d++)
        outDims[d] = input.dimSize[(d < dim) ? d : d + 1];

    /* a non-sparse input uses a dense ratio of 1, a sparse one passes on its own */
    float ratio = 1.0F;
    if(input.isSparse)
        ratio = input.denseRatio;

    XTensor output(outOrder, outDims, input.dataType, ratio, input.devID, input.mem);
    output.SetTMPFlag();

    /* run the reduction itself */
    _ReduceMax(&input, &output, dim);

    /* tensor connection: link input to output as REDUCE_REDUCEMAX
       and attach "dim" as an integer parameter */
    XLink::MakeLink(&input, NULL, &output, REDUCE_REDUCEMAX);
    XLink::AddParamToHeadInt(&output, dim);

    /* release the temporary shape buffer */
    delete[] outDims;

    return output;
}

} // namespace nts(NiuTrans.Tensor)