/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*   http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/*
* $Created by: LI Yinqiao (email: li.yin.qiao.2012@hotmail.com) 2018-04-30
*/

#include "../XTensor.h"
#include "../XDevice.h"
#include "../core/ReduceMax.h"

namespace nts { // namespace nt(NiuTrans.Tensor)
                /* case 1 */
    /*
    case 1: ReduceMax on two small 2 * 4 tensors.
    Tensor a is reduced along dimension 0 (expected result: 4 values) and
    tensor b is reduced along dimension 1 (expected result: 2 values).
    The results are compared against hand-computed reference answers.
    Returns true if every checked result matches its reference answer.
    */
    bool TestReduceMax1()
    {
        /* input tensors of size 2 * 4 */
        int order = 2;
        int order_reduce = 1;
        int dimSize[2] = { 2, 4 };

        int unitNum = 1;
        for (int i = 0; i < order; i++)
            unitNum *= dimSize[i];

        /* a tensor of size 4 (target of the reduction along dimension 0) */
        int dimSize_reduce_a[1] = { 4 };

        int unitNum_a = 1;
        for (int i = 0; i < order_reduce; i++)
            unitNum_a *= dimSize_reduce_a[i];

        /* a tensor of size 2 (target of the reduction along dimension 1) */
        int dimSize_reduce_b[1] = { 2 };

        int unitNum_b = 1;
        for (int i = 0; i < order_reduce; i++)
            unitNum_b *= dimSize_reduce_b[i];

        DTYPE aData[2][4] = { { 0.0,   1.0,   2.0,   3.0 },
                              { 4.0,   5.0,   6.0,   7.0 } };
        DTYPE bData[2][4] = { { 1.0,  -1.0,  -3.0,  -5.0 },
                              { -7.0, -9.0, -11.0, -13.0 } };
        /* element-wise maximum over the two rows of aData */
        DTYPE answer_a[4] = { 4.0,  5.0,  6.0,  7.0 };
        /* maximum of each row of bData */
        DTYPE answer_b[2] = { 1.0,  -7.0 };

        /* CPU test */
        bool cpuTest = true;

        /* create tensors */
        XTensor * a = NewTensor(order, dimSize);
        XTensor * reduce_a = NewTensor(order_reduce, dimSize_reduce_a);
        XTensor * b = NewTensor(order, dimSize);
        XTensor * reduce_b = NewTensor(order_reduce, dimSize_reduce_b);

        /* initialize variables */
        a->SetData(aData, unitNum);
        b->SetData(bData, unitNum);

        /* call reduce max function */
        ReduceMax(a, reduce_a, 0);
        ReduceMax(b, reduce_b, 1);

        /* check results */
        cpuTest = reduce_a->CheckData(answer_a, unitNum_a) && reduce_b->CheckData(answer_b, unitNum_b);

        /* stays true when the GPU test is compiled out, so the result is then just cpuTest */
        bool gpuTest = true;

#ifdef USE_CUDA
        /* GPU test */

        /* create tensor */
        /* NOTE(review): no device id is passed to NewTensor here, so these tensors
           may not actually be placed on a GPU - confirm against NewTensor's signature */
        XTensor * aGPU = NewTensor(order, dimSize, X_FLOAT);
        XTensor * reduce_aGPU = NewTensor(order_reduce, dimSize_reduce_a, X_FLOAT);
        XTensor * bGPU = NewTensor(order, dimSize, X_FLOAT);
        XTensor * reduce_bGPU = NewTensor(order_reduce, dimSize_reduce_b, X_FLOAT);

        /* Initialize variables */
        aGPU->SetData(aData, unitNum);
        bGPU->SetData(bData, unitNum);

        /* call reduce max function */
        ReduceMax(aGPU, reduce_aGPU, 0);
        ReduceMax(bGPU, reduce_bGPU, 1);

        /* check results */
        gpuTest = reduce_aGPU->CheckData(answer_a, unitNum_a) && reduce_bGPU->CheckData(answer_b, unitNum_b);

        /* destroy GPU variables
           (one delete per pointer: "delete p, q;" only deletes p) */
        delete aGPU;
        delete reduce_aGPU;
        delete bGPU;
        delete reduce_bGPU;
#endif // USE_CUDA

        /* destroy CPU variables */
        delete a;
        delete reduce_a;
        delete b;
        delete reduce_b;

        return cpuTest && gpuTest;
    }

    /*
    case 2: ReduceMax on large-scale data.
    Two 10000 * 500 tensors filled with 1 are reduced along dimension 0
    (500 results) and along dimension 1 (10000 results) respectively;
    every entry of both results is expected to be 1.
    Returns true if every checked result matches its reference answer.
    */
    bool TestReduceMaxForLargescale()
    {
        /* input tensors of size 10000 * 500 */
        int order = 2;
        int order_reduce = 1;
        int dimSize[2] = { 10000, 500 };

        int unitNum = 1;
        for (int i = 0; i < order; i++)
            unitNum *= dimSize[i];

        /* a tensor of size 500 (target of the reduction along dimension 0) */
        int dimSize_reduce_a[1] = { 500 };

        int unitNum_a = 1;
        for (int i = 0; i < order_reduce; i++)
            unitNum_a *= dimSize_reduce_a[i];

        /* a tensor of size 10000 (target of the reduction along dimension 1) */
        int dimSize_reduce_b[1] = { 10000 };

        int unitNum_b = 1;
        for (int i = 0; i < order_reduce; i++)
            unitNum_b *= dimSize_reduce_b[i];

        /* input data: every element is 1; the buffer is sized from unitNum
           and released before returning */
        DTYPE * data = new DTYPE[unitNum];
        for (int i = 0; i < unitNum; i++)
            data[i] = 1;

        /* reference answers: the maximum over all-ones data is 1 everywhere */
        DTYPE answer_a[500];
        for (int i = 0; i < unitNum_a; i++)
            answer_a[i] = 1;
        DTYPE answer_b[10000];
        for (int i = 0; i < unitNum_b; i++)
            answer_b[i] = 1;

        /* CPU test */
        bool cpuTest = true;

        /* create tensors */
        XTensor * a = NewTensor(order, dimSize);
        XTensor * reduce_a = NewTensor(order_reduce, dimSize_reduce_a);
        XTensor * b = NewTensor(order, dimSize);
        XTensor * reduce_b = NewTensor(order_reduce, dimSize_reduce_b);

        /* initialize variables */
        a->SetData(data, unitNum);
        b->SetData(data, unitNum);

        /* call reduce max function */
        ReduceMax(a, reduce_a, 0);
        ReduceMax(b, reduce_b, 1);

        /* check results */
        cpuTest = reduce_a->CheckData(answer_a, unitNum_a) && reduce_b->CheckData(answer_b, unitNum_b);

        /* stays true when the GPU test is compiled out, so the result is then just cpuTest */
        bool gpuTest = true;

#ifdef USE_CUDA
        /* GPU test */

        /* create tensor */
        /* NOTE(review): no device id is passed to NewTensor here, so these tensors
           may not actually be placed on a GPU - confirm against NewTensor's signature */
        XTensor * aGPU = NewTensor(order, dimSize, X_FLOAT);
        XTensor * reduce_aGPU = NewTensor(order_reduce, dimSize_reduce_a, X_FLOAT);
        XTensor * bGPU = NewTensor(order, dimSize, X_FLOAT);
        XTensor * reduce_bGPU = NewTensor(order_reduce, dimSize_reduce_b, X_FLOAT);

        /* Initialize variables */
        aGPU->SetData(data, unitNum);
        bGPU->SetData(data, unitNum);

        /* call reduce max function */
        ReduceMax(aGPU, reduce_aGPU, 0);
        ReduceMax(bGPU, reduce_bGPU, 1);

        /* check results */
        gpuTest = reduce_aGPU->CheckData(answer_a, unitNum_a) && reduce_bGPU->CheckData(answer_b, unitNum_b);

        /* destroy GPU variables
           (one delete per pointer: "delete p, q;" only deletes p) */
        delete aGPU;
        delete reduce_aGPU;
        delete bGPU;
        delete reduce_bGPU;
#endif // USE_CUDA

        /* destroy CPU variables and the input buffer */
        delete a;
        delete reduce_a;
        delete b;
        delete reduce_b;
        delete[] data;

        return cpuTest && gpuTest;
    }

    /* other cases */
    /*
    TODO!!
    */

    /*
    test for ReduceMax Function
    Runs every ReduceMax test case in turn, prints a pass/fail line for each
    case followed by an overall summary, and returns true only if all cases passed.
    */
    extern "C"
        bool TestReduceMax()
    {
        XPRINT(0, stdout, "[TEST ReduceMax]\n");

        /* becomes false as soon as any single case fails */
        bool allPassed = true;

        /* case 1 test */
        if (TestReduceMax1()) {
            XPRINT(0, stdout, ">> case 1 passed!\n");
        }
        else {
            XPRINT(0, stdout, ">> case 1 failed!\n");
            allPassed = false;
        }

        /* case 2 test */
        if (TestReduceMaxForLargescale()) {
            XPRINT(0, stdout, ">> case 2 passed!\n");
        }
        else {
            XPRINT(0, stdout, ">> case 2 failed!\n");
            allPassed = false;
        }

        /* other cases test: TODO */

        /* overall summary */
        if (!allPassed) {
            XPRINT(0, stdout, ">> Failed!\n");
        }
        else {
            XPRINT(0, stdout, ">> All Passed!\n");
        }

        XPRINT(0, stdout, "\n");

        return allPassed;
    }

} // namespace nt(NiuTrans.Tensor)
