/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*   http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-27
*/

#include "../XTensor.h"
#include "TTopK.h"

namespace nts { // namespace nts(NiuTrans.Tensor)
/*
case 1: run TopK on a 2 * 4 tensor twice, once along each dimension,
with k set to the full length of that dimension.
The results (values and indices) are compared against hand-written answers:
  - outputAnswerA / indexAnswerA for dim = 0
  - outputAnswerB / indexAnswerB for dim = 1
The same checks are repeated on device 0 when USE_CUDA is defined.
>> return - true if every check passes, false otherwise
*/
bool TestTopK1()
{
    /* an input tensor of size 2 * 4 */
    const int inputOrder = 2;
    int inputDimSize[inputOrder] = {2, 4};

    int inputUnitNum = 1;
    for (int i = 0; i < inputOrder; i++)
        inputUnitNum *= inputDimSize[i];

    /* an output tensor of size 2 * 4 */
    const int outputOrder = 2;
    int outputDimSize[outputOrder] = {2, 4};

    int outputUnitNum = 1;
    for (int i = 0; i < outputOrder; i++)
        outputUnitNum *= outputDimSize[i];

    DTYPE inputData[2][4] = { {5.0, 1.0, 2.0, 8.0},
                              {4.0, 3.0, 7.0, 6.0} };

    /* expected values and indices for dim = 0 */
    DTYPE outputAnswerA[2][4] = { {5.0, 3.0, 7.0, 8.0},
                                  {4.0, 1.0, 2.0, 6.0} };
    int indexAnswerA[2][4] = { {0, 1, 1, 0},
                               {1, 0, 0, 1} };

    /* expected values and indices for dim = 1 */
    DTYPE outputAnswerB[2][4] = { {8.0, 5.0, 2.0, 1.0},
                                  {7.0, 6.0, 4.0, 3.0} };
    int indexAnswerB[2][4] = { {3, 0, 2, 1},
                               {2, 3, 0, 1} };

    /* CPU test */
    bool cpuTest = true;

    /* create tensors */
    XTensor * input = NewTensor(inputOrder, inputDimSize);
    XTensor * outputA = NewTensor(outputOrder, outputDimSize);
    XTensor * outputB = NewTensor(outputOrder, outputDimSize);
    XTensor * indexA = NewTensor(outputOrder, outputDimSize, X_INT);
    XTensor * indexB = NewTensor(outputOrder, outputDimSize, X_INT);

    /* initialize variables */
    input->SetData(inputData, inputUnitNum);
    outputA->SetZeroAll();
    outputB->SetZeroAll();
    indexA->SetZeroAll();
    indexB->SetZeroAll();

    /* call TopK function */
    int dim = 0;
    int k = inputDimSize[dim];
    TopK(input, outputA, indexA, dim, k);

    dim = 1;
    k = inputDimSize[dim];
    TopK(input, outputB, indexB, dim, k);

    /* check results */
    cpuTest = outputA->CheckData(outputAnswerA, outputUnitNum) &&
              outputB->CheckData(outputAnswerB, outputUnitNum) &&
              indexA->CheckData(indexAnswerA, outputUnitNum) &&
              indexB->CheckData(indexAnswerB, outputUnitNum);

    /* stays true when the GPU test is not compiled in */
    bool gpuTest = true;

#ifdef USE_CUDA
    /* GPU test */

    /* create tensors */
    XTensor * inputGPU = NewTensor(inputOrder, inputDimSize, X_FLOAT, 1.0F, 0);
    XTensor * outputGPUA = NewTensor(outputOrder, outputDimSize, X_FLOAT, 1.0F, 0);
    XTensor * outputGPUB = NewTensor(outputOrder, outputDimSize, X_FLOAT, 1.0F, 0);
    XTensor * indexGPUA = NewTensor(outputOrder, outputDimSize, X_INT, 1.0F, 0);
    XTensor * indexGPUB = NewTensor(outputOrder, outputDimSize, X_INT, 1.0F, 0);

    /* initialize variables */
    inputGPU->SetData(inputData, inputUnitNum);
    outputGPUA->SetZeroAll();
    outputGPUB->SetZeroAll();
    indexGPUA->SetZeroAll();
    indexGPUB->SetZeroAll();

    /* call TopK function */
    dim = 0;
    k = inputDimSize[dim];
    TopK(inputGPU, outputGPUA, indexGPUA, dim, k);

    dim = 1;
    k = inputDimSize[dim];
    TopK(inputGPU, outputGPUB, indexGPUB, dim, k);

    /* check results */
    gpuTest = outputGPUA->CheckData(outputAnswerA, outputUnitNum) &&
              outputGPUB->CheckData(outputAnswerB, outputUnitNum) &&
              indexGPUA->CheckData(indexAnswerA, outputUnitNum) &&
              indexGPUB->CheckData(indexAnswerB, outputUnitNum);

    /* destroy GPU variables.
       Each pointer needs its own delete: "delete a, b;" is a comma
       expression that frees only a. */
    delete inputGPU;
    delete outputGPUA;
    delete outputGPUB;
    delete indexGPUA;
    delete indexGPUB;
#endif // USE_CUDA

    /* destroy CPU variables (one delete per tensor, see note above on
       the comma operator). The dimension arrays live on the stack and
       need no explicit release. */
    delete input;
    delete outputA;
    delete outputB;
    delete indexA;
    delete indexB;

    return cpuTest && gpuTest;
}

/* other cases */
/*
TODO!!
*/

/*
test driver for the TopK function: runs every test case, reports the
outcome of each one and an overall verdict on stdout
>> return - true if all cases passed, false otherwise
*/
extern "C"
bool TestTopK()
{
    XPRINT(0, stdout, "[TEST TopK]\n");

    /* becomes false as soon as any single case fails */
    bool allPassed = true;

    /* case 1 test */
    const bool case1Passed = TestTopK1();
    if (case1Passed) {
        XPRINT(0, stdout, ">> case 1 passed!\n");
    }
    else {
        allPassed = false;
        XPRINT(0, stdout, ">> case 1 failed!\n");
    }

    /* other cases test */
    /*
    TODO!!
    */

    /* overall summary */
    if (!allPassed) {
        XPRINT(0, stdout, ">> Failed!\n");
    }
    else {
        XPRINT(0, stdout, ">> All Passed!\n");
    }

    XPRINT(0, stdout, "\n");

    return allPassed;
}

} // namespace nts(NiuTrans.Tensor)
