/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*   http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-19
*/

#include "../XTensor.h"
#include "../XUtility.h"
#include "TSigmoid.h"

namespace nts { // namespace nts(NiuTrans.Tensor)
/* case 1: test Sigmoid function and SigmoidBackward function.
* sigmoid function: y = 1/(1+exp(-x))
* backward computation: dE/ds = dE/dy * dy/dx
*/
bool TestSigmoid1()
{
    /* a input tensor of size (3) */
    int sOrder = 1;
    int * sDimSize = new int[sOrder];
    sDimSize[0] = 3;

    int sUnitNum = 1;
    for (int i = 0; i < sOrder; i++)
        sUnitNum *= sDimSize[i];

    DTYPE xData[3] = {0.0F, 1.0F, 2.0F};
    DTYPE gData[3] = {0.4F, 0.8F, 1.0F};
    DTYPE answer[3];

    /* CPU test */
    bool cpuTest = true;

    /* create tensors */
    XTensor * x = NewTensor(sOrder, sDimSize);
    XTensor * y = NewTensor(sOrder, sDimSize);
    XTensor * g = NewTensor(sOrder, sDimSize);
    XTensor * dedy = NewTensor(sOrder, sDimSize);
    XTensor * dedx = NewTensor(sOrder, sDimSize);

    /* initialize variables */
    x->SetData(xData, sUnitNum);
    g->SetData(gData, sUnitNum);
    y->SetZeroAll();
    dedx->SetZeroAll();

    /* call Sigmoid function */
    Sigmoid(x, y);

    /* cross_entropy: de/dy_i = -t_i / y_i */
    DTYPE dedyData[3];
    DTYPE * yProcessedData = (DTYPE*)y->data;
	for (int i = 0; i < sUnitNum; i++)
		dedyData[i] = - gData[i] / yProcessedData[i];

    /* initialize variables */
    dedy->SetData(dedyData, sUnitNum);
    
	for (int i = 0; i < sUnitNum; i++)
		answer[i] = dedyData[i] * yProcessedData[i] * (1 - yProcessedData[i]);

    /* call SigmoidBackward function */
    SigmoidBackward(g, y, x, dedy, dedx, NOLOSS);
    
    /* check result */
    printf("CPU Test:\n");
    printf("Computer de/dx:");
    DTYPE * checkData = (DTYPE*)dedx->data;
    for (int i = 0; i < sUnitNum; i++) {
        printf("\t%f", checkData[i]);
    }
    printf("\n");

    printf("Real de/dx:");
    for (int i = 0; i < sUnitNum; i++) {
        printf("\t%f", answer[i]);
    }
    printf("\n");

#ifdef USE_CUDA
    /* GPU test */
    bool gpuTest = true;

        /* create tensors */
    XTensor * xGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
    XTensor * yGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
    XTensor * gGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
    XTensor * dedyGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
    XTensor * dedxGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);

    /* initialize variables */
    xGPU->SetData(xData, sUnitNum);
    gGPU->SetData(gData, sUnitNum);
    yGPU->SetZeroAll();
    dedxGPU->SetZeroAll();

    /* call Sigmoid function */
    Sigmoid(xGPU, yGPU);

    /* cross_entropy: de/dy_i = -t_i / y_i */
    void * yProcessedDataGPU = (DTYPE*)yGPU->data;
    int size = sUnitNum * yGPU->unitSize;
    DTYPE * copy = new DTYPE[size];
    XMemCopy(copy, -1, yProcessedDataGPU, yGPU->devID, size);
	for (int i = 0; i < sUnitNum; i++) {
		dedyData[i] = - gData[i] / *copy++;
    }

    /* initialize variables */
    dedyGPU->SetData(dedyData, sUnitNum);
    
	for (int i = 0; i < sUnitNum; i++)
		answer[i] = dedyData[i] * yProcessedData[i] * (1 - yProcessedData[i]);

    /* call SigmoidBackward function */
    SigmoidBackward(gGPU, yGPU, xGPU, dedyGPU, dedxGPU, NOLOSS);
    
    /* check result */
    printf("\nGPU Test:\n");
    printf("Computer de/dx:");
    checkData = (DTYPE*)dedxGPU->data;
    size = sUnitNum * dedxGPU->unitSize;
    DTYPE * copy1 = new DTYPE[size];
    XMemCopy(copy1, -1, checkData, dedxGPU->devID, size);
    for (int i = 0; i < sUnitNum; i++) {
        printf("\t%f", copy1[i]);
    }
    printf("\n");

    printf("Real de/dx:");
    for (int i = 0; i < sUnitNum; i++) {
        printf("\t%f", answer[i]);
    }
    printf("\n\n");

    /* destroy variables */
    delete x, y, g, dedx, dedy;
    delete xGPU, yGPU, gGPU, dedxGPU, dedyGPU;
    delete[] sDimSize;

    return cpuTest && gpuTest;
#else
    /* destroy variables */
    delete x, y, g, dedx, dedy;
    delete[] sDimSize;

    return cpuTest;
#endif // USE_CUDA
}

/* case 2: test Sigmoid function and SigmoidBackward function (with the cross entropy loss).
* sigmoid function: y = 1/(1+exp(-x))
* backward computation: dE/dx = dE/dy * dy/dx
*
* SigmoidBackward is called with CROSSENTROPY and the gold standard g; the
* resulting dE/dx is printed next to the reference values.
*
* NOTE(review): the reference "answer" is all zeros and is only printed, never
* compared with the computed values, so this case currently cannot fail.
* Fill in the expected gradient and add a comparison once it is confirmed.
*
* >> return - always true at present (see the note above)
*/
bool TestSigmoid2()
{
    /* a input tensor of size (3) */
    int sOrder = 1;
    int * sDimSize = new int[sOrder];
    sDimSize[0] = 3;

    int sUnitNum = 1;
    for (int i = 0; i < sOrder; i++)
        sUnitNum *= sDimSize[i];

    DTYPE xData[3] = {0.0F, 1.0F, 2.0F};
    DTYPE gData[3] = {0.4F, 0.8F, 1.0F};
    DTYPE answer[3] = {0.0F, 0.0F, 0.0F};

    /* CPU test */
    bool cpuTest = true;

    /* create tensors */
    XTensor * x = NewTensor(sOrder, sDimSize);
    XTensor * y = NewTensor(sOrder, sDimSize);
    XTensor * g = NewTensor(sOrder, sDimSize);
    XTensor * dedy = NewTensor(sOrder, sDimSize);
    XTensor * dedx = NewTensor(sOrder, sDimSize);

    /* initialize variables
       (dedy is zeroed as well so that it holds defined values no matter how
       SigmoidBackward uses it) */
    x->SetData(xData, sUnitNum);
    g->SetData(gData, sUnitNum);
    y->SetZeroAll();
    dedy->SetZeroAll();
    dedx->SetZeroAll();

    /* call Sigmoid function */
    Sigmoid(x, y);

    /* call SigmoidBackward function */
    SigmoidBackward(g, y, x, dedy, dedx, CROSSENTROPY);

    /* check result */
    printf("CPU Test:\n");
    printf("Computer de/dx:");
    DTYPE * checkData = (DTYPE*)dedx->data;
    for (int i = 0; i < sUnitNum; i++) {
        printf("\t%f", checkData[i]);
    }
    printf("\n");

    printf("Real de/dx:");
    for (int i = 0; i < sUnitNum; i++) {
        printf("\t%f", answer[i]);
    }
    printf("\n");

#ifdef USE_CUDA
    /* GPU test */
    bool gpuTest = true;

    /* create tensors */
    XTensor * xGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
    XTensor * yGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
    XTensor * gGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
    XTensor * dedyGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
    XTensor * dedxGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);

    /* initialize variables
       (dedyGPU is zeroed as well, for the same reason as dedy above) */
    xGPU->SetData(xData, sUnitNum);
    gGPU->SetData(gData, sUnitNum);
    yGPU->SetZeroAll();
    dedyGPU->SetZeroAll();
    dedxGPU->SetZeroAll();

    /* call Sigmoid function */
    Sigmoid(xGPU, yGPU);

    /* call SigmoidBackward function */
    SigmoidBackward(gGPU, yGPU, xGPU, dedyGPU, dedxGPU, CROSSENTROPY);

    /* check result: copy dE/dx back to the host before printing.
       NOTE(review): size looks like a byte count (unit number * unitSize), so a
       buffer of `size` elements is probably larger than needed - confirm before
       shrinking it. */
    printf("\nGPU Test:\n");
    printf("Computer de/dx:");
    checkData = (DTYPE*)dedxGPU->data;
    int size = sUnitNum * dedxGPU->unitSize;
    DTYPE * dedxCopy = new DTYPE[size];
    XMemCopy(dedxCopy, -1, checkData, dedxGPU->devID, size);
    for (int i = 0; i < sUnitNum; i++) {
        printf("\t%f", dedxCopy[i]);
    }
    printf("\n");

    printf("Real de/dx:");
    for (int i = 0; i < sUnitNum; i++) {
        printf("\t%f", answer[i]);
    }
    printf("\n\n");

    /* destroy variables
       (one delete per pointer: "delete a, b;" only deletes a) */
    delete[] dedxCopy;
    delete x;
    delete y;
    delete g;
    delete dedx;
    delete dedy;
    delete xGPU;
    delete yGPU;
    delete gGPU;
    delete dedxGPU;
    delete dedyGPU;
    delete[] sDimSize;

    return cpuTest && gpuTest;
#else
    /* destroy variables
       (one delete per pointer: "delete a, b;" only deletes a) */
    delete x;
    delete y;
    delete g;
    delete dedx;
    delete dedy;
    delete[] sDimSize;

    return cpuTest;
#endif // USE_CUDA
}

/* other cases */
/*
    TODO!!
*/

/* entry point of the Sigmoid tests
* Runs the test cases and reports the outcome of each one through XPRINT.
* Only case 1 (TestSigmoid1) is executed at the moment.
* >> return - true if all executed cases passed, false otherwise
*/
extern "C"
bool TestSigmoid()
{
    XPRINT(0, stdout, "[TEST SIGMOID] -------------\n");

    /* stays true as long as no case has failed */
    bool allPassed = true;

    /* case 1 test */
    bool casePassed = TestSigmoid1();

    if (casePassed) {
        XPRINT(0, stdout, ">> case 1 passed!\n");
    }
    else {
        allPassed = false;
        XPRINT(0, stdout, ">> case 1 failed!\n");
    }

    /* other cases test */
    /*
    TODO!!
    */

    /* overall summary */
    if (!allPassed) {
        XPRINT(0, stdout, ">> Failed!\n");
    }
    else {
        XPRINT(0, stdout, ">> All Passed!\n");
    }

    XPRINT(0, stdout, "\n");

    return allPassed;
}

} // namespace nts(NiuTrans.Tensor)
