Merge with XU Chen's branch (Don't use this! It's an incomplete version.)

1. Clean up the code. 2. Fix minor errors.

Merge with XU Chen's branch (Don't use this! It's an incomplete version.)
1. Clean up the code. 2. Fix minor errors.
549e6d0f · liyinqiao · 2c3c2c86 · 549e6d0f · 549e6d0f
Commit 549e6d0f authored Mar 18, 2020 by liyinqiao
--- a/source/Main.cpp
+++ b/source/Main.cpp
@@ -63,155 +63,3 @@ int main( int argc, const char ** argv )
    return 0;
 }
-void BackwardTest()
-{
-    XNet net;
-    XTensor a;
-    XTensor b;
-    XTensor c;
-    a.enableGrad = true;
-    b.enableGrad = false;
-    c.enableGrad = false;
-    XTensor mean;
-    XTensor origin;
-    InitTensor2DV2(&a, 2, 3);
-    InitTensor1DV2(&b, 2);
-    a.SetZeroAll();
-    b.SetZeroAll();
-    a.Set2D(1.0F, 0, 0);
-    a.Set2D(2.0F, 0, 1);
-    a.Set2D(3.0F, 0, 2);
-    a.Set2D(4.0F, 1, 0);
-    a.Set2D(5.0F, 1, 1);
-    a.Set2D(6.0F, 1, 2);
-    b.Set1D(2.0F, 0);
-    b.Set1D(1.0F, 1);
-    DivDim(a, b, c, 0);
-    c.Dump(stderr, "c:");
-    auto loss = CrossEntropy(c, a);
-    //XLink::ShowNetwork(stderr, &c);
-    net.Backward(loss);
-    a.grad->Dump(stderr);
-}
-void TransposeTest()
-{
-#ifdef USE_CUDA
-    XMem mem0(0, UNI_FREE, MILLION * 64, 1024, MILLION * 64);
-    //XMem mem1(1, UNI_FREE, MILLION * 64, 1024, MILLION * 64);
-    XTensor x;
-    XTensor y;
-    XTensor z;
-    int loops = 2000;
-    int B = 3 * 2 * 4;
-    int K = 8 * 1;
-    int N = 50;
-    int H = 512 * 4;
-    int nnn = GDevs.nGPU;
-    InitTensor3DV2(&x, B, N, H, X_FLOAT, 0);
-    InitTensor4DV2(&y, K, B, N, H/K, X_FLOAT, 0);
-    InitTensor3DV2(&z, B, N, H, X_FLOAT, 0);
-    cudaEvent_t ctime0;
-    cudaEvent_t ctime1;
-    cudaEvent_t ctime2;
-    cudaEvent_t ctime3;
-    cudaEvent_t ctime4;
-    cudaEvent_t ctime5;
-    float elapsedSplit = 0.0;
-    float elapsedMerge = 0.0;
-    float elapsedSum = 0.0;
-    cudaEventCreate(&ctime0);
-    cudaEventCreate(&ctime1);
-    cudaEventCreate(&ctime2);
-    cudaEventCreate(&ctime3);
-    cudaEventCreate(&ctime4);
-    cudaEventCreate(&ctime5);
-    cudaEventRecord(ctime0, 0);
-    double time0 = GetClock();
-    for(int i = 0; i < loops; i++)
-        _Split(&x, &y, 2, K);
-    double time1 = GetClock();
-    cudaEventRecord(ctime1, 0);
-    cudaEventSynchronize(ctime1);
-    cudaEventElapsedTime(&elapsedSplit, ctime0, ctime1);
-    cudaEventRecord(ctime2, 0);
-    double time2 = GetClock();
-    for(int i = 0; i < loops; i++)
-        _Merge(&y, &x, 3);
-    double time3 = GetClock();
-    cudaEventRecord(ctime3, 0);
-    cudaEventSynchronize(ctime3);
-    cudaEventElapsedTime(&elapsedMerge, ctime2, ctime3);
-    cudaEventRecord(ctime4, 0);
-    double time4 = GetClock();
-    for(int i = 0; i < loops; i++)
-        _Sum(&x, &z, &x);
-    double time5 = GetClock();
-    cudaEventRecord(ctime5, 0);
-    cudaEventSynchronize(ctime5);
-    cudaEventElapsedTime(&elapsedSum, ctime4, ctime5);
-    fprintf(stderr, "split:%f merge:%f sum:%f\n", time1 - time0, time3 - time2, time5 - time4);
-    fprintf(stderr, "split:%f merge:%f sum:%f\n", elapsedSplit, elapsedMerge, elapsedSum);
-#endif
-}
-void SumDimTest()
-{
-    XTensor x;
-    XTensor y;
-    XTensor z;
-    int a = 5;
-    int b = 7;
-    int c = 3;
-    InitTensor3DV2(&x, a, b, c, X_FLOAT, -1);
-    InitTensor1DV2(&y, c, X_FLOAT, -1);
-    InitTensor3DV2(&z, a, b, c, X_FLOAT, -1);
-    x.SetZeroAll();
-    y.SetZeroAll();
-    z.SetZeroAll();
-    DTYPE * data = new DTYPE[x.unitNum];
-    for(int i = 0; i < x.unitNum; i++)
-        data[i] = (DTYPE)i;
-    x.SetData(data, x.unitNum);
-    for(int i = 0; i < y.unitNum; i++)
-        data[i] = -(DTYPE)i;
-    y.SetData(data, y.unitNum);
-    _SumDim(&x, &y, &z, 2);
-    z.Dump(stderr, "z:");
-    delete[] data;
-}
--- a/source/tensor/XCall.cpp
+++ b/source/tensor/XCall.cpp
@@ -842,7 +842,7 @@ XTensor * NewTensor5D(const int d0, const int d1, const int d2, const int d3, co
 XTensor * NewTensorRange(int lower, int upper, int step, const TENSOR_DATA_TYPE myDataType, const int myDevID, const bool isEnableGrad)
 {
    int size = abs(upper - lower);
-    int unitNum = ceil(1.0 * size / abs(step));
+    int unitNum = (int)ceil(1.0 * size / abs(step));
    XTensor * tensor = NewTensor1D(unitNum, myDataType, myDevID, isEnableGrad);
    tensor->Range(lower, upper, step);