Roll back some code.

Found some fp16 bugs while decoding with the NMT system. We decided to roll back some of the code to the previous version. This code needs to be reviewed.

Roll back some code.
Found some fp16 bugs while decoding with the NMT system. We decided to roll back some of the code to the previous version. This code needs to be reviewed.
7d8fedae · liyinqiao · f451bc79 · 7d8fedae · 7d8fedae · 7d8fedae
Commit 7d8fedae authored Sep 14, 2020 by liyinqiao
--- a/source/tensor/XDataType.cpp
+++ b/source/tensor/XDataType.cpp
@@ -60,4 +60,25 @@ TENSOR_DATA_TYPE GetDataType(const char * typeName)
    }
 }

+/*
+Below is for calling CPU BLAS for fast matrix operations
+I'm not sure how fast it is. But it seems that other
+guys are crazy about this. So I decided to have a try.
+*/
+
+/*
+float -> float16 (IEEE-754 binary32 bits to binary16 bits)
+The mantissa is truncated (no rounding). Inputs outside the normal half
+range are mapped explicitly instead of letting the 5-bit exponent wrap:
+zero and very small values give a signed zero or a half subnormal,
+values too large for half give infinity, and infinity/NaN stay
+infinity/NaN.
+NOTE(review): XDataType.h declares this function without _XINLINE_; if
+_XINLINE_ expands to "inline", calls from other source files may fail to
+link - please confirm.
+>> f - the 32-bit float to convert
+<< return - the 16-bit pattern (1 sign bit, 5 exponent bits, 10 mantissa bits)
+*/
+_XINLINE_ unsigned short FloatToFloat16(float f)
+{
+    /* read the bits of f through a union rather than a pointer cast */
+    union { float value; unsigned int bits; } u;
+    u.value = f;
+
+    unsigned int sign = (u.bits >> 16) & 0x8000u;
+    unsigned int exp32 = (u.bits >> 23) & 0xffu;
+    unsigned int man = u.bits & 0x007fffffu;
+
+    /* infinity or NaN: keep one mantissa bit set so that NaN stays NaN */
+    if (exp32 == 0xffu)
+        return (unsigned short)(sign | 0x7c00u | (man ? 0x0200u : 0u));
+
+    /* re-bias the exponent from 127 (float) to 15 (half) */
+    int exp16 = (int)exp32 - 112;
+
+    /* too large for half: infinity */
+    if (exp16 >= 31)
+        return (unsigned short)(sign | 0x7c00u);
+
+    if (exp16 <= 0) {
+        /* below the smallest half subnormal (this includes 0): signed zero */
+        if (exp16 < -9)
+            return (unsigned short)sign;
+
+        /* half subnormal: restore the implicit leading 1 and shift it down */
+        return (unsigned short)(sign | ((man | 0x00800000u) >> (14 - exp16)));
+    }
+
+    /* normal half value */
+    return (unsigned short)(sign | ((unsigned int)exp16 << 10) | (man >> 13));
+}
+
+/*
+float16 -> float (IEEE-754 binary16 bits to binary32 bits)
+The 32-bit pattern is assembled as an integer and then read back as a
+float, i.e., the bits are reinterpreted. Converting the integer
+numerically with float(...) would return the value of the bit pattern
+itself (e.g., 1065353216.0 for the half value 1.0).
+Zero, subnormals, infinity and NaN are decoded explicitly because the
+plain exponent re-bias is only valid for normal half values.
+NOTE(review): XDataType.h declares this function without _XINLINE_; if
+_XINLINE_ expands to "inline", calls from other source files may fail to
+link - please confirm.
+>> h - the 16-bit pattern (1 sign bit, 5 exponent bits, 10 mantissa bits)
+<< return - the float holding the same value
+*/
+_XINLINE_ float Float16ToFloat(unsigned short h)
+{
+    /* widen to unsigned first so that the sign bit is never shifted
+       into the sign of a promoted int */
+    unsigned int sign = ((unsigned int)h & 0x8000u) << 16;
+    unsigned int exp16 = ((unsigned int)h >> 10) & 0x1fu;
+    unsigned int man = (unsigned int)h & 0x03ffu;
+
+    /* the result is written as bits and read back as a float */
+    union { float value; unsigned int bits; } u;
+
+    if (exp16 == 0u) {
+        if (man == 0u) {
+            /* signed zero */
+            u.bits = sign;
+        }
+        else {
+            /* half subnormal: shift the mantissa up until the implicit
+               leading 1 appears, lowering the exponent for every step
+               (113 is the float exponent field of 2^-14) */
+            unsigned int exp32 = 113u;
+            while ((man & 0x0400u) == 0u) {
+                man <<= 1;
+                exp32--;
+            }
+            u.bits = sign | (exp32 << 23) | ((man & 0x03ffu) << 13);
+        }
+    }
+    else if (exp16 == 0x1fu) {
+        /* infinity (man == 0) or NaN (man != 0) */
+        u.bits = sign | 0x7f800000u | (man << 13);
+    }
+    else {
+        /* normal value: re-bias the exponent from 15 (half) to 127 (float) */
+        u.bits = sign | ((exp16 + 112u) << 23) | (man << 13);
+    }
+
+    return u.value;
+}
+
 } /* end of the nts (NiuTrans.Tensor) namespace */
--- a/source/tensor/XDataType.h
+++ b/source/tensor/XDataType.h
@@ -46,6 +46,10 @@ enum MATRIX_TRANS_TYPE{X_TRANS, X_NOTRANS};
 extern const char * GetDataTypeName(TENSOR_DATA_TYPE type);
 extern TENSOR_DATA_TYPE GetDataType(const char * typeName);

+/* data conversion (for lower precision computation) */
+unsigned short FloatToFloat16(float f);
+float Float16ToFloat(unsigned short h);
+
 #define CheckDataType(a, b) \
 { \
    if(GetDataTypeName(a) != GetDataTypeName(a)){ \

--- a/source/tensor/XTensor.cpp
+++ b/source/tensor/XTensor.cpp
@@ -1739,13 +1739,12 @@ void XTensor::Dump(FILE* file, const char* label, const int n, const int beg, co
            }
        }
        else if (dataType == X_FLOAT16) {
-            float16* f = (float16*)d;
-            for (int i = beg; i < end; i++) {
-                float v = f[i].Float();
-                if (i == beg)
-                    fprintf(file, "%e", v);
+            for(int i = beg; i < end; i++){
+                DTYPE f = ((unsigned short*)d)[i];
+                if(i == beg)
+                    fprintf(file, "%e", f);
                else
-                    fprintf(file, " %e", v);
+                    fprintf(file, " %e", f);
            }
        }
        else
@@ -1805,7 +1804,7 @@ void XTensor::BinaryDump(FILE* file)
            break;
        }
        case X_FLOAT16: {
-            fwrite(tmp.data, sizeof(float16), unitNum, file);
+            fwrite(tmp.data, sizeof(unsigned short), unitNum, file);
            break;
        }
        default: {
@@ -1943,8 +1942,8 @@ void XTensor::BinaryRead(FILE* file, size_t offset)
            break;
        }
        case X_FLOAT16: {
-            float16* d = new float16[unitNum];
-            fread(d, sizeof(float16), unitNum, file);
+            unsigned short* d = new unsigned short[unitNum];
+            fread(d, sizeof(unsigned short), unitNum, file);
            SetData(d, unitNum);
            delete[] d;
            break;

--- a/source/tensor/core/CHeader.h
+++ b/source/tensor/core/CHeader.h
@@ -91,7 +91,6 @@
 #include "sort/TopK.h"

 #include "utilities/CheckData.h"
-#include "utilities/Float16.h"
 #include "utilities/FlushToMem.h"
 #include "utilities/SetAscendingOrder.h"
 #include "utilities/XMatrixSegment.h"

--- a/source/tensor/core/getandset/ConvertDataType.cpp
+++ b/source/tensor/core/getandset/ConvertDataType.cpp
@@ -24,7 +24,6 @@
 #include "ConvertDataType.h"
 #include "ConvertDataType.cuh"
 #include "../movement/CopyValues.h"
-#include "../utilities/Float16.h"

 namespace nts { // namespace nts(NiuTrans.Tensor)

@@ -49,12 +48,12 @@ void ConvertDataType(int devID,

    if(typeS == X_FLOAT && typeT == X_FLOAT16){
        for(int i = 0; i < size; i++){
-            ((float16*)t)[i] = float16(((float*)s)[i]);
+            ((unsigned short*)t)[i] = FloatToFloat16(((float*)s)[i]);
        }
    }
    else if(typeS == X_FLOAT16 && typeT == X_FLOAT){
        for(int i = 0; i < size; i++){
-            ((float*)t)[i] = ((float16*)s)[i].Float();
+            ((float*)t)[i] = Float16ToFloat(((unsigned short*)s)[i]);
        }
    }
    else{
@@ -95,15 +94,15 @@ void _ConvertDataType(const XTensor * input, XTensor * output)
    }
    else if (input->dataType == X_FLOAT && output->dataType == X_FLOAT16) {
        float* inputData = (float*)input->data;
-        float16* outputData = (float16*)output->data;
+        unsigned short* outputData = (unsigned short*)output->data;
        for (int i = 0; i < input->unitNum; i++)
-            outputData[i] = (float16)inputData[i];
+            outputData[i] = (unsigned short)inputData[i];
    }
    else if (input->dataType == X_FLOAT16 && output->dataType == X_FLOAT) {
-        float16* inputData = (float16*)input->data;
+        unsigned short* inputData = (unsigned short*)input->data;
        float* outputData = (float*)output->data;
        for (int i = 0; i < input->unitNum; i++)
-            outputData[i] = inputData[i].Float();
+            outputData[i] = (float)inputData[i];
    }
    else
        ShowNTErrors("Unsupported data types for conversion!");

--- a/source/tensor/core/getandset/SetData.cpp
+++ b/source/tensor/core/getandset/SetData.cpp
@@ -25,7 +25,6 @@
 #include "SetData.cuh"
 #include "../../XUtility.h"
 #include "../movement/CopyValues.h"
-#include "../utilities/Float16.h"

 #if !defined( WIN32 ) && !defined( _WIN32 )
    #include "sys/time.h"
@@ -435,19 +434,19 @@ void _SetDataRand(XTensor * tensor, DTYPE lower, DTYPE upper)
        if(tensor->dataType == X_FLOAT){
            float * d = (float*)tensor->data;
            for(int i = 0; i < tensor->unitNum; i++){
-                d[i] = ((float)rand()/RAND_MAX) * variance  + lower;
+                d[i] = variance * ((float)rand()/RAND_MAX) + lower;
            }
        }
        else if (tensor->dataType == X_FLOAT16) {
-            float16* d = (float16*)tensor->data;
+            unsigned short* d = (unsigned short*)tensor->data;
            for (int i = 0; i < tensor->unitNum; i++) {
-                d[i] = ((float16)rand() / RAND_MAX) * variance + lower;
+                d[i] = variance * ((unsigned short)rand() / RAND_MAX) + lower;
            }
        }
        else if(tensor->dataType == X_DOUBLE){
            double * d = (double*)tensor->data;
            for(int i = 0; i < tensor->unitNum; i++){
-                d[i] = ((double)rand()/RAND_MAX) * variance+ lower;
+                d[i] = variance * ((double)rand()/RAND_MAX) + lower;
            }
        }
        else{

--- a/source/tensor/core/utilities/Float16.cpp
+++ b/source/tensor/core/utilities/Float16.cpp
-/* NiuTrans.Tensor - an open-source tensor library
- * Copyright (C) 2020, Natural Language Processing Lab, Northeastern University.
- * All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- /*
-  * $Creted by: Guan Huhao 2020-02-05
-  * $Updated by: Xu Chen (email: hello_master1954@163.com) 2020-05-01
-  */
-
-#include "../../XGlobal.h"
-#include "Float16.h"
-
-namespace nts { // namespace nts(NiuTrans.Tensor)
-
-float16 float16::SetOverFlow()
-{
-    exp = 31;
-    data = 0;
-    return *this;
-}
-
-int float16::IsOverlFlow() const 
-{
-    return exp==31;
-}
-
-// mask for calculate the highest 1
-unsigned int float16::mask[32] = 
-{
-    0xffffffff,0xfffffffe,0xfffffffc,0xfffffff8,0xfffffff0,0xffffffe0,0xffffffc0,0xffffff80,
-    0xffffff00,0xfffffe00,0xfffffc00,0xfffff800,0xfffff000,0xffffe000,0xffffc000,0xffff8000,
-    0xffff0000,0xfffe0000,0xfffc0000,0xfff80000,0xfff00000,0xffe00000,0xffc00000,0xff800000,
-    0xff000000,0xfe000000,0xfc000000,0xf8000000,0xf0000000,0xe0000000,0xc0000000,0x80000000
-};
-
-// to calculate the power of 2
-unsigned int float16::pow2[32] = 
-{
-    0x00000001,0x00000002,0x00000004,0x00000008,0x00000010,0x00000020,0x00000040,0x00000080,
-    0x00000100,0x00000200,0x00000400,0x00000800,0x00001000,0x00002000,0x00004000,0x00008000,
-    0x00010000,0x00020000,0x00040000,0x00080000,0x00100000,0x00200000,0x00400000,0x00800000,
-    0x01000000,0x02000000,0x04000000,0x08000000,0x10000000,0x20000000,0x40000000,0x80000000,
-};
-
-// compare the absolute value， if a < b return 1, else return 0
-int float16::AbsCompare(const float16 & a, const float16 & b)
-{
-    if (a.exp < b.exp)
-        return 1;
-    else if (a.exp > b.exp) 
-        return 0;
-
-    return a.data < b.data;
-}
-
-// get inverse that a * inverse(a) == 1
-float16 float16::GetInverse() const 
-{
-    float16 ans;
-    ans.sign = sign;
-    ans.exp = 29 - exp;
-    int rec = pow2[31];
-    //let it div 0x80000000
-    rec /= (this->data | pow2[10]);
-
-    if (!(rec & pow2[21])) {
-        rec <<= 1;
-        ans.exp++;
-    }
-    rec >>= 10;
-    ans.data = rec;
-    return ans;
-}
-
-/* constructor by (sign, exp, data), similar to ieee 32 floating point
->> s - sign: 1bit
->> e - exp:  5bit
->> d - data: 10bit 
-*/
-float16::float16(const int& s, const int& e, const int& d) 
-{
-    sign = s;
-    exp = e;
-    data = d;
-}
-
-/* initializes the 16bit floating point to 0 
-*/
-float16::float16() 
-{
-    sign = 0;
-    exp = 0;
-    data = 0;
-}
-
-/* constructor by other datatype
-   We convert the data to float and convert float to float16.
->> data - num
-*/
-template<class T>
-float16::float16(const T& data) 
-{
-    *this = (float)data;
-}
-template float16::float16 (const int &);
-template float16::float16 (const double &);
-
-/* constructor by a 32-bit float num
->> data - 32-bit float num
-*/
-float16::float16(const float& data) 
-{
-    *this = data;
-}
-
-void float16::Dump()
-{
-    printf("sign: %d\texp: %d\tdata: %d\n", sign, exp, data);
-}
-
-/*
-convert float16 to float and return
-construct of 32-bit is
-the 31th bit present the sign
-the 30th~23th bit present the exp, with 128 offset
-rest 23th～0th store the data
-*/
-float float16::Float() 
-{
-    int ret = 0;
-    ret = IsOverlFlow() ? 0x7f800000 :
-        (sign ? 0x80000000 : 0) | ((exp + 112) << 23) | (data << 13);
-    float p = *(float*)&ret;
-    return p;
-}
-
-// basic assignment function
-float16 float16::operator = (const float16& a) 
-{
-    sign = a.sign;
-    exp = a.exp;
-    data = a.data;
-    return *this;
-}
-
-// convert float to float16
-float16 float16::operator = (const float& a) 
-{
-    unsigned int p = *(unsigned int*)&a;
-    sign = p & pow2[31] ? 1 : 0;
-
-    if (a > 65535 || a < -65535) 
-        return SetOverFlow();
-    exp = ((p >> 23)& (0xf)) | ((p >> 26 & 0x10));
-    data = (p >> 13);
-    return *this;
-}
-
-/* Template assignment function is force change other datetype to float,
-   then call the float assignment function.
-   Template assignment function now support int and double.
-*/
-template <class T>
-float16 float16::operator = (const T& data) 
-{
-    *this = (float)data;
-    return *this;
-}
-template float16 float16:: operator = <int>(const int&);
-template float16 float16:: operator = <double>(const double&);
-
-/*
-template for multi-datatype overload
->> operator - the overload operator, e.g. <, =
->> return_type - the returned datetype of function, e.g, int, float
->> expression - the returned expression
-*/
-#define _OVERLOAD_OPRATER_TEMPLATE(operation, returnType, expression)       \
-template<class T>                                                           \
-returnType float16::operator operation (const T & data)                     \
-{                                                                           \
-    float16 rec=(float)data;                                                \
-    return expression;                                                      \
-}                                                                           \
-template returnType float16::operator operation <int>(const int&);          \
-template returnType float16::operator operation <float>(const float&);      \
-template returnType float16::operator operation <double>(const double&);
-
-// overload operator (less than) a<b
-int float16::operator < (const float16& data) 
-{
-    if (sign < data.sign)
-        return 1;
-    else if (sign > data.sign) 
-        return 0;
-
-    if (exp < data.exp) 
-        return 1;
-    else if (exp > data.exp) 
-        return 0;
-    
-    return this->data < data.data;
-}
-_OVERLOAD_OPRATER_TEMPLATE(< , int, *this < rec)
-
-// overload opertator <= (less or equal than) a <= b
-int float16::operator <= (const float16& data) 
-{
-    if (sign < data.sign)
-        return 1;
-    else if (sign > data.sign) 
-        return 0;
-
-    if (exp < data.exp) 
-        return 1;
-    else if (exp > data.exp) 
-        return 0;
-
-    return this->data <= data.data;
-}
-_OVERLOAD_OPRATER_TEMPLATE(<= , int, *this <= rec)
-
-// overload operator (greater than) a > b
-int float16::operator > (const float16& data) 
-{
-    if (sign > data.sign)
-        return 1;
-    else if (sign < data.sign) 
-        return 0;
-
-    if (exp > data.exp) 
-        return 1;
-    else if (exp < data.exp) 
-        return 0;
-
-    return this->data > data.data;
-}
-_OVERLOAD_OPRATER_TEMPLATE(> , int, * this > rec)
-
-// overload opertator >= (greater or equal than) a >= b
-int float16::operator >= (const float16& data) 
-{
-    if (sign > data.sign)
-        return 1;
-    else if (sign < data.sign) 
-        return 0;
-    
-    if (exp > data.exp) 
-        return 1;
-    else if (exp < data.exp) 
-        return 0;
-
-    return this->data >= data.data;
-}
-_OVERLOAD_OPRATER_TEMPLATE(>= , int, *this < rec)
-
-// overload operator + (add) a + b
-float16 float16::operator + (const float16& data)
-{
-    float16 ans;
-
-    // avoid overflow inf + anything = inf
-    if (this->IsOverlFlow()) 
-        return *this;
-    if (data.IsOverlFlow()) 
-        return data;
-
-    /* the greater number determine the sign and 
-       the smaller should be >> to aligment to the greater one */
-    if (AbsCompare(*this, data)) {
-        ans.sign = data.sign;
-        // rec the exp
-        int recp = data.exp;          
-        //to calculate the data
-        int recd = (data.data | (pow2[10])) + 
-            ((data.sign ^ sign) ? -1 : 1) * 
-            (((pow2[10]) | this->data) >> (data.exp - exp));   
-
-        //because the date may carry， if carryed >> the data, and change its exp
-        if (recd) {        
-            //to make the highest one is 10th bit
-            while (mask[10] & recd) {      
-                recd >>= 1;
-                recp++;
-            }
-            //to make the highest one is 10th bit
-            while (!(mask[10] & recd)) {    
-                recd <<= 1;
-                recp--;
-            }
-        }
-        // if data==0, exp should be 0
-        else 
-            recp = 0;  
-
-        ans.data = recd;
-        // if overflow should set overflow
-        if (recp >= 31) 
-            ans.SetOverFlow(); 
-        else {
-            ans.exp = recp;
-            ans.data = recd;
-        }
-    }
-    // same as above. while divided into two part? reduce assignment to increase efficent
-    else {             
-        ans.sign = sign;
-        int recp = exp;
-        int recd = (this->data | (pow2[10])) + 
-                   ((sign ^ data.sign) ? -1 : 1) * 
-                   (((pow2[10]) | data.data) >> (exp - data.exp));
-        if (recd) {
-            while (mask[10] & recd) {
-                recd >>= 1;
-                recp++;
-            }
-            while (!(mask[10] & recd)) {
-                recd <<= 1;
-                recp--;
-            }
-        }
-        else 
-            recp = 0;
-
-        if (recp >= 31) 
-            ans.SetOverFlow();
-        else {
-            ans.exp = recp;
-            ans.data = recd;
-        }
-    }
-    return ans;
-}
-_OVERLOAD_OPRATER_TEMPLATE(+, float16, *this = *this + rec)
-
-//overide operator +=
-float16 float16::operator+=(const float16& data) {
-    return *this = *this + data;
-}
-_OVERLOAD_OPRATER_TEMPLATE(+=, float16, *this = *this + rec)
-
-//overide operator -（negetive） -a
-float16 float16::operator - () 
-{
-    sign ^= 1;
-    float16 rec = *this;
-    sign ^= 1;
-    return rec;
-}
-
-//overide operator - (substraction) a-b
-float16 float16::operator - (const float16& data) 
-{
-    float16 ans;
-    if (this->IsOverlFlow()) 
-        return *this;
-    if (data.IsOverlFlow()) 
-        return data;
-
-    /* same as add only diffrent is the sign judge, 
-    a possitive number sub a greater number will be negtive. */
-    if (AbsCompare(*this, data)) {
-        ans.sign = !data.sign;
-        int recp = data.exp;
-        int recd = (data.data | (pow2[10])) + 
-            ((data.sign ^ sign) ? 1 : -1) * 
-            (((pow2[10]) | this->data) >> (data.exp - exp));
-        if (recd) {
-            while (mask[10] & recd) {
-                recd >>= 1;
-                recp++;
-            }
-            while (!(mask[10] & recd)) {
-                recd <<= 1;
-                recp--;
-            }
-        }
-        else recp = 0;
-        if (recp >= 31) 
-            ans.SetOverFlow();
-        else {
-            ans.data = recd;
-            ans.exp = recp;
-        }
-    }
-    else {
-        ans.sign = sign;
-        int recp = exp;
-        int recd = (this->data | (pow2[10])) + 
-            ((sign ^ data.sign) ? 1 : -1) * 
-            (((pow2[10]) | data.data) >> (exp - data.exp));
-        if (recd) {
-            while (mask[10] & recd) {
-                recd >>= 1;
-                recp++;
-            }
-            while (!(mask[10] & recd)) {
-                recd <<= 1;
-                recp--;
-            }
-        }
-        else recp = 0;
-        if (recp >= 31) 
-            ans.SetOverFlow();
-        else {
-            ans.data = recd;
-            ans.exp = recp;
-        }
-    }
-    return ans;
-}
-_OVERLOAD_OPRATER_TEMPLATE(-, float16, *this = *this - rec)
-
-// overide operator -=
-float16 float16::operator-=(const float16& data) 
-{
-    return *this = *this - data;
-}
-_OVERLOAD_OPRATER_TEMPLATE(-=, float16, *this = *this - rec)
-
-// overload operator * (multiple) a * b
-float16 float16::operator * (const float16& data) 
-{
-    //if(IsOverlFlow()) 
-    //    return *this;
-    //if(data.IsOverlFlow()) 
-    //    return data;
-
-    float16 ans;
-    // ^ to get zhe result sign different will be 1(negtive), same will be 0 positive;
-    ans.sign = sign ^ data.sign;
-
-    // mul to get answer
-    int rec = (data.data | pow2[10]) * (this->data | pow2[10]); 
-    
-    // calculat the new exp
-    int recp = exp + data.exp - 15 > 0 ? exp + data.exp - 15 : 0;       
-    
-    // if carryed, to fix the exp and data
-    rec >>= 10;                                           
-    while (rec & mask[11]) {
-        ++recp;
-        rec >>= 1;
-    }
-
-    if (recp >= 31) 
-        ans.SetOverFlow();
-    else {
-        ans.exp = recp;
-        ans.data = rec;
-    }
-    return ans;
-}
-_OVERLOAD_OPRATER_TEMPLATE(*, float16, (*this)* rec)
-
-// overload operator *= (multiple) a *= b
-float16 float16::operator *= (const float16& data) 
-{
-    return *this = *this * data;
-}
-_OVERLOAD_OPRATER_TEMPLATE(*=, float16, *this = *this * rec)
-
-// overload operator / (division) a / b
-float16 float16::operator / (const float16& data) 
-{
-    float16 ans;
-    // ^ to get zhe result sign different will be 1(negtive),same will be 0 positive;
-    ans.sign = sign ^ data.sign;                       
-    // calculat the new exp
-    int recp = exp - data.exp + 14;                        
-    // defore div should move to the left to avoid precision loss
-    int recd = (this->data << 21) | pow2[31];              
-    recd /= (data.data | pow2[10]);
-    // to make the highest one is the 21st bit
-    if (recd & pow2[21]) {                              
-        recd >>= 1;
-        ++recp;
-    }
-    if (recp >= 31) 
-        ans.SetOverFlow();
-    else {
-        recd >>= 10;
-        ans.data = recd;
-        ans.exp = recp;
-    }
-    return ans;
-}
-_OVERLOAD_OPRATER_TEMPLATE(/ , float16, (*this) / rec)
-
-// overload operator /= (division) a /= b
-float16 float16::operator /= (const float16& data) {
-    return *this = *this / data;
-}
-_OVERLOAD_OPRATER_TEMPLATE(/=, float16, *this = *this / rec)
-
-} // namespace nts(NiuTrans.Tensor)
--- a/source/tensor/core/utilities/Float16.h
+++ b/source/tensor/core/utilities/Float16.h
-/* NiuTrans.Tensor - an open-source tensor library
- * Copyright (C) 2020, Natural Language Processing Lab, Northeastern University.
- * All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * $Creted by: Guan Huhao 2020-02-05
- * $Updated by: Xu Chen (email: hello_master1954@163.com) 2020-05-01
- */
-
-#ifndef FLOAT16_H
-#define FLOAT16_H
-
-namespace nts { // namespace nts(NiuTrans.Tensor)
-
-struct float16
-{
-private:
-    /* 
-    sign is the sign bit 1 means negative, 0 means positive
-    exp is the exponent with 16 offset
-    data is the data, similar to ieee-754, the highest is default 1 and ignored 
-    */
-    unsigned short data : 10;
-    unsigned short exp : 5;
-    unsigned short sign : 1;
-
-    // mask for calculate the highest 1
-    static unsigned int mask[32];
-    static unsigned int pow2[32];
-    
-    //int FindHighOne(const int &num, int &l, int &r);
-    int AbsCompare(const float16 & a,const float16 & b);
-
-public:
-    float16 SetOverFlow();
-
-    // judge whether overflow
-    int IsOverlFlow() const;
-    
-    /* constructor by (sign, exp, data)
-       similar to ieee 32 floating point
-       sign: 1bit 
-       exp:  5bit 
-       data: 10bit */
-    float16(const int& s, const int& e, const int& d);
-
-    /* default constructor
-       This initializes the 16bit floating point to 0. */
-    float16();
-
-    // constructor by a 32-bit float num
-    float16(const float& data);
-
-    // constructor by other datatype
-    template<class T> float16(const T& data);
-
-    void Dump();
-
-    // convert float16 to float and return
-    float Float();
-    
-    /* assignment function and tempalte function
-       Float assignment function is the basic function.
-       Template assignment function is force change other datetype to float,
-       then call the float assignment function.
-       Template assignment function now support int and double. */
-    float16 operator = (const float& data);
-    float16 operator = (const float16& data);
-    template<class T>  float16 operator = (const T& data);
-
-    // overload operator (less than) a < b
-    int operator < (const float16& data);
-    template<class T>  int operator < (const T& data);
-
-    // overload opertator <= (less or equal than) a <= b
-    int operator <= (const float16& data);
-    template<class T> int operator <= (const T& data);
-
-    // overload operator (greater than) a > b
-    int operator > (const float16& data);
-    template<class T> int operator > (const T& data);
-
-    // overload opertator >= (greater or equal than) a >= b
-    int operator >= (const float16& data);
-    template<class T> int operator >= (const T& data);
-
-    // overload operator + (add) a + b
-    float16 operator + (const float16& data);
-    template<class T> float16 operator + (const T& data);
-
-    // overload operator += (add) a += b
-    float16 operator += (const float16& data);
-    template<class T> float16 operator += (const T& data);
-
-    // overload operator -(negetive) -a
-    float16 operator - ();
-
-    // overload operator - (substraction) a - b
-    float16 operator - (const float16& data);
-    template<class T> float16 operator - (const T& data);
-
-    // overload operator -= (substraction) a -= b
-    float16 operator -= (const float16& data);
-    template<class T> float16 operator -= (const T& data);
-
-    // overload operator * (multiple) a * b
-    float16 operator * (const float16& data);
-    template<class T> float16 operator * (const T& data);
-
-    // overload operator *= (multiple) a *= b
-    float16 operator *= (const float16& data);
-    template<class T> float16 operator *= (const T& data);
-
-    // overload operator / (division) a / b
-    float16 GetInverse() const;
-    float16 operator / (const float16& data);
-    template<class T> float16 operator / (const T& data);
-
-    // overload operator /= (division) a /= b
-    float16 operator /= (const float16& data);
-    template<class T> float16 operator /= (const T& data);
-
-};
-
-} // namespace nts(NiuTrans.Tensor)
-
-#endif /* FLOAT16_H */