add cpu half test library , dir is source/tensor/halfLib

41dbf0a9 · ltb · 1da50ae2 · 41dbf0a9 · 41dbf0a9 · 41dbf0a9
Commit 41dbf0a9 authored Aug 05, 2019 by ltb
--- a/source/tensor/halfLib/HalfFloat/HalfPrecisionFloatTest.cpp
+++ b/source/tensor/halfLib/HalfFloat/HalfPrecisionFloatTest.cpp
+#include <iostream>
+#include <assert.h>
+#include <direct.h>
+#include "../core/utilities/FlushToMem.h"
+#include "../core/getandset/ConvertDataType.h"
+#include "../XTensor.h"
+#include "umHalf.h"
+using namespace nts;
+//#define VALIDATE(x) if (!(x)){std::cout << "Failed: " <<  #x << std::endl;assert((x));}
+
+/*
+ * Round-trip smoke test for half-precision tensor dumps: a random float
+ * tensor is converted to X_FLOAT16, dumped to a file below the current working
+ * directory, read back into a second tensor, and both tensors are printed to
+ * stderr for manual comparison.
+ * Returns 0 on success, 1 if the dump path cannot be built or the file cannot
+ * be opened.
+ */
+int main(int argc, char* argv[])
+{
+	/* Build the dump path in a fixed-size buffer. The buffer returned by
+	   getcwd(NULL, 0) is only guaranteed to hold the directory string itself,
+	   so appending to it with strcat writes past the allocation. Limiting
+	   getcwd to "capacity minus suffix" bytes guarantees the append fits. */
+	// NOTE(review): the sibling main.cpp appends
+	// "\\source\\tensor\\halfLib\\HalfFloat\\dump" - confirm which directory is intended here.
+	static const char suffix[] = "\\source\\tensor\\HalfFloat\\dump";
+	char path[4096];
+	if (getcwd(path, (int)(sizeof(path) - sizeof(suffix))) == NULL) {
+		fprintf(stderr, "cannot determine the current working directory\n");
+		return 1;
+	}
+	strcat(path, suffix);
+
+	XTensor a;
+	XTensor halfa;
+	int dim = 4;
+	int devId = 0;
+	InitTensor2DV2(&a,dim,dim,X_FLOAT,devId);
+	a.SetDataRand(-2.0,2.0);
+
+	halfa = ConvertDataType(a, X_FLOAT16);
+	halfa.Dump(&halfa, stderr, "halfa:");
+	GPUToCPUFlush(&halfa);
+
+	/* write the half tensor; bail out instead of handing a NULL stream to Dump/fclose */
+	FILE * file = fopen(path, "wb");
+	if (file == NULL) {
+		fprintf(stderr, "cannot open %s for writing\n", path);
+		return 1;
+	}
+	halfa.Dump(file, "halfa:");
+	fclose(file);
+
+	/* read the dump back into a fresh half tensor of the same shape */
+	XTensor halfb;
+	InitTensor2DV2(&halfb, dim, dim, X_FLOAT16, devId);
+	FILE *read = fopen(path, "rb");
+	if (read == NULL) {
+		fprintf(stderr, "cannot open %s for reading\n", path);
+		return 1;
+	}
+	halfb.Read(read, "halfa:");
+	fclose(read);
+	halfb.Dump(&halfb, stderr, "halfb:");
+	
+
+
+	//half h = 1.f, h2 = 2.f;
+	//--h2;
+	//++h2;
+	//--h;
+	//++h;
+	//h2 -= 1.f;
+	//float f = h2, f2 = h;
+	//VALIDATE(1.f == f && f == f2);
+	//half dddd = 15.5;
+	//float hhhh = 15.5;
+	//printf("%x\n", dddd);
+	//printf("%x\n", hhhh);
+	//h = h2;
+	//h2 = 15.5f;
+
+	//f = h2, f2 = h;
+	//VALIDATE(15.5f == f && 1.f == f2);
+	//h2 *= h;
+	//f = h2, f2 = h;
+	//VALIDATE(15.5f == f && 1.f == f2);
+	//h2 /= h;
+	//f = h2, f2 = h;
+	//VALIDATE(15.5f == f && 1.f == f2);
+	//h2 += h;
+	//f = h2, f2 = h;
+	//VALIDATE(16.5f == f && 1.f == f2);
+	//h++; h++; h++;
+	//h2 = -h2;
+	//h2 += 17.5f;
+	//h2 *= h;
+	//f = h2, f2 = h;
+	//VALIDATE(4.f == f && 4.f == f2);
+	//VALIDATE(h == h2);
+	//VALIDATE(h <= h2);
+	//--h;
+	//VALIDATE(h <= h2);
+
+	//h -= 250.f;
+	//VALIDATE(h < h2);
+
+	//h += 500.f;
+	//VALIDATE(h > h2);
+	//VALIDATE(h >= h2);
+
+	//f = h2, f2 = h;
+	//VALIDATE(h * h2 == (half)(f * f2));
+
+	//// addition
+	//// ****************************************************************************
+
+	//// identical exponents
+	//for (float f = 0.f; f < 1000.f; ++f)
+	//{
+	//	half one = f;
+	//	half two = f;
+	//	half three = one + two;
+	//	f2 = three;
+	//	VALIDATE(f*2.f == f2);
+	//}
+
+	//// different exponents
+	//for (float f = 0.f, fp = 1000.f; f < 500.f; ++f, --fp)
+	//{
+	//	half one = f;
+	//	half two = fp;
+	//	half three = one + two;
+	//	f2 = three;
+	//	VALIDATE(f + fp == f2);
+	//}
+
+	//// very small numbers - this is already beyond the accuracy of 16 bit floats.
+	//for (float f = 0.003f; f < 1000.f; f += 0.0005f)
+	//{
+	//	half one = f;
+	//	half two = f;
+	//	half three = one + two;
+	//	f2 = three;
+	//	float m = f * 2.f;
+	//	VALIDATE(f2 > (m - 0.05*m) && f2 < (m + 0.05*m));
+	//}
+
+
+	//// subtraction
+	//// ****************************************************************************
+
+	//// identical exponents
+	//for (float f = 0.f; f < 1000.f; ++f)
+	//{
+	//	half one = f;
+	//	half two = f;
+	//	half three = one - two;
+	//	f2 = three;
+	//	VALIDATE(0.f == f2);
+	//}
+
+	//// different exponents
+	//for (float f = 0.f, fp = 1000.f; f < 500.f; ++f, --fp)
+	//{
+	//	half one = f;
+	//	half two = fp;
+	//	half three = one - two;
+	//	f2 = three;
+	//	VALIDATE(f - fp == f2);
+	//}
+	return 0;
+}
+
--- a/source/tensor/halfLib/HalfFloat/a.md
+++ b/source/tensor/halfLib/HalfFloat/a.md
+https://github.com/acgessler/half_float
+
+C++ implementation of a 16 bit floating-point type mimicking most of the IEEE 754 behaviour. Compatible with the half data type used as texture format by OpenGL/Direct3D.
\ No newline at end of file
--- a/source/tensor/halfLib/HalfFloat/dump
+++ b/source/tensor/halfLib/HalfFloat/dump
+halfa: order=2 dimsize=4,4 dtype=X_FLOAT16 dense=1.000000
+be2c 3ffd bf2c 3c52 a8f6 3a6a afcf 3eca 3e47 3852 bf6e 3bc8 bff5 bc12 b266 31a4
--- a/source/tensor/halfLib/HalfFloat/main.cpp
+++ b/source/tensor/halfLib/HalfFloat/main.cpp
+#include <iostream>
+#include <assert.h>
+#include <direct.h>
+#include "../../core/utilities/FlushToMem.h"
+#include "../../core/getandset/ConvertDataType.h"
+#include "../../XTensor.h"
+#include "../../XGlobal.h"
+#include "umHalf.h"
+
+using namespace nts;
+
+/*
+ * Save/load round trip for a half-precision tensor: a random float tensor is
+ * converted to X_FLOAT16, dumped to a file below the current working
+ * directory, read back into a second tensor, and both are printed to stderr.
+ * Returns 0 on success, 1 if the dump path cannot be built or the file cannot
+ * be opened.
+ */
+int main(int argc, char* argv[])
+{
+	/* Build the dump path in a fixed-size buffer. The buffer returned by
+	   getcwd(NULL, 0) is only guaranteed to hold the directory string itself,
+	   so appending to it with strcat writes past the allocation. Limiting
+	   getcwd to "capacity minus suffix" bytes guarantees the append fits. */
+	static const char suffix[] = "\\source\\tensor\\halfLib\\HalfFloat\\dump";
+	char path[4096];
+	if (getcwd(path, (int)(sizeof(path) - sizeof(suffix))) == NULL) {
+		fprintf(stderr, "cannot determine the current working directory\n");
+		return 1;
+	}
+	strcat(path, suffix);
+
+	XTensor a;
+	XTensor halfa;
+	int dim = 4;
+	int devId = 0;
+	InitTensor2DV2(&a, dim, dim, X_FLOAT, devId);
+	a.SetDataRand(-2.0, 2.0);
+
+	halfa = ConvertDataType(a, X_FLOAT16);
+	printf("============save model================\n");
+
+	halfa.Dump(&halfa, stderr, "halfa:");
+	GPUToCPUFlush(&halfa);
+	
+	/* bail out instead of handing a NULL stream to Dump/fclose */
+	FILE * file = fopen(path, "wb");
+	if (file == NULL) {
+		fprintf(stderr, "cannot open %s for writing\n", path);
+		return 1;
+	}
+	halfa.Dump(file, "halfa:");
+	//a.Dump(file, "a");
+	fclose(file);
+
+	XTensor halfb;
+	InitTensor2DV2(&halfb, dim, dim, X_FLOAT16, devId);
+
+	/* b is only referenced by the commented-out float read below */
+	XTensor b;
+	InitTensor2DV2(&b, dim, dim, X_FLOAT, devId);
+	printf("==============read model=============\n");
+	FILE *read = fopen(path, "rb");
+	if (read == NULL) {
+		fprintf(stderr, "cannot open %s for reading\n", path);
+		return 1;
+	}
+	halfb.Read(read, "halfa:");
+	//b.Read(read, "a");
+	fclose(read);
+
+	halfb.Dump(&halfb, stderr, "halfb:");
+
+	return 0;
+}
\ No newline at end of file
--- a/source/tensor/halfLib/HalfFloat/stdint.h
+++ b/source/tensor/halfLib/HalfFloat/stdint.h
+// ISO C9x  compliant stdint.h for Microsoft Visual Studio
+// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 
+// 
+//  Copyright (c) 2006 Alexander Chemeris
+// 
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// 
+//   1. Redistributions of source code must retain the above copyright notice,
+//      this list of conditions and the following disclaimer.
+// 
+//   2. Redistributions in binary form must reproduce the above copyright
+//      notice, this list of conditions and the following disclaimer in the
+//      documentation and/or other materials provided with the distribution.
+// 
+//   3. The name of the author may be used to endorse or promote products
+//      derived from this software without specific prior written permission.
+// 
+// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
+// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+// 
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef _MSC_VER // [
+#error "Use this header only with Microsoft Visual C++ compilers!"
+#endif // _MSC_VER ]
+
+#ifndef _MSC_STDINT_H_ // [
+#define _MSC_STDINT_H_
+
+#if _MSC_VER > 1000
+#pragma once
+#endif
+
+#include <limits.h>
+
+// For Visual Studio 6 in C++ mode wrap <wchar.h> include with 'extern "C++" {}'
+// or compiler give many errors like this:
+//   error C2733: second C linkage of overloaded function 'wmemchr' not allowed
+#if (_MSC_VER < 1300) && defined(__cplusplus)
+extern "C++" {
+#endif 
+#     include <wchar.h>
+#if (_MSC_VER < 1300) && defined(__cplusplus)
+}
+#endif
+
+// 7.18.1 Integer types
+
+// 7.18.1.1 Exact-width integer types
+typedef __int8            int8_t;
+typedef __int16           int16_t;
+typedef __int32           int32_t;
+typedef __int64           int64_t;
+typedef unsigned __int8   uint8_t;
+typedef unsigned __int16  uint16_t;
+typedef unsigned __int32  uint32_t;
+typedef unsigned __int64  uint64_t;
+
+// 7.18.1.2 Minimum-width integer types
+typedef int8_t    int_least8_t;
+typedef int16_t   int_least16_t;
+typedef int32_t   int_least32_t;
+typedef int64_t   int_least64_t;
+typedef uint8_t   uint_least8_t;
+typedef uint16_t  uint_least16_t;
+typedef uint32_t  uint_least32_t;
+typedef uint64_t  uint_least64_t;
+
+// 7.18.1.3 Fastest minimum-width integer types
+typedef int8_t    int_fast8_t;
+typedef int16_t   int_fast16_t;
+typedef int32_t   int_fast32_t;
+typedef int64_t   int_fast64_t;
+typedef uint8_t   uint_fast8_t;
+typedef uint16_t  uint_fast16_t;
+typedef uint32_t  uint_fast32_t;
+typedef uint64_t  uint_fast64_t;
+
+// 7.18.1.4 Integer types capable of holding object pointers
+#ifdef _WIN64 // [
+typedef __int64           intptr_t;
+typedef unsigned __int64  uintptr_t;
+#else // _WIN64 ][
+typedef int               intptr_t;
+typedef unsigned int      uintptr_t;
+#endif // _WIN64 ]
+
+// 7.18.1.5 Greatest-width integer types
+typedef int64_t   intmax_t;
+typedef uint64_t  uintmax_t;
+
+
+// 7.18.2 Limits of specified-width integer types
+
+#if !defined(__cplusplus) || defined(__STDC_LIMIT_MACROS) // [   See footnote 220 at page 257 and footnote 221 at page 259
+
+// 7.18.2.1 Limits of exact-width integer types
+#define INT8_MIN     ((int8_t)_I8_MIN)
+#define INT8_MAX     _I8_MAX
+#define INT16_MIN    ((int16_t)_I16_MIN)
+#define INT16_MAX    _I16_MAX
+#define INT32_MIN    ((int32_t)_I32_MIN)
+#define INT32_MAX    _I32_MAX
+#define INT64_MIN    ((int64_t)_I64_MIN)
+#define INT64_MAX    _I64_MAX
+#define UINT8_MAX    _UI8_MAX
+#define UINT16_MAX   _UI16_MAX
+#define UINT32_MAX   _UI32_MAX
+#define UINT64_MAX   _UI64_MAX
+
+// 7.18.2.2 Limits of minimum-width integer types
+#define INT_LEAST8_MIN    INT8_MIN
+#define INT_LEAST8_MAX    INT8_MAX
+#define INT_LEAST16_MIN   INT16_MIN
+#define INT_LEAST16_MAX   INT16_MAX
+#define INT_LEAST32_MIN   INT32_MIN
+#define INT_LEAST32_MAX   INT32_MAX
+#define INT_LEAST64_MIN   INT64_MIN
+#define INT_LEAST64_MAX   INT64_MAX
+#define UINT_LEAST8_MAX   UINT8_MAX
+#define UINT_LEAST16_MAX  UINT16_MAX
+#define UINT_LEAST32_MAX  UINT32_MAX
+#define UINT_LEAST64_MAX  UINT64_MAX
+
+// 7.18.2.3 Limits of fastest minimum-width integer types
+#define INT_FAST8_MIN    INT8_MIN
+#define INT_FAST8_MAX    INT8_MAX
+#define INT_FAST16_MIN   INT16_MIN
+#define INT_FAST16_MAX   INT16_MAX
+#define INT_FAST32_MIN   INT32_MIN
+#define INT_FAST32_MAX   INT32_MAX
+#define INT_FAST64_MIN   INT64_MIN
+#define INT_FAST64_MAX   INT64_MAX
+#define UINT_FAST8_MAX   UINT8_MAX
+#define UINT_FAST16_MAX  UINT16_MAX
+#define UINT_FAST32_MAX  UINT32_MAX
+#define UINT_FAST64_MAX  UINT64_MAX
+
+// 7.18.2.4 Limits of integer types capable of holding object pointers
+#ifdef _WIN64 // [
+#  define INTPTR_MIN   INT64_MIN
+#  define INTPTR_MAX   INT64_MAX
+#  define UINTPTR_MAX  UINT64_MAX
+#else // _WIN64 ][
+#  define INTPTR_MIN   INT32_MIN
+#  define INTPTR_MAX   INT32_MAX
+#  define UINTPTR_MAX  UINT32_MAX
+#endif // _WIN64 ]
+
+// 7.18.2.5 Limits of greatest-width integer types
+#define INTMAX_MIN   INT64_MIN
+#define INTMAX_MAX   INT64_MAX
+#define UINTMAX_MAX  UINT64_MAX
+
+// 7.18.3 Limits of other integer types
+
+#ifdef _WIN64 // [
+#  define PTRDIFF_MIN  _I64_MIN
+#  define PTRDIFF_MAX  _I64_MAX
+#else  // _WIN64 ][
+#  define PTRDIFF_MIN  _I32_MIN
+#  define PTRDIFF_MAX  _I32_MAX
+#endif  // _WIN64 ]
+
+#define SIG_ATOMIC_MIN  INT_MIN
+#define SIG_ATOMIC_MAX  INT_MAX
+
+#ifndef SIZE_MAX // [
+#  ifdef _WIN64 // [
+#     define SIZE_MAX  _UI64_MAX
+#  else // _WIN64 ][
+#     define SIZE_MAX  _UI32_MAX
+#  endif // _WIN64 ]
+#endif // SIZE_MAX ]
+
+// WCHAR_MIN and WCHAR_MAX are also defined in <wchar.h>
+#ifndef WCHAR_MIN // [
+#  define WCHAR_MIN  0
+#endif  // WCHAR_MIN ]
+#ifndef WCHAR_MAX // [
+#  define WCHAR_MAX  _UI16_MAX
+#endif  // WCHAR_MAX ]
+
+#define WINT_MIN  0
+#define WINT_MAX  _UI16_MAX
+
+#endif // __STDC_LIMIT_MACROS ]
+
+
+// 7.18.4 Macros for integer constants
+
+#if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) // [   See footnote 224 at page 260
+
+// 7.18.4.1 Macros for minimum-width integer constants
+
+#define INT8_C(val)  val##i8
+#define INT16_C(val) val##i16
+#define INT32_C(val) val##i32
+#define INT64_C(val) val##i64
+
+#define UINT8_C(val)  val##ui8
+#define UINT16_C(val) val##ui16
+#define UINT32_C(val) val##ui32
+#define UINT64_C(val) val##ui64
+
+// 7.18.4.2 Macros for greatest-width integer constants
+#define INTMAX_C   INT64_C
+#define UINTMAX_C  UINT64_C
+
+#endif // __STDC_CONSTANT_MACROS ]
+
+
+#endif // _MSC_STDINT_H_ ]
\ No newline at end of file
--- a/source/tensor/halfLib/HalfFloat/umHalf.h
+++ b/source/tensor/halfLib/HalfFloat/umHalf.h
+///////////////////////////////////////////////////////////////////////////////////
+/*
+Copyright (c) 2006-2008,
+Chris "Krishty" Maiwald, Alexander "Aramis" Gessler
+
+All rights reserved.
+
+Redistribution and use of this software in source and binary forms,
+with or without modification, are permitted provided that the following
+conditions are met:
+
+* Redistributions of source code must retain the above
+  copyright notice, this list of conditions and the
+  following disclaimer.
+
+* Redistributions in binary form must reproduce the above
+  copyright notice, this list of conditions and the
+  following disclaimer in the documentation and/or other
+  materials provided with the distribution.
+
+* Neither the name of the class, nor the names of its
+  contributors may be used to endorse or promote products
+  derived from this software without specific prior
+  written permission of the Development Team.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+///////////////////////////////////////////////////////////////////////////////////
+
+#ifndef UM_HALF_H_INCLUDED
+#define UM_HALF_H_INCLUDED
+
+#include <limits>
+#include <algorithm>
+
+//#ifdef _MSC_VER
+//#include "stdint.h"
+//#else
+//#include <stdint.h>
+//#endif
+#include<stdint.h>
+
+#undef min
+#undef max
+
+///////////////////////////////////////////////////////////////////////////////////
+/** 1. Represents a half-precision floating point value (16 bits) that behaves
+ *  nearly conformant to the IEEE 754 standard for floating-point computations.
+ *
+ *  Not all operators have special implementations, most perform time-consuming
+ *  conversions from half to float and back again.
+ *  Differences to IEEE 754:
+ *  - no difference between qnan and snan
+ *  - no traps
+ *  - no well-defined rounding mode
+ */
+ ///////////////////////////////////////////////////////////////////////////////////
+class HalfFloat
+{
+	// Arithmetic on two halves is provided by free operators declared as friends.
+	friend HalfFloat operator+ (HalfFloat, HalfFloat);
+	friend HalfFloat operator- (HalfFloat, HalfFloat);
+	friend HalfFloat operator* (HalfFloat, HalfFloat);
+	friend HalfFloat operator/ (HalfFloat, HalfFloat);
+
+public:
+
+	// Format constants: field widths, exponent bias and exponent ranges.
+	enum
+	{
+		BITS_MANTISSA = 10,
+		BITS_EXPONENT = 5,
+
+		MAX_EXPONENT_VALUE = 31,
+		BIAS = MAX_EXPONENT_VALUE / 2,
+
+		MAX_EXPONENT = BIAS,
+		MIN_EXPONENT = -BIAS,
+
+		MAX_EXPONENT10 = 9,
+		MIN_EXPONENT10 = -9
+	};
+
+public:
+
+	/** Default constructor: the stored bits are left uninitialized.
+	 */
+	inline HalfFloat() {}
+
+	/** Copy constructor: takes over the bit pattern of @p source.
+	 */
+	inline HalfFloat(const HalfFloat& source)
+		: bits(source.GetBits())
+	{}
+
+	/** Builds a half from explicit mantissa, exponent and sign fields.
+	 *  The arguments are not validated.
+	 *  @note The exponent is unsigned and biased by #BIAS
+	 */
+	inline HalfFloat(uint16_t mantissa, uint16_t exponent, uint16_t sign);
+
+
+	/** Converting constructor from a single-precision float
+	 */
+	inline HalfFloat(float value);
+
+	/** Converting constructor from a double-precision float
+	 */
+	inline HalfFloat(const double value);
+
+
+
+	/** Conversion from half to float
+	 */
+	inline operator float() const;
+
+	/** Conversion from half to double
+	 */
+	inline operator double() const;
+
+
+
+	/** Assignment from another half, a float or a double.
+	 */
+	inline HalfFloat& operator= (HalfFloat rhs);
+	inline HalfFloat& operator= (float rhs);
+	inline HalfFloat& operator= (const double rhs);
+
+
+	/** Equality and inequality against another half
+	 */
+	inline bool operator== (HalfFloat rhs) const;
+	inline bool operator!= (HalfFloat rhs) const;
+
+
+	/** Ordering comparisons against another half ...
+	 */
+	inline bool operator<  (HalfFloat rhs) const;
+	inline bool operator>  (HalfFloat rhs) const;
+	inline bool operator<= (HalfFloat rhs) const;
+	inline bool operator>= (HalfFloat rhs) const;
+
+	/** ... and against a float
+	 */
+	inline bool operator<  (float rhs) const;
+	inline bool operator>  (float rhs) const;
+	inline bool operator<= (float rhs) const;
+	inline bool operator>= (float rhs) const;
+
+
+	/** Compound assignment with a half operand
+	 */
+	inline HalfFloat& operator += (HalfFloat rhs);
+	inline HalfFloat& operator -= (HalfFloat rhs);
+	inline HalfFloat& operator *= (HalfFloat rhs);
+	inline HalfFloat& operator /= (HalfFloat rhs);
+
+	/** Compound assignment with a float operand
+	 */
+	inline HalfFloat& operator += (float rhs);
+	inline HalfFloat& operator -= (float rhs);
+	inline HalfFloat& operator *= (float rhs);
+	inline HalfFloat& operator /= (float rhs);
+
+	/** Prefix and postfix increment
+	 */
+	inline HalfFloat& operator++();
+	inline HalfFloat operator++(int);
+
+	/** Prefix and postfix decrement
+	 */
+	inline HalfFloat& operator--();
+	inline HalfFloat operator--(int);
+
+	/** Unary negation
+	 */
+	inline HalfFloat operator-() const;
+
+
+	/** Raw access to the 16 stored bits (read-only and mutable)
+	 */
+	inline uint16_t GetBits() const;
+	inline uint16_t& GetBits();
+
+
+	/** Value classification
+	 */
+	inline bool IsNaN() const;
+	inline bool IsInfinity() const;
+	inline bool IsDenorm() const;
+
+	/** Sign query - true stands for positive.
+	 */
+	inline bool GetSign() const;
+
+public:
+
+	// Storage: the same 16 bits viewed as a whole word or as bit-fields.
+	union
+	{
+		uint16_t bits;			// whole 16-bit word
+		struct
+		{
+			uint16_t Frac : 10;	// mantissa field
+			uint16_t Exp : 5;		// exponent field
+			uint16_t Sign : 1;		// sign field
+		} IEEE;
+	};
+
+
+	// Field view of a single-precision float
+	union IEEESingle
+	{
+		float Float;
+		struct
+		{
+			uint32_t Frac : 23;
+			uint32_t Exp : 8;
+			uint32_t Sign : 1;
+		} IEEE;
+	};
+
+	// Field view of a double-precision float
+	union IEEEDouble
+	{
+		double Double;
+		struct {
+			uint64_t Frac : 52;
+			uint64_t Exp : 11;
+			uint64_t Sign : 1;
+		} IEEE;
+	};
+
+	// 64-bit constants are kept as static members because enums cannot hold them.
+	static const uint64_t IEEEDouble_MaxExpontent = 0x7FF;
+	static const uint64_t IEEEDouble_ExponentBias = IEEEDouble_MaxExpontent / 2;
+};
+
+/** 2. Binary operations
+ */
+inline HalfFloat operator+ (HalfFloat one, HalfFloat two);
+inline HalfFloat operator- (HalfFloat one, HalfFloat two);
+inline HalfFloat operator* (HalfFloat one, HalfFloat two);
+inline HalfFloat operator/ (HalfFloat one, HalfFloat two);
+
+inline float operator+ (HalfFloat one, float two);
+inline float operator- (HalfFloat one, float two);
+inline float operator* (HalfFloat one, float two);
+inline float operator/ (HalfFloat one, float two);
+
+inline float operator+ (float one, HalfFloat two);
+inline float operator- (float one, HalfFloat two);
+inline float operator* (float one, HalfFloat two);
+inline float operator/ (float one, HalfFloat two);
+
+
+
+///////////////////////////////////////////////////////////////////////////////////
+/** 3. Specialization of std::numeric_limits for type half.
+ */
+ ///////////////////////////////////////////////////////////////////////////////////
+namespace std {
+	/** Describes HalfFloat to generic numeric code.
+	 *  Values are built with HalfFloat(mantissa, biased exponent, sign);
+	 *  the positive constants below all use sign 0.
+	 */
+	template <>
+	class numeric_limits<HalfFloat> {
+
+	public:
+
+		// General -- meaningful for all specializations.
+
+		static const bool is_specialized = true;
+
+		/** Smallest positive normalized value (biased exponent 1, zero mantissa). */
+		static HalfFloat min()
+		{
+			return HalfFloat(0, 1, 0);
+		}
+		/** Largest finite value: all ten mantissa bits set, largest non-special exponent. */
+		static HalfFloat max()
+		{
+			// explicit 10-bit mask instead of ~0 so the result does not depend
+			// on the constructor truncating an over-wide mantissa argument
+			return HalfFloat(0x3FF, HalfFloat::MAX_EXPONENT_VALUE - 1, 0);
+		}
+		/** Most negative finite value, i.e. max() with the sign bit set. */
+		static HalfFloat lowest()
+		{
+			return HalfFloat(0x3FF, HalfFloat::MAX_EXPONENT_VALUE - 1, 1);
+		}
+		static const int radix = 2;
+		static const int digits = 10;   // conservative assumption
+		static const int digits10 = 2;  // conservative assumption
+		static const bool is_signed = true;
+		static const bool is_integer = false;	// HalfFloat is a floating-point type
+		static const bool is_exact = false;
+		static const bool traps = false;
+		static const bool is_modulo = false;
+		static const bool is_bounded = true;
+
+		// Floating point specific.
+
+		static HalfFloat epsilon()
+		{
+			return HalfFloat(0.00097656f);
+		} // from OpenEXR, needs to be confirmed
+		static HalfFloat round_error()
+		{
+			return HalfFloat(0.00097656f / 2);
+		}
+		static const int min_exponent10 = HalfFloat::MIN_EXPONENT10;
+		static const int max_exponent10 = HalfFloat::MAX_EXPONENT10;
+		static const int min_exponent = HalfFloat::MIN_EXPONENT;
+		static const int max_exponent = HalfFloat::MAX_EXPONENT;
+
+		static const bool has_infinity = true;
+		static const bool has_quiet_NaN = true;
+		static const bool has_signaling_NaN = true;
+		static const bool is_iec559 = false;
+		// the standard declares this member as float_denorm_style, not bool
+		static const float_denorm_style has_denorm = denorm_present;
+		static const bool tinyness_before = false;
+		static const float_round_style round_style = round_to_nearest;
+
+		/** Smallest positive denormalized value (mantissa 1, exponent 0, sign 0). */
+		static HalfFloat denorm_min()
+		{
+			return HalfFloat(1, 0, 0);
+		}
+		/** Positive infinity: maximum exponent with a zero mantissa. */
+		static HalfFloat infinity()
+		{
+			return HalfFloat(0, HalfFloat::MAX_EXPONENT_VALUE, 0);
+		}
+		/** NaN: maximum exponent with a non-zero mantissa. */
+		static HalfFloat quiet_NaN()
+		{
+			return HalfFloat(1, HalfFloat::MAX_EXPONENT_VALUE, 0);
+		}
+		/** Same encoding as quiet_NaN(). */
+		static HalfFloat signaling_NaN()
+		{
+			return HalfFloat(1, HalfFloat::MAX_EXPONENT_VALUE, 0);
+		}
+	};
+} // end namespace std
+
+
+#include "./umHalf.inl"
+
+#ifndef UM_HALF_NO_TYPEDEFS
+typedef HalfFloat float16;
+typedef HalfFloat halfCPU;
+#endif
+
+#endif // !! UM_HALF_H_INCLUDED
--- a/source/tensor/halfLib/HalfFloat/umHalf.inl
+++ b/source/tensor/halfLib/HalfFloat/umHalf.inl
--- a/source/tensor/halfLib/half/a.md
+++ b/source/tensor/halfLib/half/a.md
+#pragma once
--- a/source/tensor/halfLib/half/dump
+++ b/source/tensor/halfLib/half/dump
+halfa: order=2 dimsize=4,4 dtype=X_FLOAT16 dense=1.000000
+bc68 342d ae59 bcd7 b46a 3c1c 2c25 beb9 bcaf 3d72 3fc2 38d0 bd6b bce4 3854 ad13
--- a/source/tensor/halfLib/half/half.hpp
+++ b/source/tensor/halfLib/half/half.hpp
--- a/source/tensor/halfLib/half/main.cpp
+++ b/source/tensor/halfLib/half/main.cpp
+#include <stdio.h>
+#include <direct.h>
+#include "../../core/CHeader.h"
+#include "../../core/utilities/FlushToMem.h"
+#include "../../core/getandset/ConvertDataType.h"
+#include "../../XTensor.h"
+#include "../../XGlobal.h"
+
+using namespace nts;
+
+/*
+ * Half-precision demo: dumps a random X_FLOAT16 tensor to a file below the
+ * current working directory, reads it back, and then runs BMMul once on the
+ * float tensors and once on the half tensors, printing every result to stderr.
+ * Returns 0 on success, 1 if the dump path cannot be built or the file cannot
+ * be opened.
+ */
+int main(int argc, const char ** argv) {
+	/* Build the dump path in a fixed-size buffer. The buffer returned by
+	   getcwd(NULL, 0) is only guaranteed to hold the directory string itself,
+	   so appending to it with strcat writes past the allocation. Limiting
+	   getcwd to "capacity minus suffix" bytes guarantees the append fits. */
+	static const char suffix[] = "\\source\\tensor\\halfLib\\half\\dump";
+	char path[4096];
+	if (getcwd(path, (int)(sizeof(path) - sizeof(suffix))) == NULL) {
+		fprintf(stderr, "cannot determine the current working directory\n");
+		return 1;
+	}
+	strcat(path, suffix);
+
+	int dim = 4;
+	int devId = 0;
+
+	XTensor a;
+	XTensor b;
+	XTensor c;
+
+	XTensor halfa;
+	XTensor halfb;
+	XTensor halfc;
+
+	InitTensor2DV2(&a, dim, dim, X_FLOAT, devId);
+	InitTensor2DV2(&c, dim, dim, X_FLOAT, devId);
+	InitTensor2DV2(&halfb, dim, dim, X_FLOAT16, devId);
+
+	a.SetDataRand(-2.0, 2.0);
+	c.SetDataRand(-2.0, 2.0);
+	halfa = ConvertDataType(a, X_FLOAT16);
+	halfc = ConvertDataType(c, X_FLOAT16);
+
+	printf("============save model================\n");
+	halfa.Dump(&halfa, stderr, "halfa:");
+	GPUToCPUFlush(&halfa);
+
+	/* bail out instead of handing a NULL stream to Dump/fclose */
+	FILE * file = fopen(path, "wb");
+	if (file == NULL) {
+		fprintf(stderr, "cannot open %s for writing\n", path);
+		return 1;
+	}
+	halfa.Dump(file, "halfa:");
+	//a.Dump(file, "a");
+	fclose(file);
+
+	printf("==============read model=============\n");
+	FILE *read = fopen(path, "rb");
+	if (read == NULL) {
+		fprintf(stderr, "cannot open %s for reading\n", path);
+		return 1;
+	}
+	halfb.Read(read, "halfa:");
+	//b.Read(read, "a");
+	fclose(read);
+	halfb.Dump(&halfb, stderr, "halfb:");
+
+	/* float product a x c */
+	printf("==============BMMUL=============\n");
+	b = BMMul(a, X_NOTRANS, c, X_NOTRANS);
+	b.Dump(stderr,"b:");
+
+	/* half product halfb x halfc; the result overwrites halfa */
+	printf("==============BMMUL-float=============\n");
+	halfa= BMMul(halfb, X_NOTRANS, halfc, X_NOTRANS);
+	halfa.Dump(&halfa, stderr, "halfla:");
+	
+	return 0;
+}
\ No newline at end of file
--- a/source/tensor/halfLib/half/matmul.cu
+++ b/source/tensor/halfLib/half/matmul.cu
+#include <stdlib.h>
+#include <stdio.h>
+#include <cuda_runtime.h>
+#include <device_launch_parameters.h>
+#include <time.h>
+#include <cuda_fp16.h>
+
+//#ifndef HALF_ARITHMETIC_TYPE
+//#define HALF_ARITHMETIC_TYPE
+//#endif // !HALF_ARITHMETIC_TYPE
+
+#include "half.hpp"
+
+using half_float::halfFloat;
+
+typedef half_float::halfFloat halfC;
+
+// Element-wise product kernel: each thread writes C[idx] = A[idx] * B[idx],
+// where idx is derived from the x components of the block and thread
+// coordinates only. No bounds check is performed.
+__global__ void matrixMulKernel(__half *C, __half *A, __half *B) {
+	const int idx = threadIdx.x + blockDim.x * blockIdx.x;
+	const __half lhs = A[idx];
+	const __half rhs = B[idx];
+	C[idx] = lhs * rhs;
+}
+
+// Stores `val` into the first `size` elements of `data`;
+// does nothing when `size` is zero or negative.
+void constantInit(halfC *data, int size, halfC val) {
+	int remaining = size;
+	while (remaining-- > 0) {
+		*data++ = val;
+	}
+}
+
+
+// Returns the raw 16-bit pattern of a host half value so it can be printed with %hx.
+// NOTE(review): assumes halfC is a standard-layout wrapper around one 16-bit
+// field (matrixMul already copies halfC arrays into __half buffers
+// byte-for-byte) - confirm against half.hpp.
+static unsigned short halfBits(const halfC &value) {
+	return *reinterpret_cast<const unsigned short*>(&value);
+}
+
+// Multiplies two constant half vectors element-wise on the GPU and on the CPU
+// and prints the bit patterns of both results side by side ("gpu--cpu"),
+// eight pairs per line. On an allocation or CUDA failure a message is written
+// to stderr and nothing is printed; all buffers are released in either case.
+void matrixMul() {
+
+	const unsigned int N = 128;
+	const unsigned int size = N * sizeof(halfC);
+	// matrixMulKernel has no bounds check, so N must be a multiple of this
+	const unsigned int threadsPerBlock = 128;
+
+	halfC *h_A = (halfC*)malloc(size);
+	halfC *h_B = (halfC*)malloc(size);
+	halfC *h_C = (halfC*)malloc(size);
+	halfC *h_D = (halfC*)malloc(size);
+
+	__half *d_A = NULL, *d_B = NULL, *d_C = NULL;
+
+	bool ok = (h_A != NULL && h_B != NULL && h_C != NULL && h_D != NULL);
+	if (ok) {
+		ok = cudaMalloc((void**)&d_A, size) == cudaSuccess
+			&& cudaMalloc((void**)&d_B, size) == cudaSuccess
+			&& cudaMalloc((void**)&d_C, size) == cudaSuccess;
+	}
+
+	if (ok) {
+		// Initialize host memory
+		const halfC valB = (halfC)0.01f;
+		constantInit(h_A, N, (halfC)1.0f);
+		constantInit(h_B, N, valB);
+
+		// copy host memory to device
+		ok = cudaMemcpy(d_A, h_A, size, cudaMemcpyHostToDevice) == cudaSuccess
+			&& cudaMemcpy(d_B, h_B, size, cudaMemcpyHostToDevice) == cudaSuccess;
+	}
+
+	if (ok) {
+		// The kernel indexes with the x coordinate only, so a 1-D launch of
+		// exactly N threads covers every element once.
+		matrixMulKernel << <N / threadsPerBlock, threadsPerBlock >> > (d_C, d_A, d_B);
+
+		// Copy the result back; also surfaces launch errors
+		ok = cudaGetLastError() == cudaSuccess
+			&& cudaMemcpy(h_C, d_C, size, cudaMemcpyDeviceToHost) == cudaSuccess;
+	}
+
+	if (ok) {
+		// CPU reference
+		for (unsigned int k = 0; k < N; k++) {
+			h_D[k] = h_A[k] * h_B[k];
+		}
+
+		// print the raw bits; passing the class objects themselves to a
+		// variadic function does not match %hx
+		for (unsigned int i = 0; i < N; i++) {
+			printf("%hx--%hx ", halfBits(h_C[i]), halfBits(h_D[i]));
+			if ((i + 1) % 8 == 0)
+				printf("\n");
+		}
+	}
+	else {
+		fprintf(stderr, "matrixMul: allocation or CUDA call failed\n");
+	}
+
+	// Free (free(NULL) is a no-op and cudaFree(NULL) is a valid call)
+	free(h_A);
+	free(h_B);
+	free(h_C);
+	free(h_D);
+	cudaFree(d_A);
+	cudaFree(d_B);
+	cudaFree(d_C);
+}
+
+// Program entry point: runs the half-precision multiplication demo once.
+int main() {
+	matrixMul();
+	return 0;
+}
+
+//
+//#define THREAD_NUM 256
+//#define MATRIX_SIZE 4
+//const halfC blocks_num = MATRIX_SIZE * (MATRIX_SIZE + THREAD_NUM - 1) / THREAD_NUM;
+//
+//__global__ static void matMultCUDA(const __half* a, const __half* b, __half* c, halfC n, clock_t* time)
+//{
+//
+//	//表示目前的 thread 是第几个 thread（由 0 开始计算）
+//	const halfC tid = threadIdx.x;
+//
+//	//表示目前的 thread 属于第几个 block（由 0 开始计算）
+//	const halfC bid = blockIdx.x;
+//
+//	//从 bid 和 tid 计算出这个 thread 应该计算的 row 和 column
+//	const halfC idx = bid * THREAD_NUM + tid;
+//	const halfC row = idx / n;
+//	const halfC column = idx % n;
+//
+//	halfC i;
+//
+//	//记录运算开始的时间
+//	clock_t start;
+//
+//	//只在 thread 0（即 threadIdx.x = 0 的时候）进行记录，每个 block 都会记录开始时间及结束时间
+//	if (tid == 0)
+//		time[bid] = clock();
+//
+//	//计算矩阵乘法
+//	if (row < n && column < n)
+//	{
+//		__half t = __half(0.0);
+//		for (i = 0; i < n; i++)
+//		{
+//			t += a[row * n + i] * b[i * n + column];
+//		}
+//		c[row * n + column] = t;
+//	}
+//
+//	//计算时间,记录结果，只在 thread 0（即 threadIdx.x = 0 的时候）进行，每个 block 都会记录开始时间及结束时间
+//	if (tid == 0)
+//	{
+//		time[bid + blocks_num] = clock();
+//	}
+//}
+//
+//bool InitCuda() {
+//	halfC count;
+//	halfC device;
+//	cudaGetDeviceCount(&count);
+//	if (count == 0) {
+//		fprhalfCf(stderr, "There is no device !\n");
+//	}
+//	else
+//		device = 1;
+//	cudaSetDevice(device);
+//	return true;
+//}
+//template <class T >
+//void matgen(T *a, halfC n) {
+//	halfC i, j;
+//	for (i = 0; i < n; i++) {
+//		for (j = 0; j < n; j++) {
+//			a[i * n + j] = (T)rand() / (0x7FFF) + (halfC)rand() / (0x7FFF * 0x7FFF);
+//		}
+//	}
+//}
+//
+//
+//halfC main(halfC argc, char **argv) {
+//
+//	//CUDA 初始化
+//	if (!InitCuda())
+//		return 0;
+//
+//	//定义矩阵
+//	halfC *a, *b, *c, *d;
+//
+//	halfC n = MATRIX_SIZE;
+//
+//	//分配内存
+//	a = (halfC*)malloc(sizeof(halfC)* n * n);
+//	b = (halfC*)malloc(sizeof(halfC)* n * n);
+//	c = (halfC*)malloc(sizeof(halfC)* n * n);
+//	d = (halfC*)malloc(sizeof(halfC)* n * n);
+//
+//	//设置随机数种子
+//	srand(0);
+//
+//	//随机生成矩阵
+//	matgen(a, n);
+//	matgen(b, n);
+//
+//	for (halfC i = 0; i < n; i++)
+//	{
+//		for (halfC j = 0; j < n; j++)
+//		{
+//			prhalfCf("%x ", a[i * n + j]);
+//		}
+//		prhalfCf("\n");
+//	}
+//
+//	///*把数据复制到显卡内存中*/
+//	__half *cuda_a, *cuda_b, *cuda_c;
+//
+//	clock_t* time;
+//
+//	//cudaMalloc 取得一块显卡内存 
+//	cudaMalloc((void**)&cuda_a, sizeof(__half)* n * n);
+//	cudaMalloc((void**)&cuda_b, sizeof(__half)* n * n);
+//	cudaMalloc((void**)&cuda_c, sizeof(__half)* n * n);
+//
+//	cudaMalloc((void**)&time, sizeof(clock_t)* blocks_num * 2);
+//
+//	//cudaMemcpy 将产生的矩阵复制到显卡内存中
+//	//cudaMemcpyHostToDevice - 从内存复制到显卡内存
+//	//cudaMemcpyDeviceToHost - 从显卡内存复制到内存
+//	cudaMemcpy(cuda_a, a, sizeof(__half)* n * n, cudaMemcpyHostToDevice);
+//	cudaMemcpy(cuda_b, b, sizeof(__half)* n * n, cudaMemcpyHostToDevice);
+//
+//	// 在CUDA 中执行函数 语法：函数名称<<<block 数目, thread 数目, shared memory 大小>>>(参数...);
+//	matMultCUDA << < blocks_num, THREAD_NUM, 0 >> > (cuda_a, cuda_b, cuda_c, n, time);
+//
+//	/*把结果从显示芯片复制回主内存*/
+//
+//	clock_t time_use[blocks_num * 2];
+//
+//	//cudaMemcpy 将结果从显存中复制回内存
+//	cudaMemcpy(c, cuda_c, sizeof(halfC)* n * n, cudaMemcpyDeviceToHost);
+//	cudaMemcpy(&time_use, time, sizeof(clock_t)* blocks_num * 2, cudaMemcpyDeviceToHost);
+//
+//	for (halfC i = 0; i < n; i++)
+//	{
+//		for (halfC j = 0; j < n; j++)
+//		{
+//			prhalfCf("%x ", c[i * n + j]);
+//		}
+//		prhalfCf("\n");
+//	}
+//
+//	//Free cuda
+//	cudaFree(cuda_a);
+//	cudaFree(cuda_b);
+//	cudaFree(cuda_c);
+//	cudaFree(time);
+
+
+	////把每个 block 最早的开始时间，和最晚的结束时间相减，取得总运行时间
+	//clock_t min_start, max_end;
+
+	//min_start = time_use[0];
+
+	//max_end = time_use[blocks_num];
+
+	//for (halfC i = 1; i < blocks_num; i++)
+	//{
+	//	if (min_start > time_use[i]) min_start = time_use[i];
+
+	//	if (max_end < time_use[i + blocks_num]) max_end = time_use[i + blocks_num];
+	//}
+
+	////核函数运行时间
+	//clock_t final_time = max_end - min_start;
+
+
+	////CPU矩阵乘法，存入矩阵d
+	//for (halfC i = 0; i < n; i++)
+	//{
+	//	for (halfC j = 0; j < n; j++)
+	//	{
+	//		double t = 0;
+	//		for (halfC k = 0; k < n; k++){
+
+	//			t += a[i * n + k] * b[k * n + j];
+	//		}
+
+	//		d[i * n + j] = t;
+	//	}
+	//}
+
+	////验证正确性与精确性
+
+	//halfC max_err = (halfC)0.0;
+	//halfC average_err = (halfC)0;
+	//for (halfC i = 0; i < n; i++)
+	//{
+	//	for (halfC j = 0; j < n; j++)
+	//	{
+	//		if (d[i * n + j] != 0)
+	//		{
+	//			//fabs求浮点数x的绝对值
+	//			halfC err = fabs((c[i * n + j] - d[i * n + j]) / d[i * n + j]);
+	//			if (max_err < err) max_err = err;
+	//			average_err += err;
+	//		}
+	//	}
+	//}
+
+	//prhalfCf("Max error: %g Average error: %g\n", max_err, average_err / (n * n));
+	//prhalfCf("gputime: %d\n", final_time);
+//
+//	return 0;
+//}
\ No newline at end of file
--- a/source/tensor/halfLib/half/test.cpp
+++ b/source/tensor/halfLib/half/test.cpp
--- a/source/tensor/halfLib/third-party/THHalf.h
+++ b/source/tensor/halfLib/third-party/THHalf.h
+/*
+ * This implementation is extracted from PyTorch:
+ *   Repo: github.com/pytorch/pytorch
+ *   File: torch/lib/TH/THHalf.c
+ *   Commit ID: 92481b59d31199df57420d4b14912348cc780d1d
+ * Functions are made "static inline" for performance
+ */
+
+ /* Copyright 1993-2014 NVIDIA Corporation.  All rights reserved. */
+
+ // Host functions for converting between FP32 and FP16 formats
+
+// Expands one IEEE 754 binary16 bit pattern into a binary32 float.
+//
+//   src - points to the 16-bit half-precision pattern to read
+//   res - receives the converted single-precision value
+//
+// Zero, subnormal, normal and infinite inputs keep their sign and value.
+// Every NaN input is written as the positive pattern 0x7fffffff, so the
+// NaN payload and sign are not preserved.
+// The code assumes `unsigned` and `float` are both 32 bits wide.
+static inline void TH_halfbits2float(unsigned short* src, float* res)
+{
+	// The result is assembled as an integer and handed over through a union
+	// rather than by storing through a casted pointer (*(unsigned*)res),
+	// which violates the strict-aliasing rule.
+	union { unsigned bits; float value; } out;
+
+	unsigned h = *src;
+	unsigned sign = ((h >> 15) & 1);
+	unsigned exponent = ((h >> 10) & 0x1f);
+	unsigned mantissa = ((h & 0x3ff) << 13);  /* already aligned to the float mantissa */
+
+	if (exponent == 0x1f) {  /* NaN or Inf */
+		/* NaN: clear the sign and set every mantissa bit; Inf: mantissa stays 0 */
+		mantissa = (mantissa ? (sign = 0, 0x7fffff) : 0);
+		exponent = 0xff;
+	}
+	else if (!exponent) {  /* Denorm or Zero */
+		if (mantissa) {
+			unsigned int msb;
+			exponent = 0x71;
+			/* shift left until the leading 1 has moved out of the mantissa field */
+			do {
+				msb = (mantissa & 0x400000);
+				mantissa <<= 1;  /* normalize */
+				--exponent;
+			} while (!msb);
+			mantissa &= 0x7fffff;  /* 1.mantissa is implicit */
+		}
+	}
+	else {
+		exponent += 0x70;  /* rebias: 127 - 15 */
+	}
+
+	out.bits = ((sign << 31) | (exponent << 23) | mantissa);
+	*res = out.value;
+}
+
+// Rounds a binary32 float to the nearest IEEE 754 binary16 bit pattern
+// (round-to-nearest, ties-to-even).
+//
+//   src  - points to the single-precision value to convert
+//   dest - receives the 16-bit half-precision pattern
+//
+// Any NaN becomes 0x7fff. Magnitudes whose bits exceed 0x477fefff (that is,
+// 65520.0f and above, infinity included) become a signed infinity, and
+// magnitudes whose bits are below 0x33000001 (2^-25 and smaller) become a
+// signed zero. The code assumes `unsigned` and `float` are both 32 bits wide.
+static inline void TH_float2halfbits(float* src, unsigned short* dest)
+{
+	// Read the bit pattern through a union instead of *(unsigned*)src; the
+	// pointer cast violates the strict-aliasing rule.
+	union { float value; unsigned bits; } in;
+	in.value = *src;
+
+	unsigned x = in.bits;
+	unsigned u = (x & 0x7fffffff), remainder, shift, lsb, lsb_s1, lsb_m1;
+	unsigned sign, exponent, mantissa;
+
+	// Get rid of +NaN/-NaN case first.
+	if (u > 0x7f800000) {
+		*dest = 0x7fffU;
+		return;
+	}
+
+	sign = ((x >> 16) & 0x8000);
+
+	// Get rid of +Inf/-Inf, +0/-0.
+	if (u > 0x477fefff) {
+		*dest = sign | 0x7c00U;
+		return;
+	}
+	if (u < 0x33000001) {
+		*dest = (sign | 0x0000);
+		return;
+	}
+
+	exponent = ((u >> 23) & 0xff);
+	mantissa = (u & 0x7fffff);
+
+	if (exponent > 0x70) {
+		// Result is a normal half: drop 13 mantissa bits and rebias.
+		shift = 13;
+		exponent -= 0x70;
+	}
+	else {
+		// Result is a subnormal half: restore the implicit leading 1 and
+		// shift further the smaller the input is.
+		shift = 0x7e - exponent;
+		exponent = 0;
+		mantissa |= 0x800000;
+	}
+	lsb = (1 << shift);      // weight of the last kept bit
+	lsb_s1 = (lsb >> 1);     // exactly half of that weight
+	lsb_m1 = (lsb - 1);      // mask of the bits being dropped
+
+	// Round to nearest even.
+	remainder = (mantissa & lsb_m1);
+	mantissa >>= shift;
+	if (remainder > lsb_s1 || (remainder == lsb_s1 && (mantissa & 0x1))) {
+		++mantissa;
+		if (!(mantissa & 0x3ff)) {
+			// The mantissa overflowed into the exponent field.
+			++exponent;
+			mantissa = 0;
+		}
+	}
+
+	*dest = (sign | (exponent << 10) | mantissa);
+}
--- a/source/tensor/halfLib/third-party/eigen-half.h
+++ b/source/tensor/halfLib/third-party/eigen-half.h
+/*
+ * This implementation is extracted from Eigen:
+ *   Repo: bitbucket.org/eigen/eigen
+ *   File: Eigen/src/Core/arch/CUDA/Half.h
+ *   Commit ID: 96e0f73a35de54f675d825bef5339b2f08e77eb4
+ *
+ * Removed a lot of redundant and cuda-specific code.
+ */
+
+// Qualifier macros used on the functions below, as defined for this extract:
+// EIGEN_STRONG_INLINE expands to `static inline`, and EIGEN_DEVICE_FUNC
+// expands to nothing.
+#define EIGEN_STRONG_INLINE static inline
+#define EIGEN_DEVICE_FUNC
+
+ // This file is part of Eigen, a lightweight C++ template library
+ // for linear algebra.
+ //
+ // This Source Code Form is subject to the terms of the Mozilla
+ // Public License v. 2.0. If a copy of the MPL was not distributed
+ // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ //
+ // The conversion routines are Copyright (c) Fabian Giesen, 2016.
+ // The original license follows:
+ //
+ // Copyright (c) Fabian Giesen, 2016
+ // All rights reserved.
+ // Redistribution and use in source and binary forms, with or without
+ // modification, are permitted.
+ // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ // AS IS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ // HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+ // Standard 16-bit float type, mostly useful for GPUs. Defines a new
+ // type Eigen::half (inheriting from CUDA's __half struct) with
+ // operator overloads such that it behaves basically as an arithmetic
+ // type. It will be quite slow on CPUs (so it is recommended to stay
+ // in fp32 for CPUs, except for simple parameter conversions, I/O
+ // to disk and the likes), but fast on GPUs.
+
+
+#ifndef EIGEN_HALF_CUDA_H
+#define EIGEN_HALF_CUDA_H
+
+namespace Eigen {
+
+	namespace half_impl {
+
+		// Stand-in for CUDA's __half: a plain wrapper around the raw 16 bits.
+		struct __half {
+			EIGEN_DEVICE_FUNC __half() : x(0) {}
+			explicit EIGEN_DEVICE_FUNC __half(unsigned short raw) : x(raw) {}
+			unsigned short x;  // raw binary16 bit pattern
+		};
+
+		EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half raw_uint16_to_half(unsigned short x);
+		EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half float_to_half_rtne(float ff);
+		EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float half_to_float(__half h);
+
+		// Conversion routines with software fallbacks. The CUDA and F16C
+		// intrinsics are used only when the corresponding macros are defined.
+
+		// Wraps a raw 16-bit pattern in a __half without any conversion.
+		EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half raw_uint16_to_half(unsigned short x) {
+			return __half(x);
+		}
+
+		// Gives access to the bit pattern of a float.
+		union FP32 {
+			unsigned int u;
+			float f;
+		};
+
+		// Converts a float to half precision, rounding to nearest with ties
+		// to even. NaN maps to the quiet NaN 0x7e00 (plus the input sign) and
+		// anything too large for a half maps to a signed infinity.
+		EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half float_to_half_rtne(float ff) {
+#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
+			return __float2half(ff);
+
+#elif defined(EIGEN_HAS_FP16_C)
+			__half h;
+			h.x = _cvtss_sh(ff, 0);
+			return h;
+
+#else
+			const unsigned int kSignMask = 0x80000000u;
+			const unsigned int kFloatInfBits = 255u << 23;             // float infinity
+			const unsigned int kHalfOverflowBits = (127u + 16u) << 23; // first magnitude that is Inf/NaN as a half
+			const unsigned int kHalfMinNormalBits = 113u << 23;        // smallest normal half, as a float
+
+			FP32 denormMagic;
+			denormMagic.u = ((127u - 15u) + (23u - 10u) + 1u) << 23;
+
+			FP32 in;
+			in.f = ff;
+
+			// Split off the sign and continue with the magnitude only.
+			const unsigned int signBit = in.u & kSignMask;
+			in.u ^= signBit;
+
+			// All remaining operands are below 0x80000000, so the compares
+			// below give the same answer whether done signed or unsigned.
+			unsigned short bits;
+			if (in.u >= kHalfOverflowBits) {
+				// Inf or NaN: NaN becomes a quiet NaN, everything else Inf.
+				bits = static_cast<unsigned short>((in.u > kFloatInfBits) ? 0x7e00 : 0x7c00);
+			}
+			else if (in.u < kHalfMinNormalBits) {
+				// Subnormal half or zero: adding the magic value lets the FP
+				// adder align and round the 10 mantissa bits (this relies on
+				// FP addition rounding to nearest even); subtracting the
+				// magic bit pattern then leaves the half bits.
+				in.f += denormMagic.f;
+				bits = static_cast<unsigned short>(in.u - denormMagic.u);
+			}
+			else {
+				// Normal half: rebias the exponent and round with integer adds.
+				const unsigned int mantOdd = (in.u >> 13) & 1;  // kept mantissa is odd
+				in.u += ((unsigned int)(15 - 127) << 23) + 0xfff;
+				in.u += mantOdd;
+				bits = static_cast<unsigned short>(in.u >> 13);
+			}
+
+			bits |= static_cast<unsigned short>(signBit >> 16);
+			return __half(bits);
+#endif
+		}
+
+		// Converts a half-precision value to float.
+		EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float half_to_float(__half h) {
+#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
+			return __half2float(h);
+
+#elif defined(EIGEN_HAS_FP16_C)
+			return _cvtsh_ss(h.x);
+
+#else
+			const unsigned int kShiftedExpMask = 0x7c00 << 13;  // half exponent field after the shift
+
+			FP32 renorm;
+			renorm.u = 113 << 23;
+
+			FP32 out;
+			out.u = (h.x & 0x7fff) << 13;                        // exponent and mantissa in float position
+			const unsigned int expBits = out.u & kShiftedExpMask; // exponent field only
+			out.u += (127 - 15) << 23;                           // rebias
+
+			if (expBits == kShiftedExpMask) {
+				// Inf/NaN: push the exponent up to all ones.
+				out.u += (128 - 16) << 23;
+			}
+			else if (expBits == 0) {
+				// Zero/subnormal: bump the exponent, then let a float
+				// subtraction renormalize the value.
+				out.u += 1 << 23;
+				out.f -= renorm.f;
+			}
+
+			out.u |= (h.x & 0x8000) << 16;  // restore the sign
+			return out.f;
+#endif
+		}
+
+	} // end namespace half_impl
+
+} // end namespace Eigen
+
+#endif // EIGEN_HALF_CUDA_H
--- a/source/tensor/halfLib/third-party/float16-compressor.h
+++ b/source/tensor/halfLib/third-party/float16-compressor.h
+#pragma once
+#include <stdint.h>
+/*
+ * This code snippet posted by user Phernost on
+ * https://stackoverflow.com/questions/1659440/32-bit-to-16-bit-floating-point-conversion
+ *
+ * compress and decompress methods are made "inline" for performance
+ */
+
+// Converts between binary32 floats and binary16 bit patterns using integer
+// arithmetic on the float's bit pattern.
+//
+// compress() drops the low 13 mantissa bits without rounding; decompress()
+// is the inverse mapping. Both rely on reading a float's bits through the
+// Bits union.
+class Float16Compressor
+{
+	// Views the same 32 bits as a float, a signed or an unsigned integer.
+	union Bits
+	{
+		float f;
+		int32_t si;
+		uint32_t ui;
+	};
+
+	static int const shift = 13;      // mantissa bits dropped: 23 - 10
+	static int const shiftSign = 16;  // distance between the two sign bits
+
+	static int32_t const infN = 0x7F800000; // flt32 infinity
+	static int32_t const maxN = 0x477FE000; // max flt16 normal as a flt32
+	static int32_t const minN = 0x38800000; // min flt16 normal as a flt32
+	static int32_t const signN = 0x80000000; // flt32 sign bit
+
+	static int32_t const infC = infN >> shift;
+	static int32_t const nanN = (infC + 1) << shift; // minimum flt16 nan as a flt32
+	static int32_t const maxC = maxN >> shift;
+	static int32_t const minC = minN >> shift;
+	static int32_t const signC = signN >> shiftSign; // flt16 sign bit
+
+	static int32_t const mulN = 0x52000000; // (1 << 23) / minN
+	static int32_t const mulC = 0x33800000; // minN / (1 << (23 - shift))
+
+	static int32_t const subC = 0x003FF; // max flt32 subnormal down shifted
+	static int32_t const norC = 0x00400; // min flt32 normal down shifted
+
+	static int32_t const maxD = infC - maxC - 1;
+	static int32_t const minD = minC - subC - 1;
+
+public:
+
+	// Returns the binary16 bit pattern for `value`.
+	// Magnitudes above the largest normal half become infinity, and NaN
+	// stays NaN.
+	inline static uint16_t compress(float value)
+	{
+		Bits v;
+		v.f = value;
+		uint32_t sign = v.si & signN;
+		v.si ^= sign;       // keep the magnitude only
+		sign >>= shiftSign; // logical shift
+		if (v.si < minN)
+		{
+			// Below the smallest normal half: scale so that the integer part
+			// is the subnormal mantissa. The float-to-int conversion is done
+			// only here, where the product is below 2^23; doing it for every
+			// input would convert out-of-range values, which is undefined.
+			Bits s;
+			s.si = mulN;
+			v.si = static_cast<int32_t>(s.f * v.f); // correct subnormals
+		}
+		// Clamp (maxN, infN) to infinity and (infN, nanN) to the smallest
+		// NaN that survives the shift below.
+		v.si ^= (infN ^ v.si) & -((infN > v.si) & (v.si > maxN));
+		v.si ^= (nanN ^ v.si) & -((nanN > v.si) & (v.si > infN));
+		v.ui >>= shift; // logical shift
+		// Close the gaps between the float and the half exponent ranges.
+		v.si ^= ((v.si - maxD) ^ v.si) & -(v.si > maxC);
+		v.si ^= ((v.si - minD) ^ v.si) & -(v.si > subC);
+		return v.ui | sign;
+	}
+
+	// Returns the float whose value equals the binary16 bit pattern `value`.
+	inline static float decompress(uint16_t value)
+	{
+		Bits v;
+		v.ui = value;
+		// The sign is kept unsigned so that moving it into bit 31 never
+		// shifts a signed integer into its sign bit.
+		uint32_t sign = static_cast<uint32_t>(v.si & signC);
+		v.si ^= sign;
+		sign <<= shiftSign;
+		// Re-open the gaps between the half and the float exponent ranges.
+		v.si ^= ((v.si + minD) ^ v.si) & -(v.si > subC);
+		v.si ^= ((v.si + maxD) ^ v.si) & -(v.si > maxC);
+		Bits s;
+		s.si = mulC;
+		s.f *= v.si;                     // value of a subnormal half
+		int32_t mask = -(norC > v.si);   // all ones for subnormal input
+		v.si <<= shift;
+		v.si ^= (s.si ^ v.si) & mask;
+		v.ui |= sign;
+		return v.f;
+	}
+};
\ No newline at end of file
--- a/source/tensor/halfLib/third-party/npy-halffloat.h
+++ b/source/tensor/halfLib/third-party/npy-halffloat.h
+/*
+ * This implementation is extracted from numpy:
+ *   Repo: github.com/numpy/numpy
+ *   File: numpy/core/src/npymath/halffloat.c
+ *   Commit ID: 25c23f1d956104a072a95355ffaa7a38b53710b7
+ * Functions are made "static inline" for performance, and
+ * non-conversion functions are removed, and generation of
+ * exceptions is disabled.
+ */
+
+#include <cstdint>
+/* Fixed-width aliases for the bit patterns handled below: 16 bits for a
+ * half, 32 bits for a float and 64 bits for a double. */
+typedef uint16_t npy_uint16;
+typedef uint32_t npy_uint32;
+typedef uint64_t npy_uint64;
+
+/*
+ * This chooses between 'ties to even' and 'ties away from zero'.
+ * A non-zero value compiles the ties-to-even rounding code in the
+ * to-half conversions; zero compiles the unconditional round-up instead.
+ */
+#define NPY_HALF_ROUND_TIES_TO_EVEN 1
+/*
+ * If these are 1, the conversions try to trigger underflow,
+ * overflow, and invalid exceptions in the FP system when needed.
+ * They are all 0 here, so the npy_set_floatstatus_* calls guarded by
+ * them are compiled out.
+ */
+#define NPY_HALF_GENERATE_OVERFLOW 0
+#define NPY_HALF_GENERATE_UNDERFLOW 0
+#define NPY_HALF_GENERATE_INVALID 0
+
+  /*
+   ********************************************************************
+   *                     BIT-LEVEL CONVERSIONS                        *
+   ********************************************************************
+   */
+
+/*
+ * Converts the bit pattern of a binary32 float into the bit pattern of the
+ * nearest binary16 half.
+ *
+ *   f - bit pattern of the float to convert
+ *
+ * Returns the half bit pattern. NaN stays NaN (the top significand bits are
+ * carried over), values too large for a half become a signed inf, and
+ * values too small become a subnormal half or a signed zero. Rounding is
+ * selected by NPY_HALF_ROUND_TIES_TO_EVEN.
+ */
+static inline npy_uint16 npy_floatbits_to_halfbits(npy_uint32 f)
+{
+	npy_uint32 f_exp, f_sig;
+	npy_uint16 h_sgn, h_exp, h_sig;
+
+	h_sgn = (npy_uint16)((f & 0x80000000u) >> 16);
+	f_exp = (f & 0x7f800000u);
+
+	/* Exponent overflow/NaN converts to signed inf/NaN */
+	if (f_exp >= 0x47800000u) {
+		if (f_exp == 0x7f800000u) {
+			/* Inf or NaN */
+			f_sig = (f & 0x007fffffu);
+			if (f_sig != 0) {
+				/* NaN - propagate the flag in the significand... */
+				npy_uint16 ret = (npy_uint16)(0x7c00u + (f_sig >> 13));
+				/* ...but make sure it stays a NaN */
+				if (ret == 0x7c00u) {
+					ret++;
+				}
+				return h_sgn + ret;
+			}
+			else {
+				/* signed inf */
+				return (npy_uint16)(h_sgn + 0x7c00u);
+			}
+		}
+		else {
+			/* overflow to signed inf */
+#if NPY_HALF_GENERATE_OVERFLOW
+			npy_set_floatstatus_overflow();
+#endif
+			return (npy_uint16)(h_sgn + 0x7c00u);
+		}
+	}
+
+	/* Exponent underflow converts to a subnormal half or signed zero */
+	if (f_exp <= 0x38000000u) {
+		/*
+		 * Signed zeros, subnormal floats, and floats with small
+		 * exponents all convert to signed zero halfs.
+		 */
+		if (f_exp < 0x33000000u) {
+#if NPY_HALF_GENERATE_UNDERFLOW
+			/* If f != 0, it underflowed to 0 */
+			if ((f & 0x7fffffff) != 0) {
+				npy_set_floatstatus_underflow();
+			}
+#endif
+			return h_sgn;
+		}
+		/* Make the subnormal significand */
+		f_exp >>= 23;
+		f_sig = (0x00800000u + (f & 0x007fffffu));
+#if NPY_HALF_GENERATE_UNDERFLOW
+		/* If it's not exactly represented, it underflowed */
+		if ((f_sig&(((npy_uint32)1 << (126 - f_exp)) - 1)) != 0) {
+			npy_set_floatstatus_underflow();
+		}
+#endif
+		/*
+		 * On top of the usual shift by 13, a subnormal result needs an
+		 * extra shift of 1 (f_exp == 112) up to 11 (f_exp == 102) bits.
+		 */
+		f_sig >>= (113 - f_exp);
+		/* Handle rounding by adding 1 to the bit beyond half precision */
+#if NPY_HALF_ROUND_TIES_TO_EVEN
+		/*
+		 * If the last bit in the half significand is 0 (already even), and
+		 * the remaining bit pattern is 1000...0, then we do not add one
+		 * to the bit after the half significand.  The (113 - f_exp) shift
+		 * above can drop up to 11 low bits, so those are checked in the
+		 * original value: if any of them is set the input lies above the
+		 * tie and must round up.  In all other cases, we add one as well.
+		 */
+		if (((f_sig & 0x00003fffu) != 0x00001000u) || (f & 0x000007ffu)) {
+			f_sig += 0x00001000u;
+		}
+#else
+		f_sig += 0x00001000u;
+#endif
+		h_sig = (npy_uint16)(f_sig >> 13);
+		/*
+		 * If the rounding causes a bit to spill into h_exp, it will
+		 * increment h_exp from zero to one and h_sig will be zero.
+		 * This is the correct result.
+		 */
+		return (npy_uint16)(h_sgn + h_sig);
+	}
+
+	/* Regular case with no overflow or underflow */
+	h_exp = (npy_uint16)((f_exp - 0x38000000u) >> 13);
+	/* Handle rounding by adding 1 to the bit beyond half precision */
+	f_sig = (f & 0x007fffffu);
+#if NPY_HALF_ROUND_TIES_TO_EVEN
+	/*
+	 * If the last bit in the half significand is 0 (already even), and
+	 * the remaining bit pattern is 1000...0, then we do not add one
+	 * to the bit after the half significand.  In all other cases, we do.
+	 */
+	if ((f_sig & 0x00003fffu) != 0x00001000u) {
+		f_sig += 0x00001000u;
+	}
+#else
+	f_sig += 0x00001000u;
+#endif
+	h_sig = (npy_uint16)(f_sig >> 13);
+	/*
+	 * If the rounding causes a bit to spill into h_exp, it will
+	 * increment h_exp by one and h_sig will be zero.  This is the
+	 * correct result.  h_exp may increment to 15, at greatest, in
+	 * which case the result overflows to a signed inf.
+	 */
+#if NPY_HALF_GENERATE_OVERFLOW
+	h_sig += h_exp;
+	if (h_sig == 0x7c00u) {
+		npy_set_floatstatus_overflow();
+	}
+	return h_sgn + h_sig;
+#else
+	return h_sgn + h_exp + h_sig;
+#endif
+}
+
+/*
+ * Converts the bit pattern of a binary64 double into the bit pattern of the
+ * nearest binary16 half.
+ *
+ *   d - bit pattern of the double to convert
+ *
+ * Returns the half bit pattern. NaN stays NaN (the top significand bits are
+ * carried over), values too large for a half become a signed inf, and
+ * values too small become a subnormal half or a signed zero. Rounding is
+ * selected by NPY_HALF_ROUND_TIES_TO_EVEN.
+ */
+static inline npy_uint16 npy_doublebits_to_halfbits(npy_uint64 d)
+{
+	npy_uint64 d_exp, d_sig;
+	npy_uint16 h_sgn, h_exp, h_sig;
+
+	h_sgn = (d & 0x8000000000000000ULL) >> 48;
+	d_exp = (d & 0x7ff0000000000000ULL);
+
+	/* Exponent overflow/NaN converts to signed inf/NaN */
+	if (d_exp >= 0x40f0000000000000ULL) {
+		if (d_exp == 0x7ff0000000000000ULL) {
+			/* Inf or NaN */
+			d_sig = (d & 0x000fffffffffffffULL);
+			if (d_sig != 0) {
+				/* NaN - propagate the flag in the significand... */
+				npy_uint16 ret = (npy_uint16)(0x7c00u + (d_sig >> 42));
+				/* ...but make sure it stays a NaN */
+				if (ret == 0x7c00u) {
+					ret++;
+				}
+				return h_sgn + ret;
+			}
+			else {
+				/* signed inf */
+				return h_sgn + 0x7c00u;
+			}
+		}
+		else {
+			/* overflow to signed inf */
+#if NPY_HALF_GENERATE_OVERFLOW
+			npy_set_floatstatus_overflow();
+#endif
+			return h_sgn + 0x7c00u;
+		}
+	}
+
+	/* Exponent underflow converts to subnormal half or signed zero */
+	if (d_exp <= 0x3f00000000000000ULL) {
+		/*
+		 * Signed zeros, subnormal floats, and floats with small
+		 * exponents all convert to signed zero halfs.
+		 */
+		if (d_exp < 0x3e60000000000000ULL) {
+#if NPY_HALF_GENERATE_UNDERFLOW
+			/* If d != 0, it underflowed to 0 */
+			if ((d & 0x7fffffffffffffffULL) != 0) {
+				npy_set_floatstatus_underflow();
+			}
+#endif
+			return h_sgn;
+		}
+		/* Make the subnormal significand */
+		d_exp >>= 52;
+		d_sig = (0x0010000000000000ULL + (d & 0x000fffffffffffffULL));
+#if NPY_HALF_GENERATE_UNDERFLOW
+		/* If it's not exactly represented, it underflowed */
+		if ((d_sig&(((npy_uint64)1 << (1051 - d_exp)) - 1)) != 0) {
+			npy_set_floatstatus_underflow();
+		}
+#endif
+		/*
+		 * On top of the usual shift by 42, a subnormal result needs an
+		 * extra shift of 1 (d_exp == 1008) up to 11 (d_exp == 998) bits.
+		 */
+		d_sig >>= (1009 - d_exp);
+		/* Handle rounding by adding 1 to the bit beyond half precision */
+#if NPY_HALF_ROUND_TIES_TO_EVEN
+		/*
+		 * If the last bit in the half significand is 0 (already even), and
+		 * the remaining bit pattern is 1000...0, then we do not add one
+		 * to the bit after the half significand.  The (1009 - d_exp) shift
+		 * above can drop up to 11 low bits, so those are checked in the
+		 * original value: if any of them is set the input lies above the
+		 * tie and must round up.  In all other cases, we add one as well.
+		 */
+		if (((d_sig & 0x000007ffffffffffULL) != 0x0000020000000000ULL) ||
+			(d & 0x00000000000007ffULL)) {
+			d_sig += 0x0000020000000000ULL;
+		}
+#else
+		d_sig += 0x0000020000000000ULL;
+#endif
+		h_sig = (npy_uint16)(d_sig >> 42);
+		/*
+		 * If the rounding causes a bit to spill into h_exp, it will
+		 * increment h_exp from zero to one and h_sig will be zero.
+		 * This is the correct result.
+		 */
+		return h_sgn + h_sig;
+	}
+
+	/* Regular case with no overflow or underflow */
+	h_exp = (npy_uint16)((d_exp - 0x3f00000000000000ULL) >> 42);
+	/* Handle rounding by adding 1 to the bit beyond half precision */
+	d_sig = (d & 0x000fffffffffffffULL);
+#if NPY_HALF_ROUND_TIES_TO_EVEN
+	/*
+	 * If the last bit in the half significand is 0 (already even), and
+	 * the remaining bit pattern is 1000...0, then we do not add one
+	 * to the bit after the half significand.  In all other cases, we do.
+	 */
+	if ((d_sig & 0x000007ffffffffffULL) != 0x0000020000000000ULL) {
+		d_sig += 0x0000020000000000ULL;
+	}
+#else
+	d_sig += 0x0000020000000000ULL;
+#endif
+	h_sig = (npy_uint16)(d_sig >> 42);
+
+	/*
+	 * If the rounding causes a bit to spill into h_exp, it will
+	 * increment h_exp by one and h_sig will be zero.  This is the
+	 * correct result.  h_exp may increment to 15, at greatest, in
+	 * which case the result overflows to a signed inf.
+	 */
+#if NPY_HALF_GENERATE_OVERFLOW
+	h_sig += h_exp;
+	if (h_sig == 0x7c00u) {
+		npy_set_floatstatus_overflow();
+	}
+	return h_sgn + h_sig;
+#else
+	return h_sgn + h_exp + h_sig;
+#endif
+}
+
+/*
+ * Converts the bit pattern of a binary16 half into the bit pattern of the
+ * binary32 float with the same value.
+ *
+ *   h - bit pattern of the half to convert
+ *
+ * Returns the float bit pattern; for inf/NaN the significand is copied into
+ * the top bits of the float significand.
+ */
+static inline npy_uint32 npy_halfbits_to_floatbits(npy_uint16 h)
+{
+	const npy_uint32 sign_bits = ((npy_uint32)h & 0x8000u) << 16;
+	const npy_uint16 exp_field = (h & 0x7c00u);
+
+	if (exp_field == 0x7c00u) {
+		/* inf or NaN: all-ones exponent and a copy of the significand */
+		return sign_bits + 0x7f800000u + (((npy_uint32)(h & 0x03ffu)) << 13);
+	}
+
+	if (exp_field != 0x0000u) {
+		/* normalized: just rebias the exponent and shift into place */
+		return sign_bits + (((npy_uint32)(h & 0x7fffu) + 0x1c000u) << 13);
+	}
+
+	/* exponent field is zero: signed zero or subnormal */
+	npy_uint16 frac = (h & 0x03ffu);
+	if (frac == 0) {
+		return sign_bits;
+	}
+
+	/* Subnormal: shift until the leading 1 reaches bit 10, counting the
+	 * shifts that come after the first one. */
+	npy_uint16 extra_shifts = 0;
+	for (frac <<= 1; (frac & 0x0400u) == 0; frac <<= 1) {
+		extra_shifts++;
+	}
+
+	const npy_uint32 exp_bits = ((npy_uint32)(127 - 15 - extra_shifts)) << 23;
+	const npy_uint32 sig_bits = ((npy_uint32)(frac & 0x03ffu)) << 13;
+	return sign_bits + exp_bits + sig_bits;
+}
+
+/*
+ * Converts the bit pattern of a binary16 half into the bit pattern of the
+ * binary64 double with the same value.
+ *
+ *   h - bit pattern of the half to convert
+ *
+ * Returns the double bit pattern; for inf/NaN the significand is copied
+ * into the top bits of the double significand.
+ */
+static inline npy_uint64 npy_halfbits_to_doublebits(npy_uint16 h)
+{
+	const npy_uint64 sign_bits = ((npy_uint64)h & 0x8000u) << 48;
+	const npy_uint16 exp_field = (h & 0x7c00u);
+
+	if (exp_field == 0x7c00u) {
+		/* inf or NaN: all-ones exponent and a copy of the significand */
+		const npy_uint64 payload = ((npy_uint64)(h & 0x03ffu)) << 42;
+		return sign_bits + 0x7ff0000000000000ULL + payload;
+	}
+
+	if (exp_field != 0x0000u) {
+		/* normalized: just rebias the exponent and shift into place */
+		return sign_bits + (((npy_uint64)(h & 0x7fffu) + 0xfc000u) << 42);
+	}
+
+	/* exponent field is zero: signed zero or subnormal */
+	npy_uint16 frac = (h & 0x03ffu);
+	if (frac == 0) {
+		return sign_bits;
+	}
+
+	/* Subnormal: shift until the leading 1 reaches bit 10, counting the
+	 * shifts that come after the first one. */
+	npy_uint16 extra_shifts = 0;
+	for (frac <<= 1; (frac & 0x0400u) == 0; frac <<= 1) {
+		extra_shifts++;
+	}
+
+	const npy_uint64 exp_bits = ((npy_uint64)(1023 - 15 - extra_shifts)) << 52;
+	const npy_uint64 sig_bits = ((npy_uint64)(frac & 0x03ffu)) << 42;
+	return sign_bits + exp_bits + sig_bits;
+}