Add a CPU half-precision (FP16) test library under source/tensor/halfLib

41dbf0a9 · ltb · 1da50ae2 · 41dbf0a9 · 41dbf0a9 · 41dbf0a9
Commit 41dbf0a9 authored Aug 05, 2019 by ltb
--- a/source/tensor/halfLib/HalfFloat/HalfPrecisionFloatTest.cpp
+++ b/source/tensor/halfLib/HalfFloat/HalfPrecisionFloatTest.cpp
+#include <iostream>
+#include <assert.h>
+#include <direct.h>
+#include "../core/utilities/FlushToMem.h"
+#include "../core/getandset/ConvertDataType.h"
+#include "../XTensor.h"
+#include "umHalf.h"
+using namespace nts;
+//#define VALIDATE(x) if (!(x)){std::cout << "Failed: " <<  #x << std::endl;assert((x));}
+
+/*
+ * Round-trip check for FP16 tensor dumps.
+ *
+ * Fills a dim x dim X_FLOAT tensor through SetDataRand(-2.0, 2.0), converts it to X_FLOAT16,
+ * writes it with Dump() to a file below the current working directory, reads that file back
+ * into a second X_FLOAT16 tensor with Read(), and passes both tensors to Dump() with stderr
+ * so the two can be compared.
+ *
+ * Returns 0 on success and 1 when the dump path cannot be built or the file cannot be opened.
+ */
+int main(int argc, char* argv[])
+{
+	/* getcwd(NULL, 0) hands back a buffer sized for the working directory only, so appending
+	   to it with strcat writes past the allocation (and the buffer was never freed).
+	   Build the path in a fixed buffer instead and verify that the suffix fits. */
+	/* NOTE(review): this suffix lacks the "halfLib" component used by main.cpp in the same
+	   commit - confirm which directory is intended. */
+	static const char dumpSuffix[] = "\\source\\tensor\\HalfFloat\\dump";
+	char path[4096];
+	if (getcwd(path, (int)sizeof(path)) == NULL) {
+		fprintf(stderr, "cannot determine the current working directory\n");
+		return 1;
+	}
+	/* sizeof(dumpSuffix) already counts the terminating NUL */
+	if (strlen(path) + sizeof(dumpSuffix) > sizeof(path)) {
+		fprintf(stderr, "dump path is too long\n");
+		return 1;
+	}
+	strcat(path, dumpSuffix);
+
+	XTensor a;
+	XTensor halfa;
+	int dim = 4;
+	int devId = 0;
+	InitTensor2DV2(&a,dim,dim,X_FLOAT,devId);
+	a.SetDataRand(-2.0,2.0);
+
+	halfa = ConvertDataType(a, X_FLOAT16);
+	halfa.Dump(&halfa, stderr, "halfa:");
+	GPUToCPUFlush(&halfa);
+	FILE * file = fopen(path, "wb");
+	if (file == NULL) {
+		/* without this check a missing directory ends in Dump/fclose on a null stream */
+		fprintf(stderr, "cannot open %s for writing\n", path);
+		return 1;
+	}
+	halfa.Dump(file, "halfa:");
+	fclose(file);
+
+	XTensor halfb;
+	InitTensor2DV2(&halfb, dim, dim, X_FLOAT16, devId);
+	FILE *read = fopen(path, "rb");
+	if (read == NULL) {
+		fprintf(stderr, "cannot open %s for reading\n", path);
+		return 1;
+	}
+	halfb.Read(read, "halfa:");
+	fclose(read);
+	halfb.Dump(&halfb, stderr, "halfb:");
+	
+
+
+	//half h = 1.f, h2 = 2.f;
+	//--h2;
+	//++h2;
+	//--h;
+	//++h;
+	//h2 -= 1.f;
+	//float f = h2, f2 = h;
+	//VALIDATE(1.f == f && f == f2);
+	//half dddd = 15.5;
+	//float hhhh = 15.5;
+	//printf("%x\n", dddd);
+	//printf("%x\n", hhhh);
+	//h = h2;
+	//h2 = 15.5f;
+
+	//f = h2, f2 = h;
+	//VALIDATE(15.5f == f && 1.f == f2);
+	//h2 *= h;
+	//f = h2, f2 = h;
+	//VALIDATE(15.5f == f && 1.f == f2);
+	//h2 /= h;
+	//f = h2, f2 = h;
+	//VALIDATE(15.5f == f && 1.f == f2);
+	//h2 += h;
+	//f = h2, f2 = h;
+	//VALIDATE(16.5f == f && 1.f == f2);
+	//h++; h++; h++;
+	//h2 = -h2;
+	//h2 += 17.5f;
+	//h2 *= h;
+	//f = h2, f2 = h;
+	//VALIDATE(4.f == f && 4.f == f2);
+	//VALIDATE(h == h2);
+	//VALIDATE(h <= h2);
+	//--h;
+	//VALIDATE(h <= h2);
+
+	//h -= 250.f;
+	//VALIDATE(h < h2);
+
+	//h += 500.f;
+	//VALIDATE(h > h2);
+	//VALIDATE(h >= h2);
+
+	//f = h2, f2 = h;
+	//VALIDATE(h * h2 == (half)(f * f2));
+
+	//// addition
+	//// ****************************************************************************
+
+	//// identical exponents
+	//for (float f = 0.f; f < 1000.f; ++f)
+	//{
+	//	half one = f;
+	//	half two = f;
+	//	half three = one + two;
+	//	f2 = three;
+	//	VALIDATE(f*2.f == f2);
+	//}
+
+	//// different exponents
+	//for (float f = 0.f, fp = 1000.f; f < 500.f; ++f, --fp)
+	//{
+	//	half one = f;
+	//	half two = fp;
+	//	half three = one + two;
+	//	f2 = three;
+	//	VALIDATE(f + fp == f2);
+	//}
+
+	//// very small numbers - this is already beyond the accuracy of 16 bit floats.
+	//for (float f = 0.003f; f < 1000.f; f += 0.0005f)
+	//{
+	//	half one = f;
+	//	half two = f;
+	//	half three = one + two;
+	//	f2 = three;
+	//	float m = f * 2.f;
+	//	VALIDATE(f2 > (m - 0.05*m) && f2 < (m + 0.05*m));
+	//}
+
+
+	//// subtraction
+	//// ****************************************************************************
+
+	//// identical exponents
+	//for (float f = 0.f; f < 1000.f; ++f)
+	//{
+	//	half one = f;
+	//	half two = f;
+	//	half three = one - two;
+	//	f2 = three;
+	//	VALIDATE(0.f == f2);
+	//}
+
+	//// different exponents
+	//for (float f = 0.f, fp = 1000.f; f < 500.f; ++f, --fp)
+	//{
+	//	half one = f;
+	//	half two = fp;
+	//	half three = one - two;
+	//	f2 = three;
+	//	VALIDATE(f - fp == f2);
+	//}
+	return 0;
+}
+
--- a/source/tensor/halfLib/HalfFloat/a.md
+++ b/source/tensor/halfLib/HalfFloat/a.md
+https://github.com/acgessler/half_float
+
+C++ implementation of a 16-bit floating-point type mimicking most of the IEEE 754 behaviour. Compatible with the half data type used as a texture format by OpenGL/Direct3D.
\ No newline at end of file
--- a/source/tensor/halfLib/HalfFloat/dump
+++ b/source/tensor/halfLib/HalfFloat/dump
+halfa: order=2 dimsize=4,4 dtype=X_FLOAT16 dense=1.000000
+be2c 3ffd bf2c 3c52 a8f6 3a6a afcf 3eca 3e47 3852 bf6e 3bc8 bff5 bc12 b266 31a4
--- a/source/tensor/halfLib/HalfFloat/main.cpp
+++ b/source/tensor/halfLib/HalfFloat/main.cpp
+#include <iostream>
+#include <assert.h>
+#include <direct.h>
+#include "../../core/utilities/FlushToMem.h"
+#include "../../core/getandset/ConvertDataType.h"
+#include "../../XTensor.h"
+#include "../../XGlobal.h"
+#include "umHalf.h"
+
+using namespace nts;
+
+/*
+ * Save/load round trip for an FP16 tensor.
+ *
+ * Fills a dim x dim X_FLOAT tensor through SetDataRand(-2.0, 2.0), converts it to X_FLOAT16,
+ * writes it with Dump() to a "dump" file below the current working directory, reads the file
+ * back into a second X_FLOAT16 tensor with Read(), and passes both tensors to Dump() with
+ * stderr so the two can be compared.
+ *
+ * Returns 0 on success and 1 when the dump path cannot be built or the file cannot be opened.
+ */
+int main(int argc, char* argv[])
+{
+	/* getcwd(NULL, 0) hands back a buffer sized for the working directory only, so appending
+	   to it with strcat writes past the allocation (and the buffer was never freed).
+	   Build the path in a fixed buffer instead and verify that the suffix fits. */
+	static const char dumpSuffix[] = "\\source\\tensor\\halfLib\\HalfFloat\\dump";
+	char path[4096];
+	if (getcwd(path, (int)sizeof(path)) == NULL) {
+		fprintf(stderr, "cannot determine the current working directory\n");
+		return 1;
+	}
+	/* sizeof(dumpSuffix) already counts the terminating NUL */
+	if (strlen(path) + sizeof(dumpSuffix) > sizeof(path)) {
+		fprintf(stderr, "dump path is too long\n");
+		return 1;
+	}
+	strcat(path, dumpSuffix);
+
+	XTensor a;
+	XTensor halfa;
+	int dim = 4;
+	int devId = 0;
+	InitTensor2DV2(&a, dim, dim, X_FLOAT, devId);
+	a.SetDataRand(-2.0, 2.0);
+
+	halfa = ConvertDataType(a, X_FLOAT16);
+	printf("============save model================\n");
+
+	halfa.Dump(&halfa, stderr, "halfa:");
+	GPUToCPUFlush(&halfa);
+	
+	FILE * file = fopen(path, "wb");
+	if (file == NULL) {
+		/* without this check a missing directory ends in Dump/fclose on a null stream */
+		fprintf(stderr, "cannot open %s for writing\n", path);
+		return 1;
+	}
+	halfa.Dump(file, "halfa:");
+	//a.Dump(file, "a");
+	fclose(file);
+
+	XTensor halfb;
+	InitTensor2DV2(&halfb, dim, dim, X_FLOAT16, devId);
+
+	XTensor b;
+	InitTensor2DV2(&b, dim, dim, X_FLOAT, devId);
+	printf("==============read model=============\n");
+	FILE *read = fopen(path, "rb");
+	if (read == NULL) {
+		fprintf(stderr, "cannot open %s for reading\n", path);
+		return 1;
+	}
+	halfb.Read(read, "halfa:");
+	//b.Read(read, "a");
+	fclose(read);
+
+	halfb.Dump(&halfb, stderr, "halfb:");
+
+	return 0;
+}
\ No newline at end of file
--- a/source/tensor/halfLib/HalfFloat/stdint.h
+++ b/source/tensor/halfLib/HalfFloat/stdint.h
+// ISO C9x  compliant stdint.h for Microsoft Visual Studio
+// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 
+// 
+//  Copyright (c) 2006 Alexander Chemeris
+// 
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// 
+//   1. Redistributions of source code must retain the above copyright notice,
+//      this list of conditions and the following disclaimer.
+// 
+//   2. Redistributions in binary form must reproduce the above copyright
+//      notice, this list of conditions and the following disclaimer in the
+//      documentation and/or other materials provided with the distribution.
+// 
+//   3. The name of the author may be used to endorse or promote products
+//      derived from this software without specific prior written permission.
+// 
+// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
+// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+// 
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef _MSC_VER // [
+#error "Use this header only with Microsoft Visual C++ compilers!"
+#endif // _MSC_VER ]
+
+#ifndef _MSC_STDINT_H_ // [
+#define _MSC_STDINT_H_
+
+#if _MSC_VER > 1000
+#pragma once
+#endif
+
+#include <limits.h>
+
+// For Visual Studio 6 in C++ mode wrap <wchar.h> include with 'extern "C++" {}'
+// or compiler give many errors like this:
+//   error C2733: second C linkage of overloaded function 'wmemchr' not allowed
+#if (_MSC_VER < 1300) && defined(__cplusplus)
+extern "C++" {
+#endif 
+#     include <wchar.h>
+#if (_MSC_VER < 1300) && defined(__cplusplus)
+}
+#endif
+
+// 7.18.1 Integer types
+
+// 7.18.1.1 Exact-width integer types
+typedef __int8            int8_t;
+typedef __int16           int16_t;
+typedef __int32           int32_t;
+typedef __int64           int64_t;
+typedef unsigned __int8   uint8_t;
+typedef unsigned __int16  uint16_t;
+typedef unsigned __int32  uint32_t;
+typedef unsigned __int64  uint64_t;
+
+// 7.18.1.2 Minimum-width integer types
+typedef int8_t    int_least8_t;
+typedef int16_t   int_least16_t;
+typedef int32_t   int_least32_t;
+typedef int64_t   int_least64_t;
+typedef uint8_t   uint_least8_t;
+typedef uint16_t  uint_least16_t;
+typedef uint32_t  uint_least32_t;
+typedef uint64_t  uint_least64_t;
+
+// 7.18.1.3 Fastest minimum-width integer types
+typedef int8_t    int_fast8_t;
+typedef int16_t   int_fast16_t;
+typedef int32_t   int_fast32_t;
+typedef int64_t   int_fast64_t;
+typedef uint8_t   uint_fast8_t;
+typedef uint16_t  uint_fast16_t;
+typedef uint32_t  uint_fast32_t;
+typedef uint64_t  uint_fast64_t;
+
+// 7.18.1.4 Integer types capable of holding object pointers
+#ifdef _WIN64 // [
+typedef __int64           intptr_t;
+typedef unsigned __int64  uintptr_t;
+#else // _WIN64 ][
+typedef int               intptr_t;
+typedef unsigned int      uintptr_t;
+#endif // _WIN64 ]
+
+// 7.18.1.5 Greatest-width integer types
+typedef int64_t   intmax_t;
+typedef uint64_t  uintmax_t;
+
+
+// 7.18.2 Limits of specified-width integer types
+
+#if !defined(__cplusplus) || defined(__STDC_LIMIT_MACROS) // [   See footnote 220 at page 257 and footnote 221 at page 259
+
+// 7.18.2.1 Limits of exact-width integer types
+#define INT8_MIN     ((int8_t)_I8_MIN)
+#define INT8_MAX     _I8_MAX
+#define INT16_MIN    ((int16_t)_I16_MIN)
+#define INT16_MAX    _I16_MAX
+#define INT32_MIN    ((int32_t)_I32_MIN)
+#define INT32_MAX    _I32_MAX
+#define INT64_MIN    ((int64_t)_I64_MIN)
+#define INT64_MAX    _I64_MAX
+#define UINT8_MAX    _UI8_MAX
+#define UINT16_MAX   _UI16_MAX
+#define UINT32_MAX   _UI32_MAX
+#define UINT64_MAX   _UI64_MAX
+
+// 7.18.2.2 Limits of minimum-width integer types
+#define INT_LEAST8_MIN    INT8_MIN
+#define INT_LEAST8_MAX    INT8_MAX
+#define INT_LEAST16_MIN   INT16_MIN
+#define INT_LEAST16_MAX   INT16_MAX
+#define INT_LEAST32_MIN   INT32_MIN
+#define INT_LEAST32_MAX   INT32_MAX
+#define INT_LEAST64_MIN   INT64_MIN
+#define INT_LEAST64_MAX   INT64_MAX
+#define UINT_LEAST8_MAX   UINT8_MAX
+#define UINT_LEAST16_MAX  UINT16_MAX
+#define UINT_LEAST32_MAX  UINT32_MAX
+#define UINT_LEAST64_MAX  UINT64_MAX
+
+// 7.18.2.3 Limits of fastest minimum-width integer types
+#define INT_FAST8_MIN    INT8_MIN
+#define INT_FAST8_MAX    INT8_MAX
+#define INT_FAST16_MIN   INT16_MIN
+#define INT_FAST16_MAX   INT16_MAX
+#define INT_FAST32_MIN   INT32_MIN
+#define INT_FAST32_MAX   INT32_MAX
+#define INT_FAST64_MIN   INT64_MIN
+#define INT_FAST64_MAX   INT64_MAX
+#define UINT_FAST8_MAX   UINT8_MAX
+#define UINT_FAST16_MAX  UINT16_MAX
+#define UINT_FAST32_MAX  UINT32_MAX
+#define UINT_FAST64_MAX  UINT64_MAX
+
+// 7.18.2.4 Limits of integer types capable of holding object pointers
+#ifdef _WIN64 // [
+#  define INTPTR_MIN   INT64_MIN
+#  define INTPTR_MAX   INT64_MAX
+#  define UINTPTR_MAX  UINT64_MAX
+#else // _WIN64 ][
+#  define INTPTR_MIN   INT32_MIN
+#  define INTPTR_MAX   INT32_MAX
+#  define UINTPTR_MAX  UINT32_MAX
+#endif // _WIN64 ]
+
+// 7.18.2.5 Limits of greatest-width integer types
+#define INTMAX_MIN   INT64_MIN
+#define INTMAX_MAX   INT64_MAX
+#define UINTMAX_MAX  UINT64_MAX
+
+// 7.18.3 Limits of other integer types
+
+#ifdef _WIN64 // [
+#  define PTRDIFF_MIN  _I64_MIN
+#  define PTRDIFF_MAX  _I64_MAX
+#else  // _WIN64 ][
+#  define PTRDIFF_MIN  _I32_MIN
+#  define PTRDIFF_MAX  _I32_MAX
+#endif  // _WIN64 ]
+
+#define SIG_ATOMIC_MIN  INT_MIN
+#define SIG_ATOMIC_MAX  INT_MAX
+
+#ifndef SIZE_MAX // [
+#  ifdef _WIN64 // [
+#     define SIZE_MAX  _UI64_MAX
+#  else // _WIN64 ][
+#     define SIZE_MAX  _UI32_MAX
+#  endif // _WIN64 ]
+#endif // SIZE_MAX ]
+
+// WCHAR_MIN and WCHAR_MAX are also defined in <wchar.h>
+#ifndef WCHAR_MIN // [
+#  define WCHAR_MIN  0
+#endif  // WCHAR_MIN ]
+#ifndef WCHAR_MAX // [
+#  define WCHAR_MAX  _UI16_MAX
+#endif  // WCHAR_MAX ]
+
+#define WINT_MIN  0
+#define WINT_MAX  _UI16_MAX
+
+#endif // __STDC_LIMIT_MACROS ]
+
+
+// 7.18.4 Macros for integer constants
+
+#if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) // [   See footnote 224 at page 260
+
+// 7.18.4.1 Macros for minimum-width integer constants
+
+#define INT8_C(val)  val##i8
+#define INT16_C(val) val##i16
+#define INT32_C(val) val##i32
+#define INT64_C(val) val##i64
+
+#define UINT8_C(val)  val##ui8
+#define UINT16_C(val) val##ui16
+#define UINT32_C(val) val##ui32
+#define UINT64_C(val) val##ui64
+
+// 7.18.4.2 Macros for greatest-width integer constants
+#define INTMAX_C   INT64_C
+#define UINTMAX_C  UINT64_C
+
+#endif // __STDC_CONSTANT_MACROS ]
+
+
+#endif // _MSC_STDINT_H_ ]
\ No newline at end of file
--- a/source/tensor/halfLib/HalfFloat/umHalf.h
+++ b/source/tensor/halfLib/HalfFloat/umHalf.h
+///////////////////////////////////////////////////////////////////////////////////
+/*
+Copyright (c) 2006-2008,
+Chris "Krishty" Maiwald, Alexander "Aramis" Gessler
+
+All rights reserved.
+
+Redistribution and use of this software in source and binary forms,
+with or without modification, are permitted provided that the following
+conditions are met:
+
+* Redistributions of source code must retain the above
+  copyright notice, this list of conditions and the
+  following disclaimer.
+
+* Redistributions in binary form must reproduce the above
+  copyright notice, this list of conditions and the
+  following disclaimer in the documentation and/or other
+  materials provided with the distribution.
+
+* Neither the name of the class, nor the names of its
+  contributors may be used to endorse or promote products
+  derived from this software without specific prior
+  written permission of the Development Team.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+///////////////////////////////////////////////////////////////////////////////////
+
+#ifndef UM_HALF_H_INCLUDED
+#define UM_HALF_H_INCLUDED
+
+#include <limits>
+#include <algorithm>
+
+//#ifdef _MSC_VER
+//#include "stdint.h"
+//#else
+//#include <stdint.h>
+//#endif
+#include<stdint.h>
+
+#undef min
+#undef max
+
+///////////////////////////////////////////////////////////////////////////////////
+/** 1. Represents a half-precision floating-point value (16 bits) that behaves
+ *  in a manner nearly conformant to the IEEE 754 standard for floating-point computations.
+ *
+ *  Not all operators have special implementations, most perform time-consuming
+ *  conversions from half to float and back again.
+ *  Differences to IEEE 754:
+ *  - no difference between qnan and snan
+ *  - no traps
+ *  - no well-defined rounding mode
+ */
+ ///////////////////////////////////////////////////////////////////////////////////
+class HalfFloat
+{
+	// The HalfFloat-by-HalfFloat arithmetic operators are declared as friends;
+	// their definitions are not part of this header section.
+	friend HalfFloat operator+ (HalfFloat, HalfFloat);
+	friend HalfFloat operator- (HalfFloat, HalfFloat);
+	friend HalfFloat operator* (HalfFloat, HalfFloat);
+	friend HalfFloat operator/ (HalfFloat, HalfFloat);
+
+public:
+
+	// Widths of the fraction and exponent bit fields (see the IEEE struct below).
+	enum { BITS_MANTISSA = 10 };
+	enum { BITS_EXPONENT = 5 };
+
+	// Largest value the 5-bit exponent field can hold, and the exponent bias (31 / 2 == 15).
+	enum { MAX_EXPONENT_VALUE = 31 };
+	enum { BIAS = MAX_EXPONENT_VALUE / 2 };
+
+	// Binary exponent limits derived from the bias (reported through std::numeric_limits).
+	enum { MAX_EXPONENT = BIAS };
+	enum { MIN_EXPONENT = -BIAS };
+
+	// Decimal exponent limits (reported through std::numeric_limits).
+	enum { MAX_EXPONENT10 = 9 };
+	enum { MIN_EXPONENT10 = -9 };
+
+public:
+
+	/** Default constructor. Leaves the value uninitialized.
+	 */
+	inline HalfFloat() {}
+
+	/** Construction from an existing half (copies the raw 16 bits).
+	 */
+	inline HalfFloat(const HalfFloat& other)
+		: bits(other.GetBits())
+	{}
+
+	/** Construction from existing values for mantissa, sign
+	 *  and exponent. No validation is performed.
+	 *  @param _m fraction bits
+	 *  @param _e exponent bits
+	 *  @param _s sign bit
+	 *  @note The exponent is unsigned and biased by #BIAS
+	 */
+	inline HalfFloat(uint16_t _m, uint16_t _e, uint16_t _s);
+
+
+	/** Construction from a single-precision float
+	 */
+	inline HalfFloat(float other);
+
+	/** Construction from a double-precision float
+	 */
+	inline HalfFloat(const double);
+
+
+
+	/** Conversion operator to convert from half to float
+	 */
+	inline operator float() const;
+
+	/** Conversion operator to convert from half to double
+	 */
+	inline operator double() const;
+
+
+
+	/** Assignment operators to assign another half, a float
+	 *  or a double to *this* object.
+	 */
+	inline HalfFloat& operator= (HalfFloat other);
+	inline HalfFloat& operator= (float other);
+	inline HalfFloat& operator= (const double other);
+
+
+	/** Equality comparison operators
+	 */
+	inline bool operator== (HalfFloat other) const;
+	inline bool operator!= (HalfFloat other) const;
+
+
+	/** Relational comparison operators, against another half
+	 *  and against a single-precision float
+	 */
+	inline bool operator<  (HalfFloat other) const;
+	inline bool operator>  (HalfFloat other) const;
+	inline bool operator<= (HalfFloat other) const;
+	inline bool operator>= (HalfFloat other) const;
+
+	inline bool operator<  (float other) const;
+	inline bool operator>  (float other) const;
+	inline bool operator<= (float other) const;
+	inline bool operator>= (float other) const;
+
+
+	/** Combined assignment operators, with a half or a float
+	 *  right-hand side
+	 */
+	inline HalfFloat& operator += (HalfFloat other);
+	inline HalfFloat& operator -= (HalfFloat other);
+	inline HalfFloat& operator *= (HalfFloat other);
+	inline HalfFloat& operator /= (HalfFloat other);
+
+	inline HalfFloat& operator += (float other);
+	inline HalfFloat& operator -= (float other);
+	inline HalfFloat& operator *= (float other);
+	inline HalfFloat& operator /= (float other);
+
+	/** Prefix and postfix increment operators
+	 */
+	inline HalfFloat& operator++();
+	inline HalfFloat operator++(int);
+
+	/** Prefix and postfix decrement operators
+	 */
+	inline HalfFloat& operator--();
+	inline HalfFloat operator--(int);
+
+	/** Unary minus operator
+	 */
+	inline HalfFloat operator-() const;
+
+
+	/** Provides direct access to the bits of a half float
+	 *  (by value for const objects, by reference otherwise)
+	 */
+	inline uint16_t GetBits() const;
+	inline uint16_t& GetBits();
+
+
+	/** Classification of floating-point types
+	 */
+	inline bool IsNaN() const;
+	inline bool IsInfinity() const;
+	inline bool IsDenorm() const;
+
+	/** Returns the sign of the floating-point value -
+	 *  true stands for positive.
+	 */
+	inline bool GetSign() const;
+
+public:
+
+	// Storage: the same 16 bits viewed either as one integer or as separate fields.
+	// NOTE(review): the field order assumes the compiler allocates bit-fields starting at
+	// the least significant bit, so that Sign ends up in the top bit - confirm per target.
+	union
+	{
+		uint16_t bits;			// All bits
+		struct
+		{
+			uint16_t Frac : 10;	// mantissa (fraction bits)
+			uint16_t Exp : 5;		// biased exponent
+			uint16_t Sign : 1;		// sign
+		} IEEE;
+	};
+
+
+	// Field view of a single-precision float, used by the float conversions.
+	union IEEESingle
+	{
+		float Float;
+		struct
+		{
+			uint32_t Frac : 23;
+			uint32_t Exp : 8;
+			uint32_t Sign : 1;
+		} IEEE;
+	};
+
+	// Field view of a double-precision float, used by the double conversions.
+	union IEEEDouble
+	{
+		double Double;
+		struct {
+			uint64_t Frac : 52;
+			uint64_t Exp : 11;
+			uint64_t Sign : 1;
+		} IEEE;
+	};
+
+	// Enums can not store 64 bit values, so we have to use static constants.
+	// Largest value of the 11-bit double exponent field, and the double exponent bias (0x7FF / 2).
+	static const uint64_t IEEEDouble_MaxExpontent = 0x7FF;
+	static const uint64_t IEEEDouble_ExponentBias = IEEEDouble_MaxExpontent / 2;
+};
+
+/** 2. Binary operations
+ */
+inline HalfFloat operator+ (HalfFloat one, HalfFloat two);
+inline HalfFloat operator- (HalfFloat one, HalfFloat two);
+inline HalfFloat operator* (HalfFloat one, HalfFloat two);
+inline HalfFloat operator/ (HalfFloat one, HalfFloat two);
+
+inline float operator+ (HalfFloat one, float two);
+inline float operator- (HalfFloat one, float two);
+inline float operator* (HalfFloat one, float two);
+inline float operator/ (HalfFloat one, float two);
+
+inline float operator+ (float one, HalfFloat two);
+inline float operator- (float one, HalfFloat two);
+inline float operator* (float one, HalfFloat two);
+inline float operator/ (float one, HalfFloat two);
+
+
+
+///////////////////////////////////////////////////////////////////////////////////
+/** 3. Specialization of std::numeric_limits for type half.
+ */
+ ///////////////////////////////////////////////////////////////////////////////////
+namespace std {
+	/** Specialization of std::numeric_limits describing HalfFloat.
+	 *
+	 *  Values are built directly from (fraction, biased exponent, sign) bit fields
+	 *  wherever that gives an exact result.
+	 */
+	template <>
+	class numeric_limits<HalfFloat> {
+
+	public:
+
+		// General -- meaningful for all specializations.
+
+		static const bool is_specialized = true;
+
+		/** Smallest positive normalized value: fraction 0, biased exponent 1. */
+		static HalfFloat min()
+		{
+			return HalfFloat(0, 1, 0);
+		}
+		/** Largest finite value: all fraction bits set, largest non-special exponent. */
+		static HalfFloat max()
+		{
+			return HalfFloat(0x3FF, HalfFloat::MAX_EXPONENT_VALUE - 1, 0);
+		}
+		static const int radix = 2;
+		static const int digits = 10;   // conservative assumption
+		static const int digits10 = 2;  // conservative assumption
+		static const bool is_signed = true;
+		static const bool is_integer = false;   // a floating-point type, not an integer type
+		static const bool is_exact = false;
+		static const bool traps = false;
+		static const bool is_modulo = false;
+		static const bool is_bounded = true;
+
+		// Floating point specific.
+
+		/** Difference between 1.0 and the next representable value: 2^-10.
+		 *  Built from bit fields because converting the literal 0.00097656f truncates
+		 *  to a value just below 2^-10.
+		 */
+		static HalfFloat epsilon()
+		{
+			return HalfFloat(0, HalfFloat::BIAS - 10, 0);
+		}
+		// NOTE(review): the standard defines round_error() in units of ULP (0.5 for
+		// round-to-nearest); the value below is kept unchanged - confirm what callers expect.
+		static HalfFloat round_error()
+		{
+			return HalfFloat(0.00097656f / 2);
+		}
+		static const int min_exponent10 = HalfFloat::MIN_EXPONENT10;
+		static const int max_exponent10 = HalfFloat::MAX_EXPONENT10;
+		static const int min_exponent = HalfFloat::MIN_EXPONENT;
+		static const int max_exponent = HalfFloat::MAX_EXPONENT;
+
+		static const bool has_infinity = true;
+		static const bool has_quiet_NaN = true;
+		static const bool has_signaling_NaN = true;
+		static const bool is_iec559 = false;
+		// has_denorm is a float_denorm_style in the primary template, not a bool
+		static const float_denorm_style has_denorm = denorm_present;
+		static const bool tinyness_before = false;
+		static const float_round_style round_style = round_to_nearest;
+
+		/** Smallest positive subnormal value: fraction 1, exponent 0, sign bit clear. */
+		static HalfFloat denorm_min()
+		{
+			return HalfFloat(1, 0, 0);
+		}
+		/** Positive infinity: fraction 0, exponent all ones. */
+		static HalfFloat infinity()
+		{
+			return HalfFloat(0, HalfFloat::MAX_EXPONENT_VALUE, 0);
+		}
+		/** A NaN: non-zero fraction, exponent all ones. */
+		static HalfFloat quiet_NaN()
+		{
+			return HalfFloat(1, HalfFloat::MAX_EXPONENT_VALUE, 0);
+		}
+		/** Same bit pattern as quiet_NaN(); the two are built identically here. */
+		static HalfFloat signaling_NaN()
+		{
+			return HalfFloat(1, HalfFloat::MAX_EXPONENT_VALUE, 0);
+		}
+	};
+} // end namespace std
+
+
+#include "./umHalf.inl"
+
+#ifndef UM_HALF_NO_TYPEDEFS
+typedef HalfFloat float16;
+typedef HalfFloat halfCPU;
+#endif
+
+#endif // !! UM_HALF_H_INCLUDED
--- a/source/tensor/halfLib/HalfFloat/umHalf.inl
+++ b/source/tensor/halfLib/HalfFloat/umHalf.inl
+///////////////////////////////////////////////////////////////////////////////////
+/*
+Copyright (c) 2006-2008, Alexander Gessler
+
+All rights reserved.
+
+Redistribution and use of this software in source and binary forms,
+with or without modification, are permitted provided that the following
+conditions are met:
+
+* Redistributions of source code must retain the above
+  copyright notice, this list of conditions and the
+  following disclaimer.
+
+* Redistributions in binary form must reproduce the above
+  copyright notice, this list of conditions and the
+  following disclaimer in the documentation and/or other
+  materials provided with the distribution.
+
+* Neither the name of the ASSIMP team, nor the names of its
+  contributors may be used to endorse or promote products
+  derived from this software without specific prior
+  written permission of the ASSIMP Development Team.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+///////////////////////////////////////////////////////////////////////////////////
+
+#ifndef UM_HALF_INL_INCLUDED
+#define UM_HALF_INL_INCLUDED
+
+#ifdef _MSC_VER
+#include <intrin.h>
+#pragma intrinsic(_BitScanReverse)
+#endif
+
+// ------------------------------------------------------------------------------------------------
+/** Builds a half from a single-precision float by re-packing its bit fields.
+ *
+ *  Fraction bits that do not fit are dropped by right-shifting. Float zeros and float
+ *  subnormals become a signed zero, values with an unbiased exponent above 15 become
+ *  infinity, and any NaN is stored with fraction 1.
+ */
+inline HalfFloat::HalfFloat(float other)
+{
+	IEEESingle src;
+	src.Float = other;
+
+	// Start from a signed zero; the cases below overwrite Exp/Frac where needed.
+	IEEE.Sign = src.IEEE.Sign;
+	IEEE.Exp = 0;
+	IEEE.Frac = 0;
+
+	const unsigned int rawExp = src.IEEE.Exp;
+
+	// exponent field all zero: float zero or float subnormal
+	if (rawExp == 0)
+		return;
+
+	// exponent field all ones: NaN or INF
+	if (rawExp == 0xff)
+	{
+		IEEE.Exp = 31;
+		IEEE.Frac = (src.IEEE.Frac != 0) ? 1 : 0;
+		return;
+	}
+
+	// regular number
+	const int unbiased = (int)rawExp - 127;
+
+	// too small even for a half subnormal: stays zero
+	if (unbiased < -24)
+		return;
+
+	if (unbiased < -14)
+	{
+		// half subnormal: the implicit leading one moves into the fraction,
+		// shifted right by 1..10 positions, followed by the float's top fraction bits
+		const unsigned int shift = (unsigned int)(-14 - unbiased);
+		IEEE.Frac = (1024u >> shift) + (src.IEEE.Frac >> (13 + shift));
+		return;
+	}
+
+	// too large: infinity (fraction is already zero)
+	if (unbiased > 15)
+	{
+		IEEE.Exp = 31;
+		return;
+	}
+
+	// normal half: re-bias the exponent and keep the top 10 fraction bits
+	IEEE.Exp = unbiased + 15;
+	IEEE.Frac = (src.IEEE.Frac >> 13);
+}
+// ------------------------------------------------------------------------------------------------
+/** Builds a half from a double-precision float by re-packing its bit fields.
+ *
+ *  Fraction bits that do not fit are dropped by right-shifting. Double zeros and double
+ *  subnormals become a signed zero, values with an unbiased exponent above 15 become
+ *  infinity, and any NaN is stored with fraction 1.
+ */
+inline HalfFloat::HalfFloat(const double p_Reference)
+{
+	// Copy the value into the union, as the float constructor does, instead of
+	//	reinterpret_cast-ing the double to an unrelated union type (a strict-aliasing violation).
+	IEEEDouble l_Reference;
+	l_Reference.Double = p_Reference;
+
+	// Copy the sign bit.
+	this->IEEE.Sign = l_Reference.IEEE.Sign;
+
+	// Check for special values: Is the exponent zero?
+	if (0 == l_Reference.IEEE.Exp)
+	{
+		// A zero exponent indicates either a zero or a subnormal number. A subnormal double can
+		//	not be represented as a half, so either one will be saved as a zero.
+		this->IEEE.Exp = 0;
+		this->IEEE.Frac = 0;
+	}
+	// Is the exponent all one?
+	else if (IEEEDouble_MaxExpontent == l_Reference.IEEE.Exp)
+	{
+		this->IEEE.Exp = MAX_EXPONENT_VALUE;
+		// A zero fraction indicates an Infinite value.
+		if (0 == l_Reference.IEEE.Frac)
+			this->IEEE.Frac = 0;
+		// A nonzero fraction indicates NaN. Such a fraction contains further information, e.g. to
+		//	distinguish a QNaN from a SNaN. However, we can not just shift-copy the fraction:
+		//	if its top ten bits were zero we would save an infinite value, so we abandon the
+		//	fraction information and set it to a nonzero value.
+		else
+			this->IEEE.Frac = 1;
+	}
+	// A usual value?
+	else {
+		// First, we have to adjust the exponent. It is stored as an unsigned int, to reconstruct
+		//	its original value we have to subtract its bias (half of its range).
+		const int64_t l_AdjustedExponent = l_Reference.IEEE.Exp - IEEEDouble_ExponentBias;
+
+		// Very small values will be rounded to zero.
+		if (-24 > l_AdjustedExponent)
+		{
+			this->IEEE.Frac = 0;
+			this->IEEE.Exp = 0;
+		}
+		// Some small values can be stored as subnormal values.
+		else if (-14 > l_AdjustedExponent)
+		{
+			// The exponent of subnormal values is always zero.
+			this->IEEE.Exp = 0;
+			// The exponent will now be stored in the fraction.
+			const int16_t l_NewExponent = int16_t(-14 - l_AdjustedExponent);  // 2 ^ -l_NewExponent
+			this->IEEE.Frac = (1024 >> l_NewExponent) + int16_t(l_Reference.IEEE.Frac >> (42 + l_NewExponent));
+		}
+		// Very large numbers will be rounded to infinity.
+		else if (15 < l_AdjustedExponent)
+		{
+			// Exponent all one, fraction zero.
+			this->IEEE.Exp = MAX_EXPONENT_VALUE;
+			this->IEEE.Frac = 0;
+		}
+		// All remaining numbers can be converted directly.
+		else
+		{
+			// We reconstructed the exponent by subtracting the bias. To store it as an unsigned
+			//	int, we need to add the bias again.
+			this->IEEE.Exp = l_AdjustedExponent + BIAS;
+			// When storing the fraction, we abandon its least significant bits by right-shifting.
+			//	The fraction of a double is 42 bits wider than that of a half, so we shift 42 bits.
+			this->IEEE.Frac = (l_Reference.IEEE.Frac >> 42);
+		}
+	} // else usual number
+}
+// ------------------------------------------------------------------------------------------------
+/** Assembles a half from raw field values: _m -> fraction, _e -> biased exponent,
+ *  _s -> sign. Values are stored as given (truncated to the field width), unvalidated.
+ */
+inline HalfFloat::HalfFloat(uint16_t _m, uint16_t _e, uint16_t _s)
+{
+	this->IEEE.Sign = _s;
+	this->IEEE.Exp = _e;
+	this->IEEE.Frac = _m;
+}
+// ------------------------------------------------------------------------------------------------
+/** Widens the half to a single-precision float.
+ *
+ *  Zero keeps its sign, subnormals are computed arithmetically, infinity stays
+ *  infinity, and any NaN becomes a float NaN with fraction 1.
+ */
+HalfFloat::operator float() const
+{
+	IEEESingle result;
+	result.IEEE.Sign = IEEE.Sign;
+
+	const bool expIsZero = (IEEE.Exp == 0);
+	const bool fracIsZero = (IEEE.Frac == 0);
+
+	if (expIsZero && fracIsZero)
+	{
+		// signed zero
+		result.IEEE.Frac = 0;
+		result.IEEE.Exp = 0;
+	}
+	else if (expIsZero)
+	{
+		// subnormal: sign * (fraction / 2^10) * 2^-14; overwrites the whole float
+		const float scale = (1.0f / 16384.0f);
+		const float fraction = ((float)(IEEE.Frac)) / 1024.0f;
+		const float sign = (IEEE.Sign) ? -1.0f : 1.0f;
+		result.Float = sign * fraction * scale;
+	}
+	else if (IEEE.Exp == 31)
+	{
+		// infinity (fraction 0) or NaN (fraction 1)
+		result.IEEE.Exp = 0xff;
+		result.IEEE.Frac = fracIsZero ? 0 : 1;
+	}
+	else
+	{
+		// normal number: 112 == 127 - 15 converts between the two biases
+		result.IEEE.Exp = IEEE.Exp + 112;
+		result.IEEE.Frac = (IEEE.Frac << 13);
+	}
+	return result.Float;
+}
+// ------------------------------------------------------------------------------------------------
+/** Widens the half to a double-precision float.
+ *
+ *  Zero keeps its sign, subnormals are computed arithmetically, and for infinity/NaN
+ *  the fraction bits are carried over into the top of the double fraction.
+ */
+inline HalfFloat::operator double(void) const
+{
+	IEEEDouble out;
+	out.IEEE.Sign = this->IEEE.Sign;
+
+	// A double fraction is 42 bits wider than a half fraction, so widening is a left shift.
+	const uint64_t widenedFrac = uint64_t(this->IEEE.Frac) << 42;
+
+	if (0 == this->IEEE.Exp)
+	{
+		if (0 == this->IEEE.Frac)
+		{
+			// zero: exponent and fraction are both zero
+			out.IEEE.Exp = 0;
+			out.IEEE.Frac = 0;
+		}
+		else
+		{
+			// subnormal: sign * 2^-14 * fraction; overwrites the whole double
+			out.Double = (this->IEEE.Sign ? -1.0 : +1.0) / 16384.0 * (double(this->IEEE.Frac) / 1024.0);
+		}
+	}
+	else if (MAX_EXPONENT_VALUE == this->IEEE.Exp)
+	{
+		// exponent all ones: infinity when the fraction is zero (the shift of zero stays
+		//	zero), otherwise NaN with its fraction bits preserved
+		out.IEEE.Exp = IEEEDouble_MaxExpontent;
+		out.IEEE.Frac = widenedFrac;
+	}
+	else
+	{
+		// normal number: remove the half bias, add the double bias
+		out.IEEE.Exp = uint64_t(this->IEEE.Exp) - BIAS + IEEEDouble_ExponentBias;
+		out.IEEE.Frac = widenedFrac;
+	}
+	return out.Double;
+}
+// ------------------------------------------------------------------------------------------------
+/** True when the exponent field is all ones and the fraction is non-zero. */
+inline bool HalfFloat::IsNaN() const
+{
+	const bool expAllOnes = (IEEE.Exp == MAX_EXPONENT_VALUE);
+	return expAllOnes && IEEE.Frac != 0;
+}
+// ------------------------------------------------------------------------------------------------
+/** True when the exponent field is all ones and the fraction is zero (either sign). */
+inline bool HalfFloat::IsInfinity() const
+{
+	const bool expAllOnes = (IEEE.Exp == MAX_EXPONENT_VALUE);
+	return expAllOnes && IEEE.Frac == 0;
+}
+// ------------------------------------------------------------------------------------------------
+inline bool HalfFloat::IsDenorm() const
+{
+	// Only the exponent field is inspected: every value whose exponent field is zero reports
+	//	true, which includes +0 and -0 because the fraction is not looked at.
+	const bool l_ExponentIsZero = (0 == IEEE.Exp);
+	return l_ExponentIsZero;
+}
+// ------------------------------------------------------------------------------------------------
+inline bool HalfFloat::GetSign() const
+{
+	// Returns true when the sign bit is clear and false when it is set.
+	if (0 == IEEE.Sign)
+		return true;
+	return false;
+}
+// ------------------------------------------------------------------------------------------------
+inline HalfFloat& HalfFloat::operator= (HalfFloat other)
+{
+	// Copying the raw 16-bit pattern copies sign, exponent and fraction in one go.
+	this->bits = other.bits;
+	return *this;
+}
+// ------------------------------------------------------------------------------------------------
+inline HalfFloat& HalfFloat::operator= (float other)
+{
+	// Convert the float to a half first, then reuse the half-to-half assignment.
+	return (*this) = HalfFloat(other);
+}
+// ------------------------------------------------------------------------------------------------
+inline HalfFloat& HalfFloat::operator= (const double p_Reference)
+{
+	// Convert the double to a half first, then reuse the half-to-half assignment.
+	const HalfFloat l_Converted(p_Reference);
+	*this = l_Converted;
+	return *this;
+}
+// ------------------------------------------------------------------------------------------------
+inline bool HalfFloat::operator== (HalfFloat other) const
+{
+	// Everything except the sign bit. The sign has to be masked off explicitly: an expression
+	//	such as (bits << 1u) is evaluated as an int after integral promotion, so the sign bit
+	//	is NOT shifted out and -0 (0x8000) would wrongly test as non-zero.
+	const uint16_t l_MagnitudeMask = 0x7FFF;
+
+	// +0 and -0 are considered to be equal
+	if (0 == (bits & l_MagnitudeMask) && 0 == (other.bits & l_MagnitudeMask))
+		return true;
+
+	// Identical bit patterns are equal unless they encode a NaN, which never equals anything.
+	return bits == other.bits && !this->IsNaN();
+}
+// ------------------------------------------------------------------------------------------------
+inline bool HalfFloat::operator!= (HalfFloat other) const
+{
+	// Everything except the sign bit. The sign has to be masked off explicitly: an expression
+	//	such as (bits << 1u) is evaluated as an int after integral promotion, so the sign bit
+	//	is NOT shifted out and -0 (0x8000) would wrongly test as non-zero.
+	const uint16_t l_MagnitudeMask = 0x7FFF;
+
+	// +0 and -0 are considered to be equal
+	if (0 == (bits & l_MagnitudeMask) && 0 == (other.bits & l_MagnitudeMask))
+		return false;
+
+	// Different bit patterns differ; a NaN additionally differs from everything, itself included.
+	return bits != other.bits || this->IsNaN();
+}
+// ------------------------------------------------------------------------------------------------
+inline bool HalfFloat::operator<  (HalfFloat other) const
+{
+	// NaN comparisons are always false
+	if (this->IsNaN() || other.IsNaN())
+		return false;
+
+	// A half is stored as sign + magnitude (segment order s,e,m), not as two's complement.
+	//	Reinterpreting the bits as int16_t therefore orders negative values backwards (a larger
+	//	magnitude would compare as "greater") and puts -0 below +0. Map each operand to a signed
+	//	integer key instead: the 15 magnitude bits, negated when the sign bit is set. Both zeros
+	//	map to key 0.
+	const int l_Left = (this->bits & 0x8000) ? -(int)(this->bits & 0x7FFF) : (int)this->bits;
+	const int l_Right = (other.bits & 0x8000) ? -(int)(other.bits & 0x7FFF) : (int)other.bits;
+	return l_Left < l_Right;
+}
+// ------------------------------------------------------------------------------------------------
+inline bool HalfFloat::operator>  (HalfFloat other) const
+{
+	// NaN comparisons are always false
+	if (this->IsNaN() || other.IsNaN())
+		return false;
+
+	// A half is stored as sign + magnitude (segment order s,e,m), not as two's complement.
+	//	Reinterpreting the bits as int16_t therefore orders negative values backwards (a larger
+	//	magnitude would compare as "greater") and puts +0 above -0. Map each operand to a signed
+	//	integer key instead: the 15 magnitude bits, negated when the sign bit is set. Both zeros
+	//	map to key 0.
+	const int l_Left = (this->bits & 0x8000) ? -(int)(this->bits & 0x7FFF) : (int)this->bits;
+	const int l_Right = (other.bits & 0x8000) ? -(int)(other.bits & 0x7FFF) : (int)other.bits;
+	return l_Left > l_Right;
+}
+// ------------------------------------------------------------------------------------------------
+inline bool HalfFloat::operator<= (HalfFloat other) const
+{
+	// Every ordered comparison involving a NaN is false. operator> already returns false for
+	//	NaN operands, so negating it alone would report true here.
+	if (this->IsNaN() || other.IsNaN())
+		return false;
+
+	return !(*this > other);
+}
+// ------------------------------------------------------------------------------------------------
+inline bool HalfFloat::operator>= (HalfFloat other) const
+{
+	// Every ordered comparison involving a NaN is false. operator< already returns false for
+	//	NaN operands, so negating it alone would report true here.
+	if (this->IsNaN() || other.IsNaN())
+		return false;
+
+	return !(*this < other);
+}
+// ------------------------------------------------------------------------------------------------
+inline HalfFloat& HalfFloat::operator += (HalfFloat other)
+{
+	// Delegates to the binary half + half operator and stores the result in place.
+	return *this = (*this) + other;
+}
+// ------------------------------------------------------------------------------------------------
+inline HalfFloat& HalfFloat::operator -= (HalfFloat other)
+{
+	// Delegates to the binary half - half operator and stores the result in place.
+	return *this = (*this) - other;
+}
+// ------------------------------------------------------------------------------------------------
+inline HalfFloat& HalfFloat::operator *= (HalfFloat other)
+{
+	// Multiply in float, then assign the float result back (which converts it to a half).
+	const float l_Left = (float)(*this);
+	const float l_Right = (float)other;
+	return *this = l_Left * l_Right;
+}
+// ------------------------------------------------------------------------------------------------
+inline HalfFloat& HalfFloat::operator /= (HalfFloat other)
+{
+	// Divide in float, then assign the float result back (which converts it to a half).
+	const float l_Numerator = (float)(*this);
+	const float l_Denominator = (float)other;
+	return *this = l_Numerator / l_Denominator;
+}
+// ------------------------------------------------------------------------------------------------
+inline HalfFloat& HalfFloat::operator += (float other)
+{
+	// The float is converted to a half first, so the addition itself is half + half.
+	const HalfFloat l_Addend(other);
+	return *this = (*this) + l_Addend;
+}
+// ------------------------------------------------------------------------------------------------
+inline HalfFloat& HalfFloat::operator -= (float other)
+{
+	// The float is converted to a half first, so the subtraction itself is half - half.
+	const HalfFloat l_Subtrahend(other);
+	return *this = (*this) - l_Subtrahend;
+}
+// ------------------------------------------------------------------------------------------------
+inline HalfFloat& HalfFloat::operator *= (float other)
+{
+	// Multiply in float, then assign the float result back (which converts it to a half).
+	const float l_Self = (float)(*this);
+	return *this = l_Self * other;
+}
+// ------------------------------------------------------------------------------------------------
+inline HalfFloat& HalfFloat::operator /= (float other)
+{
+	// Divide in float, then assign the float result back (which converts it to a half).
+	const float l_Self = (float)(*this);
+	return *this = l_Self / other;
+}
+// ------------------------------------------------------------------------------------------------
+inline HalfFloat& HalfFloat::operator++()
+{
+	// Fraction 0, exponent field equal to the bias (i.e. an exponent of 0) and sign 0 encode
+	//	the value 1.0; pre-increment adds that value and returns the updated object.
+	const HalfFloat l_One(0, BIAS, 0);
+	return *this += l_One;
+}
+// ------------------------------------------------------------------------------------------------
+inline HalfFloat HalfFloat::operator++(int)
+{
+	// Post-increment: remember the current value, add 1.0 (fraction 0, exponent field BIAS,
+	//	sign 0) and hand back the remembered value.
+	const HalfFloat l_Previous(*this);
+	*this += HalfFloat(0, BIAS, 0);
+	return l_Previous;
+}
+// ------------------------------------------------------------------------------------------------
+inline HalfFloat& HalfFloat::operator--()
+{
+	// Pre-decrement: subtract 1.0 (fraction 0, exponent field BIAS, sign 0) in place.
+	const HalfFloat l_One(0, BIAS, 0);
+	return *this -= l_One;
+}
+// ------------------------------------------------------------------------------------------------
+inline HalfFloat HalfFloat::operator--(int)
+{
+	// Post-decrement: remember the current value, subtract 1.0 (fraction 0, exponent field
+	//	BIAS, sign 0) and hand back the remembered value.
+	const HalfFloat l_Previous(*this);
+	*this -= HalfFloat(0, BIAS, 0);
+	return l_Previous;
+}
+// ------------------------------------------------------------------------------------------------
+inline HalfFloat HalfFloat::operator-() const
+{
+	// Same fraction and exponent, complemented sign. The constructor arguments are passed in
+	//	the order (fraction, exponent, sign).
+	HalfFloat l_Negated(IEEE.Frac, IEEE.Exp, ~IEEE.Sign);
+	return l_Negated;
+}
+// ------------------------------------------------------------------------------------------------
+inline uint16_t HalfFloat::GetBits() const
+{
+	// Read-only access: returns a copy of the raw 16-bit pattern.
+	return this->bits;
+}
+// ------------------------------------------------------------------------------------------------
+inline uint16_t& HalfFloat::GetBits()
+{
+	// Mutable access: returns a reference to the raw 16-bit pattern so callers can modify it.
+	return this->bits;
+}
+// ------------------------------------------------------------------------------------------------
+// Adds two half-precision values.
+//
+// Unless HALFFLOAT_NO_CUSTOM_IMPLEMENTATIONS is defined, the sum is computed with integer
+// arithmetic on the decoded mantissas; otherwise both operands are converted to float, added,
+// and the result is converted back.
+//
+// Integer path:
+//	- any NaN operand gives NaN; (+inf) + (-inf) gives NaN; otherwise an infinite operand is
+//	  returned unchanged (it keeps its own sign);
+//	- an exact zero sum is returned as +0;
+//	- bits that do not fit into the 10-bit fraction are truncated (no rounding to nearest);
+//	- a result whose exponent does not fit becomes an infinity with the sign of the sum.
+inline HalfFloat operator+ (HalfFloat one, HalfFloat two)
+{
+#if (!defined HALFFLOAT_NO_CUSTOM_IMPLEMENTATIONS)
+
+	// ---- special values ------------------------------------------------------------------
+	// if one of the components is NaN the result becomes NaN, too.
+	if (one.IsNaN() || two.IsNaN())
+		return HalfFloat(1, HalfFloat::MAX_EXPONENT_VALUE, 0);
+
+	if (one.IsInfinity())
+	{
+		// Infinities of opposite sign cancel to NaN. In every other case the infinity wins and
+		//	keeps its OWN sign; the sign of a finite operand must not leak into the result.
+		if (two.IsInfinity() && one.IEEE.Sign != two.IEEE.Sign)
+			return HalfFloat(1, HalfFloat::MAX_EXPONENT_VALUE, 0);
+		return one;
+	}
+	if (two.IsInfinity())
+		return two;
+
+	// ---- decode both operands --------------------------------------------------------------
+	// A normal number carries a hidden leading one above the stored fraction. A subnormal has
+	//	no hidden bit and is scaled like exponent field 1, not 0.
+	const int l_HiddenBit = 1 << HalfFloat::BITS_MANTISSA;
+	const int l_ExpOne = (0 == one.IEEE.Exp) ? 1 : (int)one.IEEE.Exp;
+	const int l_ExpTwo = (0 == two.IEEE.Exp) ? 1 : (int)two.IEEE.Exp;
+	long long m1 = (0 == one.IEEE.Exp) ? (int)one.IEEE.Frac : ((int)one.IEEE.Frac | l_HiddenBit);
+	long long m2 = (0 == two.IEEE.Exp) ? (int)two.IEEE.Frac : ((int)two.IEEE.Frac | l_HiddenBit);
+
+	// ---- align to the smaller exponent -------------------------------------------------------
+	// The largest possible shift is 29 bits on an 11-bit mantissa, which always fits into a
+	//	long long, so no operand ever has to be dropped or shifted right before the addition.
+	const int l_BaseExp = (l_ExpOne < l_ExpTwo) ? l_ExpOne : l_ExpTwo;
+	m1 <<= (l_ExpOne - l_BaseExp);
+	m2 <<= (l_ExpTwo - l_BaseExp);
+
+	// convert from sign-bit to two's complement representation and add
+	if (one.IEEE.Sign)m1 = -m1;
+	if (two.IEEE.Sign)m2 = -m2;
+	long long l_Sum = m1 + m2;
+
+	if (0 == l_Sum)return HalfFloat(0, 0, 0);
+
+	HalfFloat out;
+	if (l_Sum < 0)
+	{
+		out.IEEE.Sign = 1;
+		l_Sum = -l_Sum;
+	}
+	else out.IEEE.Sign = 0;
+
+	// ---- renormalize the result to fit in a half ---------------------------------------------
+	// Index of the highest set bit, found with a plain loop so that every compiler computes the
+	//	same thing (a leading-zero count is NOT interchangeable with a bit index).
+	int l_TopBit = 0;
+	for (long long l_Rest = l_Sum >> 1; 0 != l_Rest; l_Rest >>= 1)
+		++l_TopBit;
+
+	// Moving the highest set bit to the hidden-bit position changes the exponent by l_Shift.
+	const int l_Shift = l_TopBit - HalfFloat::BITS_MANTISSA;
+	const int l_Exp = l_BaseExp + l_Shift;
+
+	if (l_Exp >= HalfFloat::MAX_EXPONENT_VALUE)
+	{
+		// arithmetic overflow. return INF and keep the sign
+		return HalfFloat(0, HalfFloat::MAX_EXPONENT_VALUE, out.IEEE.Sign);
+	}
+	else if (l_Exp <= 0)
+	{
+		// this maps to a denorm: exponent field 0, fraction expressed at the scale of exponent
+		//	field 1. The shifted value is below 2^10 here, so it fits the fraction exactly.
+		out.IEEE.Exp = 0;
+		out.IEEE.Frac = (uint16_t)(l_Sum << (l_BaseExp - 1));
+	}
+	else
+	{
+		// rebuild the normalized representation and strip the hidden bit. Bits shifted out on
+		//	the right are truncated.
+		const long long l_Mantissa = (l_Shift < 0) ? (l_Sum << (-l_Shift)) : (l_Sum >> l_Shift);
+		out.IEEE.Exp = (uint16_t)l_Exp;
+		out.IEEE.Frac = (uint16_t)(l_Mantissa & (l_HiddenBit - 1));
+	}
+	return out;
+
+#else
+	return HalfFloat((float)one + (float)two);
+#endif
+}
+// ------------------------------------------------------------------------------------------------
+inline HalfFloat operator- (HalfFloat one, HalfFloat two)
+{
+	// Subtraction is addition of the negated right-hand operand.
+	const HalfFloat l_Negated = -two;
+	return one + l_Negated;
+}
+// ------------------------------------------------------------------------------------------------
+inline HalfFloat operator* (HalfFloat one, HalfFloat two)
+{
+	// Multiply in float and convert the product back to a half.
+	const float l_Product = (float)one * (float)two;
+	return HalfFloat(l_Product);
+}
+// ------------------------------------------------------------------------------------------------
+inline HalfFloat operator/ (HalfFloat one, HalfFloat two)
+{
+	// Divide in float and convert the quotient back to a half.
+	const float l_Quotient = (float)one / (float)two;
+	return HalfFloat(l_Quotient);
+}
+// ------------------------------------------------------------------------------------------------
+inline float operator+ (HalfFloat one, float two)
+{
+	// Mixed half/float arithmetic is carried out in float and returns a float.
+	const float l_Left = (float)one;
+	return l_Left + two;
+}
+// ------------------------------------------------------------------------------------------------
+inline float operator- (HalfFloat one, float two)
+{
+	// Mixed half/float arithmetic is carried out in float and returns a float.
+	const float l_Left = (float)one;
+	return l_Left - two;
+}
+// ------------------------------------------------------------------------------------------------
+inline float operator* (HalfFloat one, float two)
+{
+	// Mixed half/float arithmetic is carried out in float and returns a float.
+	const float l_Left = (float)one;
+	return l_Left * two;
+}
+// ------------------------------------------------------------------------------------------------
+inline float operator/ (HalfFloat one, float two)
+{
+	// Mixed half/float arithmetic is carried out in float and returns a float.
+	const float l_Left = (float)one;
+	return l_Left / two;
+}
+// ------------------------------------------------------------------------------------------------
+inline float operator+ (float one, HalfFloat two)
+{
+	// Addition is commutative, so the operands are simply handed over in swapped order.
+	const float l_Sum = two + one;
+	return l_Sum;
+}
+// ------------------------------------------------------------------------------------------------
+inline float operator- (float one, HalfFloat two)
+{
+	// Subtraction is not commutative: the float is the minuend and the half the subtrahend.
+	//	Swapping the operands (two - one) would return the negated result.
+	return one - (float)two;
+}
+// ------------------------------------------------------------------------------------------------
+inline float operator* (float one, HalfFloat two)
+{
+	// Multiplication is commutative, so the operands are simply handed over in swapped order.
+	const float l_Product = two * one;
+	return l_Product;
+}
+// ------------------------------------------------------------------------------------------------
+inline float operator/ (float one, HalfFloat two)
+{
+	// Division is not commutative: the float is the dividend and the half the divisor.
+	//	Swapping the operands (two / one) would return the reciprocal of the result.
+	return one / (float)two;
+}
+
+#endif //!! UM_HALF_INL_INCLUDED
--- a/source/tensor/halfLib/half/a.md
+++ b/source/tensor/halfLib/half/a.md
+#pragma once
--- a/source/tensor/halfLib/half/dump
+++ b/source/tensor/halfLib/half/dump
+halfa: order=2 dimsize=4,4 dtype=X_FLOAT16 dense=1.000000
+bc68 342d ae59 bcd7 b46a 3c1c 2c25 beb9 bcaf 3d72 3fc2 38d0 bd6b bce4 3854 ad13
--- a/source/tensor/halfLib/half/half.hpp
+++ b/source/tensor/halfLib/half/half.hpp
--- a/source/tensor/halfLib/half/main.cpp
+++ b/source/tensor/halfLib/half/main.cpp
+#include <stdio.h>
+#include <direct.h>
+#include "../../core/CHeader.h"
+#include "../../core/utilities/FlushToMem.h"
+#include "../../core/getandset/ConvertDataType.h"
+#include "../../XTensor.h"
+#include "../../XGlobal.h"
+
+using namespace nts;
+
+// Round-trip and multiplication smoke test for X_FLOAT16 tensors:
+//	1. creates two random 4x4 float tensors and converts them to X_FLOAT16,
+//	2. dumps one half tensor to "<cwd>\source\tensor\halfLib\half\dump" and reads it back,
+//	3. runs BMMul on the float tensors and on the half tensors and dumps both results.
+// Returns 0 on success and 1 when the dump path cannot be built or the dump file cannot be opened.
+int main(int argc, const char ** argv) {
+	// Build the dump path in fixed-size buffers. Appending with strcat to the buffer returned by
+	// getcwd(NULL, 0) is not safe: that buffer need not have room for anything beyond the
+	// directory name itself, and it was never freed either.
+	char cwd[1024];
+	if (getcwd(cwd, (int)sizeof(cwd)) == NULL) {
+		fprintf(stderr, "cannot determine the current working directory\n");
+		return 1;
+	}
+	char path[2048];
+	const int written = snprintf(path, sizeof(path), "%s%s", cwd, "\\source\\tensor\\halfLib\\half\\dump");
+	if (written < 0 || written >= (int)sizeof(path)) {
+		fprintf(stderr, "dump path is too long\n");
+		return 1;
+	}
+
+	int dim = 4;
+	int devId = 0;
+
+	XTensor a;
+	XTensor b;
+	XTensor c;
+
+	XTensor halfa;
+	XTensor halfb;
+	XTensor halfc;
+
+	InitTensor2DV2(&a, dim, dim, X_FLOAT, devId);
+	InitTensor2DV2(&c, dim, dim, X_FLOAT, devId);
+	InitTensor2DV2(&halfb, dim, dim, X_FLOAT16, devId);
+
+	a.SetDataRand(-2.0, 2.0);
+	c.SetDataRand(-2.0, 2.0);
+	halfa = ConvertDataType(a, X_FLOAT16);
+	halfc = ConvertDataType(c, X_FLOAT16);
+
+	printf("============save model================\n");
+	halfa.Dump(&halfa, stderr, "halfa:");
+	GPUToCPUFlush(&halfa);
+
+	FILE * file = fopen(path, "wb");
+	if (file == NULL) {
+		// without this check Dump and fclose would be handed a null stream
+		fprintf(stderr, "cannot open %s for writing\n", path);
+		return 1;
+	}
+	halfa.Dump(file, "halfa:");
+	//a.Dump(file, "a");
+	fclose(file);
+
+	printf("==============read model=============\n");
+	FILE *read = fopen(path, "rb");
+	if (read == NULL) {
+		fprintf(stderr, "cannot open %s for reading\n", path);
+		return 1;
+	}
+	halfb.Read(read, "halfa:");
+	//b.Read(read, "a");
+	fclose(read);
+	halfb.Dump(&halfb, stderr, "halfb:");
+	printf("==============BMMUL=============\n");
+	b = BMMul(a, X_NOTRANS, c, X_NOTRANS);
+	b.Dump(stderr,"b:");
+	printf("==============BMMUL-float=============\n");
+	halfa= BMMul(halfb, X_NOTRANS, halfc, X_NOTRANS);
+	halfa.Dump(&halfa, stderr, "halfla:");
+	
+	return 0;
+}
\ No newline at end of file
--- a/source/tensor/halfLib/half/matmul.cu
+++ b/source/tensor/halfLib/half/matmul.cu
+#include <stdlib.h>
+#include <stdio.h>
+#include <cuda_runtime.h>
+#include <device_launch_parameters.h>
+#include <time.h>
+#include <cuda_fp16.h>
+
+//#ifndef HALF_ARITHMETIC_TYPE
+//#define HALF_ARITHMETIC_TYPE
+//#endif // !HALF_ARITHMETIC_TYPE
+
+#include "half.hpp"
+
+using half_float::halfFloat;
+
+typedef half_float::halfFloat halfC;
+
+// Element-wise product: every thread writes C[i] = A[i] * B[i] for the index i derived from
+// the x components of its block and thread ids. No bounds check is performed.
+__global__ void matrixMulKernel(__half *C, __half *A, __half *B) {
+	const int element = blockIdx.x * blockDim.x + threadIdx.x;
+	const __half product = A[element] * B[element];
+	C[element] = product;
+}
+
+// Stores val in data[0] .. data[size - 1]; nothing is written when size is not positive.
+void constantInit(halfC *data, int size, halfC val) {
+	int index = 0;
+	while (index < size) {
+		data[index] = val;
+		++index;
+	}
+}
+
+
+// Multiplies two constant vectors of N halfs element-wise on the GPU (matrixMulKernel) and on
+// the host, then prints the raw 16-bit patterns of both results side by side ("gpu--cpu"),
+// eight pairs per line.
+void matrixMul() {
+
+	const unsigned int N = 128;
+	const unsigned int size = N * sizeof(halfC);
+
+	halfC *h_A = (halfC*)malloc(size);
+	halfC *h_B = (halfC*)malloc(size);
+	halfC *h_C = (halfC*)malloc(size);
+	halfC *h_D = (halfC*)malloc(size);
+	if (h_A == NULL || h_B == NULL || h_C == NULL || h_D == NULL) {
+		fprintf(stderr, "host allocation failed\n");
+		free(h_A);
+		free(h_B);
+		free(h_C);
+		free(h_D);
+		return;
+	}
+
+	// Initialize host memory
+	const halfC valB = (halfC)0.01f;
+	constantInit(h_A, N, (halfC)1.0f);
+	constantInit(h_B, N, valB);
+
+	__half *d_A, *d_B, *d_C;
+	cudaMalloc((void**)&d_A, size);
+	cudaMalloc((void**)&d_B, size);
+	cudaMalloc((void**)&d_C, size);
+
+	//copy host memory to device
+	cudaMemcpy(d_A, h_A, size, cudaMemcpyHostToDevice);
+	cudaMemcpy(d_B, h_B, size, cudaMemcpyHostToDevice);
+
+	//config dims
+	// The kernel derives its index from the x dimension only, so a 1-D launch with exactly N
+	// threads covers every element once. A 2-D grid of 2-D blocks would make many threads
+	// recompute and rewrite the same elements.
+	dim3 block(16);
+	dim3 grid(N / block.x);
+
+	// Excute the kernel
+	matrixMulKernel<<<grid, block>>>(d_C, d_A, d_B);
+
+	// Copy the memory from device to host
+	cudaMemcpy(h_C, d_C, size, cudaMemcpyDeviceToHost);
+
+	//printf("Checking computed result for correctness: ");
+	//bool correct = true;
+	//// test relative error by the formula
+	////     |<x, y>_cpu - <x,y>_gpu|/<|x|, |y|>  < eps
+	//double eps = 1.e-6; // machine zero
+
+	// host reference result
+	for (unsigned int k = 0; k < N; k++) {
+		h_D[k] = h_A[k] * h_B[k];
+	}
+
+	for (unsigned int i = 0; i < N; i++) {
+		// Print the raw bit patterns. A class object must not be passed through printf's
+		// variadic arguments, so the 16-bit storage is read explicitly.
+		// NOTE(review): assumes halfC stores exactly its 16-bit pattern, as the copies to and
+		// from __half buffers above already do - confirm against half.hpp.
+		const unsigned short gpuBits = *reinterpret_cast<unsigned short*>(&h_C[i]);
+		const unsigned short cpuBits = *reinterpret_cast<unsigned short*>(&h_D[i]);
+		printf("%hx--%hx ", gpuBits, cpuBits);
+		if ((i + 1) % 8 == 0)
+			printf("\n");
+	}
+
+	//for (int i = 0; i < width*height; i++) {
+	//	double abs_err = fabs(h_C[i] - (width * valB));
+	//	double dot_length = width;
+	//	double abs_val = fabs(h_C[i]);
+	//	double rel_err = abs_err / abs_val / dot_length;
+	//	if (rel_err > eps)
+	//	{
+	//		printf("Error! Matrix[%05d]=%.8f, ref=%.8f error term is > %E\n", i, h_C[i], (float)(width*height), eps);
+	//		correct = false;
+	//	}
+	//}
+	//printf("%s\n", correct ? "Result = PASS" : "Result = FAIL");
+
+	// Free
+	free(h_A);
+	free(h_B);
+	free(h_C);
+	free(h_D);	// the host reference buffer has to be released as well
+	cudaFree(d_A);
+	cudaFree(d_B);
+	cudaFree(d_C);
+}
+
+// Entry point: runs the element-wise multiplication demo once.
+int main() {
+	matrixMul();
+	return 0;
+}
+
+//
+//#define THREAD_NUM 256
+//#define MATRIX_SIZE 4
+//const halfC blocks_num = MATRIX_SIZE * (MATRIX_SIZE + THREAD_NUM - 1) / THREAD_NUM;
+//
+//__global__ static void matMultCUDA(const __half* a, const __half* b, __half* c, halfC n, clock_t* time)
+//{
+//
+//	//表示目前的 thread 是第几个 thread（由 0 开始计算）
+//	const halfC tid = threadIdx.x;
+//
+//	//表示目前的 thread 属于第几个 block（由 0 开始计算）
+//	const halfC bid = blockIdx.x;
+//
+//	//从 bid 和 tid 计算出这个 thread 应该计算的 row 和 column
+//	const halfC idx = bid * THREAD_NUM + tid;
+//	const halfC row = idx / n;
+//	const halfC column = idx % n;
+//
+//	halfC i;
+//
+//	//记录运算开始的时间
+//	clock_t start;
+//
+//	//只在 thread 0（即 threadIdx.x = 0 的时候）进行记录，每个 block 都会记录开始时间及结束时间
+//	if (tid == 0)
+//		time[bid] = clock();
+//
+//	//计算矩阵乘法
+//	if (row < n && column < n)
+//	{
+//		__half t = __half(0.0);
+//		for (i = 0; i < n; i++)
+//		{
+//			t += a[row * n + i] * b[i * n + column];
+//		}
+//		c[row * n + column] = t;
+//	}
+//
+//	//计算时间,记录结果，只在 thread 0（即 threadIdx.x = 0 的时候）进行，每个 block 都会记录开始时间及结束时间
+//	if (tid == 0)
+//	{
+//		time[bid + blocks_num] = clock();
+//	}
+//}
+//
+//bool InitCuda() {
+//	halfC count;
+//	halfC device;
+//	cudaGetDeviceCount(&count);
+//	if (count == 0) {
+//		fprhalfCf(stderr, "There is no device !\n");
+//	}
+//	else
+//		device = 1;
+//	cudaSetDevice(device);
+//	return true;
+//}
+//template <class T >
+//void matgen(T *a, halfC n) {
+//	halfC i, j;
+//	for (i = 0; i < n; i++) {
+//		for (j = 0; j < n; j++) {
+//			a[i * n + j] = (T)rand() / (0x7FFF) + (halfC)rand() / (0x7FFF * 0x7FFF);
+//		}
+//	}
+//}
+//
+//
+//halfC main(halfC argc, char **argv) {
+//
+//	//CUDA 初始化
+//	if (!InitCuda())
+//		return 0;
+//
+//	//定义矩阵
+//	halfC *a, *b, *c, *d;
+//
+//	halfC n = MATRIX_SIZE;
+//
+//	//分配内存
+//	a = (halfC*)malloc(sizeof(halfC)* n * n);
+//	b = (halfC*)malloc(sizeof(halfC)* n * n);
+//	c = (halfC*)malloc(sizeof(halfC)* n * n);
+//	d = (halfC*)malloc(sizeof(halfC)* n * n);
+//
+//	//设置随机数种子
+//	srand(0);
+//
+//	//随机生成矩阵
+//	matgen(a, n);
+//	matgen(b, n);
+//
+//	for (halfC i = 0; i < n; i++)
+//	{
+//		for (halfC j = 0; j < n; j++)
+//		{
+//			prhalfCf("%x ", a[i * n + j]);
+//		}
+//		prhalfCf("\n");
+//	}
+//
+//	///*把数据复制到显卡内存中*/
+//	__half *cuda_a, *cuda_b, *cuda_c;
+//
+//	clock_t* time;
+//
+//	//cudaMalloc 取得一块显卡内存 
+//	cudaMalloc((void**)&cuda_a, sizeof(__half)* n * n);
+//	cudaMalloc((void**)&cuda_b, sizeof(__half)* n * n);
+//	cudaMalloc((void**)&cuda_c, sizeof(__half)* n * n);
+//
+//	cudaMalloc((void**)&time, sizeof(clock_t)* blocks_num * 2);
+//
+//	//cudaMemcpy 将产生的矩阵复制到显卡内存中
+//	//cudaMemcpyHostToDevice - 从内存复制到显卡内存
+//	//cudaMemcpyDeviceToHost - 从显卡内存复制到内存
+//	cudaMemcpy(cuda_a, a, sizeof(__half)* n * n, cudaMemcpyHostToDevice);
+//	cudaMemcpy(cuda_b, b, sizeof(__half)* n * n, cudaMemcpyHostToDevice);
+//
+//	// 在CUDA 中执行函数 语法：函数名称<<<block 数目, thread 数目, shared memory 大小>>>(参数...);
+//	matMultCUDA << < blocks_num, THREAD_NUM, 0 >> > (cuda_a, cuda_b, cuda_c, n, time);
+//
+//	/*把结果从显示芯片复制回主内存*/
+//
+//	clock_t time_use[blocks_num * 2];
+//
+//	//cudaMemcpy 将结果从显存中复制回内存
+//	cudaMemcpy(c, cuda_c, sizeof(halfC)* n * n, cudaMemcpyDeviceToHost);
+//	cudaMemcpy(&time_use, time, sizeof(clock_t)* blocks_num * 2, cudaMemcpyDeviceToHost);
+//
+//	for (halfC i = 0; i < n; i++)
+//	{
+//		for (halfC j = 0; j < n; j++)
+//		{
+//			prhalfCf("%x ", c[i * n + j]);
+//		}
+//		prhalfCf("\n");
+//	}
+//
+//	//Free cuda
+//	cudaFree(cuda_a);
+//	cudaFree(cuda_b);
+//	cudaFree(cuda_c);
+//	cudaFree(time);
+
+
+	////把每个 block 最早的开始时间，和最晚的结束时间相减，取得总运行时间
+	//clock_t min_start, max_end;
+
+	//min_start = time_use[0];
+
+	//max_end = time_use[blocks_num];
+
+	//for (halfC i = 1; i < blocks_num; i++)
+	//{
+	//	if (min_start > time_use[i]) min_start = time_use[i];
+
+	//	if (max_end < time_use[i + blocks_num]) max_end = time_use[i + blocks_num];
+	//}
+
+	////核函数运行时间
+	//clock_t final_time = max_end - min_start;
+
+
+	////CPU矩阵乘法，存入矩阵d
+	//for (halfC i = 0; i < n; i++)
+	//{
+	//	for (halfC j = 0; j < n; j++)
+	//	{
+	//		double t = 0;
+	//		for (halfC k = 0; k < n; k++){
+
+	//			t += a[i * n + k] * b[k * n + j];
+	//		}
+
+	//		d[i * n + j] = t;
+	//	}
+	//}
+
+	////验证正确性与精确性
+
+	//halfC max_err = (halfC)0.0;
+	//halfC average_err = (halfC)0;
+	//for (halfC i = 0; i < n; i++)
+	//{
+	//	for (halfC j = 0; j < n; j++)
+	//	{
+	//		if (d[i * n + j] != 0)
+	//		{
+	//			//fabs求浮点数x的绝对值
+	//			halfC err = fabs((c[i * n + j] - d[i * n + j]) / d[i * n + j]);
+	//			if (max_err < err) max_err = err;
+	//			average_err += err;
+	//		}
+	//	}
+	//}
+
+	//prhalfCf("Max error: %g Average error: %g\n", max_err, average_err / (n * n));
+	//prhalfCf("gputime: %d\n", final_time);
+//
+//	return 0;
+//}
\ No newline at end of file
--- a/source/tensor/halfLib/half/test.cpp
+++ b/source/tensor/halfLib/half/test.cpp
+// test - Test application for half-precision floating point functionality.
+//
+// Copyright (c) 2012-2019 Christian Rau <rauy@users.sourceforge.net>
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation 
+// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, 
+// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the 
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE 
+// WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 
+// COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+//#define HALF_ENABLE_F16C_INTRINSICS 1
+//#define HALF_ARITHMETIC_TYPE float
+#define HALF_ROUND_STYLE 1
+#include "half.hpp"
+
+#include <utility>
+#include <vector>
+#include <string>
+#include <map>
+#include <set>
+#include <iostream>
+#include <iomanip>
+#include <memory>
+#include <algorithm>
+#include <numeric>
+#include <iterator>
+#include <functional>
+#include <fstream>
+#include <random>
+#include <bitset>
+#include <limits>
+#include <chrono>
+#include <typeinfo>
+#include <stdexcept>
+#include <cstdint>
+#include <cmath>
+#if HALF_ENABLE_CPP11_HASH
+#include <unordered_map>
+#endif
+
+#if HALF_ENABLE_CPP11_CMATH && !defined(HALF_ARITHMETIC_TYPE)
+#include <cfenv>
+#pragma STDC FENV_ACCESS ON
+#endif
+
+// Returns how many times i can be shifted right by one bit before it is no longer positive,
+// i.e. the number of significant bits of a positive i. Zero and negative inputs give 0.
+int ilog2(int i)
+{
+	unsigned int shifts = 0;
+	while (i > 0)
+	{
+		i >>= 1;
+		++shifts;
+	}
+	return shifts;
+}
+
+// Times N full passes over x, each pass storing func(x[h]) in results[h] for every index h.
+// The elapsed milliseconds are written to log_ and, when csv_ is non-null, to *csv_ as
+// "<func>;<ms>". Expects `results`, `log_` and `csv_` to be visible where the macro is expanded.
+#define UNARY_PERFORMANCE_TEST(func, x, N) { \
+	auto start = std::chrono::high_resolution_clock::now(); \
+	for(unsigned int i=0; i<N; ++i) for(unsigned int h=0; h<x.size(); ++h) results[h] = func(x[h]); \
+	auto tm = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::high_resolution_clock::now()-start).count(); \
+	log_ << #func << "\tx " << N << ":\t" << tm << "\n\n"; if(csv_) *csv_ << #func << ';' << tm << '\n'; }
+
+// Times func(x[i], y[j]) over every N-th element of x combined with every N-th element of y
+// (both indices advance in steps of N), storing each result in results[j]. The elapsed
+// milliseconds are written to log_, labelled with the sampled fraction 1/(N*N), and, when csv_
+// is non-null, to *csv_ as "<func>;<ms>". Expects `results`, `log_` and `csv_` to be visible
+// where the macro is expanded.
+#define BINARY_PERFORMANCE_TEST(func, x, y, N) { \
+	auto start = std::chrono::high_resolution_clock::now(); \
+	for(unsigned int i=0; i<x.size(); i+=N) for(unsigned int j=0; j<y.size(); j+=N) results[j] = func(x[i], y[j]); \
+	auto tm = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::high_resolution_clock::now()-start).count(); \
+	log_ << #func << "\t@ 1/" << (N*N) << ":\t" << tm << "\n\n"; if(csv_) *csv_ << #func << ';' << tm << '\n'; }
+
+// Same sampling scheme as the binary function test, but for an infix operator: times
+// x[i] op y[j] over every N-th element of x and of y, storing each result in results[j]. The
+// elapsed milliseconds are written to log_, labelled with the sampled fraction 1/(N*N), and,
+// when csv_ is non-null, to *csv_ as "<op>;<ms>". Expects `results`, `log_` and `csv_` to be
+// visible where the macro is expanded.
+#define OPERATOR_PERFORMANCE_TEST(op, x, y, N) { \
+	auto start = std::chrono::high_resolution_clock::now(); \
+	for(unsigned int i=0; i<x.size(); i+=N) for(unsigned int j=0; j<y.size(); j+=N) results[j] = x[i] op y[j]; \
+	auto tm = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::high_resolution_clock::now()-start).count(); \
+	log_ << #op << "\t@ 1/" << (N*N) << ":\t" << tm << "\n\n"; if(csv_) *csv_ << #op << ';' << tm << '\n'; }
+
+// Times func(x[i], y[j], z[k]) over every N-th element of x, y and z (all three indices advance
+// in steps of N), storing each result in results[k]. The elapsed milliseconds are written to
+// log_, labelled with the sampled fraction 1/(N*N*N), and, when csv_ is non-null, to *csv_ as
+// "<func>;<ms>". Expects `results`, `log_` and `csv_` to be visible where the macro is expanded.
+#define TERNARY_PERFORMANCE_TEST(func, x, y, z, N) { \
+	auto start = std::chrono::high_resolution_clock::now(); \
+	for(unsigned int i=0; i<x.size(); i+=N) for(unsigned int j=0; j<y.size(); j+=N) for(unsigned int k=0; k<z.size(); k+=N) results[k] = func(x[i], y[j], z[k]); \
+	auto tm = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::high_resolution_clock::now()-start).count(); \
+	log_ << #func << "\t@ 1/" << (N*N*N) << ":\t" << tm << "\n\n"; if(csv_) *csv_ << #func << ';' << tm << '\n'; }
+
+
+using half_float::half;
+using half_float::half_cast;
+#if HALF_ENABLE_CPP11_USER_LITERALS
+using namespace half_float::literal;
+#endif
+
+// Reinterprets a raw 16-bit pattern as a half ("bits to half").
+half b2h(std::uint16_t bits)
+{
+	half *view = reinterpret_cast<half*>(&bits);
+	return *view;
+}
+
+// Reinterprets a half as its raw 16-bit pattern ("half to bits").
+std::uint16_t h2b(half h)
+{
+	std::uint16_t *view = reinterpret_cast<std::uint16_t*>(&h);
+	return *view;
+}
+
+// Bitwise comparison that additionally treats any two NaNs as equal. Because raw bit patterns
+// are compared, +0 and -0 count as different.
+bool comp(half a, half b)
+{
+	if (isnan(a) && isnan(b))
+		return true;
+	return h2b(a) == h2b(b);
+}
+
+// Value comparison (operator==) that additionally treats any two NaNs as equal.
+bool compz(half a, half b)
+{
+	if (isnan(a) && isnan(b))
+		return true;
+	return a == b;
+}
+
+// Picks one of the two halfs in hh according to the rounding style R:
+//	- round_toward_zero:         second when |first| > |second|
+//	- round_toward_infinity:     second when second > first
+//	- round_toward_neg_infinity: second when second <= first
+// In every other case (including all other rounding styles) first is returned.
+template<std::float_round_style R> half select(const std::pair<half, half> &hh)
+{
+	if (R == std::round_toward_zero && abs(hh.first) > abs(hh.second))
+		return hh.second;
+	if (R == std::round_toward_infinity && hh.second > hh.first)
+		return hh.second;
+	if (R == std::round_toward_neg_infinity && hh.second <= hh.first)
+		return hh.second;
+	return hh.first;
+}
+
+
+class half_test
+{
+public:
+	// Stores the output streams and mode flags, then fills halfs_ with every 16-bit pattern,
+	// grouped by sign and category, and classes_ with the FP_* class expected for each group.
+	half_test(std::ostream &log, std::ostream *csv, bool fast, bool rough)
+		: tests_(0), log_(log), csv_(csv), fast_(fast), rough_(rough)
+	{
+		// Collects the halfs whose bit patterns lie in the half-open range [first, last).
+		auto pattern_range = [](unsigned int first, unsigned int last) -> half_vector {
+			half_vector values;
+			for (unsigned int pattern = first; pattern < last; ++pattern)
+				values.push_back(b2h(static_cast<std::uint16_t>(pattern)));
+			return values;
+		};
+
+		//prepare halfs: positive patterns 0x0000 - 0x7FFF
+		halfs_.insert(std::make_pair("positive zero", half_vector(1, b2h(0x0000))));
+		halfs_.insert(std::make_pair("positive subn", pattern_range(0x0001, 0x0400)));
+		halfs_.insert(std::make_pair("positive norm", pattern_range(0x0400, 0x7C00)));
+		halfs_.insert(std::make_pair("positive inft", half_vector(1, b2h(0x7C00))));
+		halfs_.insert(std::make_pair("positive  NaN", pattern_range(0x7C01, 0x8000)));
+
+		//prepare halfs: negative patterns 0x8000 - 0xFFFF
+		halfs_.insert(std::make_pair("negative zero", half_vector(1, b2h(0x8000))));
+		halfs_.insert(std::make_pair("negative subn", pattern_range(0x8001, 0x8400)));
+		halfs_.insert(std::make_pair("negative norm", pattern_range(0x8400, 0xFC00)));
+		halfs_.insert(std::make_pair("negative inft", half_vector(1, b2h(0xFC00))));
+		halfs_.insert(std::make_pair("negative  NaN", pattern_range(0xFC01, 0x10000)));
+
+		//set classes
+		classes_["positive zero"] = FP_ZERO;
+		classes_["negative zero"] = FP_ZERO;
+		classes_["positive subn"] = FP_SUBNORMAL;
+		classes_["negative subn"] = FP_SUBNORMAL;
+		classes_["positive norm"] = FP_NORMAL;
+		classes_["negative norm"] = FP_NORMAL;
+		classes_["positive inft"] = FP_INFINITE;
+		classes_["negative inft"] = FP_INFINITE;
+		classes_["positive  NaN"] = FP_NAN;
+		classes_["negative  NaN"] = FP_NAN;
+	}
+
+	unsigned int test()
+	{
+		/*
+				//test size
+				simple_test("size", []() { return sizeof(half)*CHAR_BIT >= 16; });
+
+				//test conversion
+				unary_test("float conversion", [](half arg) { return comp(half_cast<half>(half_cast<float>(arg)), arg); });
+				unary_test("double conversion", [](half arg) { return comp(half_cast<half>(half_cast<double>(arg)), arg); });
+				unary_test("long double conversion", [](half arg) { return comp(half_cast<half>(half_cast<long double>(arg)), arg); });
+
+				//test classification
+				class_test("fpclassify", [](half arg, int cls) { return fpclassify(arg) == cls; });
+				class_test("isfinite", [](half arg, int cls) { return isfinite(arg) == (cls!=FP_INFINITE&&cls!=FP_NAN); });
+				class_test("isinf", [](half arg, int cls) { return isinf(arg) == (cls==FP_INFINITE); });
+				class_test("isnan", [](half arg, int cls) { return isnan(arg) == (cls==FP_NAN); });
+				class_test("isnormal", [](half arg, int cls) { return isnormal(arg) == (cls==FP_NORMAL); });
+				unary_test("signbit", [](half arg) -> bool { double f = arg; return isnan(arg) || f==0.0 || (signbit(arg)==(f<0.0)); });
+
+				//test operators
+				unary_test("prefix increment", [](half arg) -> bool { double f = half_cast<double>(arg);
+					return comp(static_cast<half>(++f), ++arg) && comp(half_cast<half>(f), arg); });
+				unary_test("prefix decrement", [](half arg) -> bool { double f = half_cast<double>(arg);
+					return comp(static_cast<half>(--f), --arg) && comp(half_cast<half>(f), arg); });
+				unary_test("postfix increment", [](half arg) -> bool { double f = half_cast<double>(arg);
+					return comp(static_cast<half>(f++), arg++) && comp(half_cast<half>(f), arg); });
+				unary_test("postfix decrement", [](half arg) -> bool { double f = half_cast<double>(arg);
+					return comp(static_cast<half>(f--), arg--) && comp(half_cast<half>(f), arg); });
+				unary_test("unary plus", [](half arg) { return comp(+arg, arg); });
+				unary_test("unary minus", [](half arg) { return comp(-arg, half_cast<half>(-half_cast<double>(arg))); });
+				binary_test("addition", [](half a, half b) { return comp(a+b, half_cast<half>(half_cast<double>(a)+half_cast<double>(b))); });
+				binary_test("subtraction", [](half a, half b) { return comp(a-b, half_cast<half>(half_cast<double>(a)-half_cast<double>(b))); });
+				binary_test("multiplication", [](half a, half b) { return comp(a*b, half_cast<half>(half_cast<double>(a)*half_cast<double>(b))); });
+				binary_test("division", [](half a, half b) { return comp(a/b, half_cast<half>(half_cast<double>(a)/half_cast<double>(b))); });
+				binary_test("equal", [](half a, half b) { return (a==b) == (half_cast<double>(a)==half_cast<double>(b)); });
+				binary_test("not equal", [](half a, half b) { return (a!=b) == (half_cast<double>(a)!=half_cast<double>(b)); });
+				binary_test("less", [](half a, half b) { return (a<b) == (half_cast<double>(a)<half_cast<double>(b)); });
+				binary_test("greater", [](half a, half b) { return (a>b) == (half_cast<double>(a)>half_cast<double>(b)); });
+				binary_test("less equal", [](half a, half b) { return (a<=b) == (half_cast<double>(a)<=half_cast<double>(b)); });
+				binary_test("greater equal", [](half a, half b) { return (a>=b) == (half_cast<double>(a)>=half_cast<double>(b)); });
+
+				//test basic functions
+				unary_test("abs", [](half arg) { return comp(abs(arg), half_cast<half>(std::abs(half_cast<double>(arg)))); });
+				unary_test("fabs", [](half arg) { return comp(fabs(arg), half_cast<half>(std::fabs(half_cast<double>(arg)))); });
+				binary_test("fmod", [](half x, half y) { return comp(fmod(x, y), half_cast<half>(std::fmod(half_cast<double>(x), half_cast<double>(y)))); });
+				binary_test("fdim", [](half a, half b) -> bool { half c = fdim(a, b); return isnan(a) || isnan(b) ||
+					(isinf(a) && isinf(b) && signbit(a)==signbit(b)) || ((a>b) && comp(c, a-b)) || ((a<=b) && comp(c, half_cast<half>(0.0))); });
+				ternary_test("fma", [](half x, half y, half z) { return comp(fma(x, y, z), half_cast<half>(half_cast<double>(x)*half_cast<double>(y)+half_cast<double>(z))); });
+		//		ternary_reference_test("fma", half_float::fma);
+
+				//test exponential functions
+				unary_reference_test("exp", half_float::exp);
+				unary_reference_test("exp2", half_float::exp2);
+				unary_reference_test("expm1", half_float::expm1);
+				unary_reference_test("log", half_float::log);
+				unary_reference_test("log10", half_float::log10);
+				unary_reference_test("log1p", half_float::log1p);
+				unary_reference_test("log2", half_float::log2);
+
+				//test power functions
+				unary_reference_test("sqrt", half_float::sqrt);
+				unary_reference_test("cbrt", half_float::cbrt);
+				binary_reference_test("pow", half_float::pow);
+				binary_reference_test<half(half,half)>("hypot", half_float::hypot);
+		//		ternary_reference_test<half(half,half,half)>("hypot3", half_float::hypot);
+
+				//test trig functions
+				unary_reference_test("sin", half_float::sin);
+				unary_reference_test("cos", half_float::cos);
+				unary_reference_test("tan", half_float::tan);
+				unary_reference_test("asin", half_float::asin);
+				unary_reference_test("acos", half_float::acos);
+				unary_reference_test("atan", half_float::atan);
+				binary_reference_test("atan2", half_float::atan2);
+
+				//test hyp functions
+				unary_reference_test("sinh", half_float::sinh);
+				unary_reference_test("cosh", half_float::cosh);
+				unary_reference_test("tanh", half_float::tanh);
+				unary_reference_test("asinh", half_float::asinh);
+				unary_reference_test("acosh", half_float::acosh);
+				unary_reference_test("atanh", half_float::atanh);
+
+				//test err functions
+				unary_reference_test("erf", half_float::erf);
+				unary_reference_test("erfc", half_float::erfc);
+				unary_reference_test("lgamma", half_float::lgamma);
+				unary_reference_test("tgamma", half_float::tgamma);
+
+				//test round functions
+				unary_test("ceil", [](half arg) { return comp(ceil(arg), half_cast<half>(std::ceil(half_cast<double>(arg)))); });
+				unary_test("floor", [](half arg) { return comp(floor(arg), half_cast<half>(std::floor(half_cast<double>(arg)))); });
+				unary_test("trunc", [](half arg) { return !isfinite(arg) || compz(trunc(arg), half_cast<half>(static_cast<int>(arg))); });
+				unary_test("round", [](half arg) { return !isfinite(arg) || compz(round(arg),
+					half_cast<half>(static_cast<int>(static_cast<double>(arg)+(signbit(arg) ? -0.5 : 0.5)))); });
+				unary_test("lround", [](half arg) { return !isfinite(arg) || lround(arg) ==
+					static_cast<long>(static_cast<double>(arg)+(signbit(arg) ? -0.5 : 0.5)); });
+				unary_test("nearbyint", [](half arg) { return !isfinite(arg) || compz(nearbyint(arg), half_cast<half>(half_cast<int>(arg))); });
+				unary_test("rint", [](half arg) { return !isfinite(arg) || compz(rint(arg), half_cast<half>(half_cast<int>(arg))); });
+				unary_test("lrint", [](half arg) { return !isfinite(arg) || lrint(arg) == half_cast<long>(arg); });
+			#if HALF_ENABLE_CPP11_LONG_LONG
+				unary_test("llround", [](half arg) { return !isfinite(arg) || llround(arg) ==
+					static_cast<long long>(static_cast<double>(arg)+(signbit(arg) ? -0.5 : 0.5)); });
+				unary_test("llrint", [](half arg) { return !isfinite(arg) || llrint(arg) == half_cast<long long>(arg); });
+			#endif
+
+				//test float functions
+				unary_test("frexp", [](half arg) -> bool { int eh, ef; bool eq = comp(frexp(arg, &eh),
+					static_cast<half>(std::frexp(static_cast<double>(arg), &ef))); return eq && (!isfinite(arg) || eh==ef); });
+				unary_test("ldexp", [](half arg) -> bool { unsigned int passed = 0; for(int i=-50; i<50; ++i) passed +=
+					comp(ldexp(arg, i), static_cast<half>(std::ldexp(static_cast<double>(arg), i))); return passed==100; });
+				unary_test("modf", [](half arg) -> bool { half h; double f; return comp(modf(arg, &h), static_cast<half>(
+					std::modf(static_cast<double>(arg), &f))) && comp(h, static_cast<half>(f)); });
+				binary_test("nextafter", [](half a, half b) -> bool { half c = nextafter(a, b); std::int16_t d = std::abs(
+					static_cast<std::int16_t>(h2b(a)-h2b(c))); return ((isnan(a) || isnan(b)) && isnan(c)) ||
+					(compz(a, b) && compz(b, c)) || ((d==1||d==0x7FFF) && (a<b)==(a<c)); });
+				binary_test("nexttoward", [](half a, half b) -> bool { half c = nexttoward(a, static_cast<long double>(b)); std::int16_t d = std::abs(
+					static_cast<std::int16_t>(h2b(a)-h2b(c))); return ((isnan(a) || isnan(b)) && isnan(c)) ||
+					(compz(a, b) && compz(b, c)) || ((d==1||d==0x7FFF) && (a<b)==(a<c)); });
+				binary_test("copysign", [](half a, half b) -> bool { half h = copysign(a, b); return comp(abs(h), abs(a)) && signbit(h)==signbit(b); });
+
+			#if HALF_ENABLE_CPP11_CMATH
+				//test basic functions
+				binary_test("remainder", [](half x, half y) { return comp(remainder(x, y), half_cast<half>(std::remainder(half_cast<double>(x), half_cast<double>(y)))); });
+				binary_test("remquo", [](half a, half b) -> bool { int qh = 0, qf = 0; return comp(remquo(a, b, &qh),
+					half_cast<half>(std::remquo(static_cast<double>(a), static_cast<double>(b), &qf))) && (qh&7)==(qf&7); });
+				binary_test("fmin", [](half x, half y) { return comp(fmin(x, y), half_cast<half>(std::fmin(half_cast<double>(x), half_cast<double>(y)))); });
+				binary_test("fmax", [](half x, half y) { return comp(fmax(x, y), half_cast<half>(std::fmax(half_cast<double>(x), half_cast<double>(y)))); });
+				binary_test("fdim", [](half x, half y) { return comp(fdim(x, y), half_cast<half>(std::fdim(half_cast<double>(x), half_cast<double>(y)))); });
+				ternary_test("fma", [](half x, half y, half z) { return comp(fma(x, y, z), half_cast<half>(std::fma(half_cast<double>(x), half_cast<double>(y), half_cast<double>(z)))); });
+
+				//test round functions
+				unary_test("trunc", [](half arg) { return comp(trunc(arg), half_cast<half>(std::trunc(half_cast<double>(arg)))); });
+				unary_test("round", [](half arg) { return comp(round(arg), half_cast<half>(std::round(half_cast<double>(arg)))); });
+				unary_test("lround", [](half arg) { return !isfinite(arg) || lround(arg) == std::lround(static_cast<double>(arg)); });
+				unary_test("llround", [](half arg) { return !isfinite(arg) || llround(arg) == std::llround(static_cast<double>(arg)); });
+			#if HALF_ROUND_STYLE == 1
+				unary_test("nearbyint", [](half arg) { return comp(nearbyint(arg), half_cast<half>(std::nearbyint(half_cast<double>(arg)))); });
+				unary_test("rint", [](half arg) { return comp(rint(arg), half_cast<half>(std::rint(half_cast<double>(arg)))); });
+				unary_test("lrint", [](half arg) { return !isfinite(arg) || half_float::lrint(arg) == std::lrint(static_cast<double>(arg)); });
+				unary_test("llrint", [](half arg) { return !isfinite(arg) || llrint(arg) == std::llrint(static_cast<double>(arg)); });
+			#endif
+
+				//test float functions
+				unary_test("scalbn", [](half arg) -> bool { unsigned int passed = 0; for(int i=-50; i<50; ++i) passed +=
+					comp(scalbn(arg, i), static_cast<half>(std::scalbn(static_cast<double>(arg), i))); return passed==100; });
+				unary_test("scalbln", [](half arg) -> bool { unsigned int passed = 0; for(long i=-50; i<50; ++i) passed +=
+					comp(scalbln(arg, i), static_cast<half>(std::scalbln(static_cast<double>(arg), i))); return passed==100; });
+				unary_test("ilogb", [](half arg) { return ilogb(arg) == std::ilogb(static_cast<double>(arg)); });
+				unary_test("logb", [](half arg) { return comp(logb(arg), static_cast<half>(std::logb(static_cast<double>(arg)))); });
+				binary_test("copysign", [](half a, half b) { return comp(copysign(a, b),
+					static_cast<half>(std::copysign(static_cast<double>(a), static_cast<double>(b)))); });
+
+				//test classification functions
+				unary_test("fpclassify", [](half arg) -> bool { int ch=fpclassify(arg), cf=std::fpclassify(
+					static_cast<double>(arg)); return ch==cf || (ch==FP_SUBNORMAL && cf==FP_NORMAL); });
+				unary_test("isfinite", [](half arg) { return isfinite(arg) == std::isfinite(static_cast<double>(arg)); });
+				unary_test("isinf", [](half arg) { return isinf(arg) == std::isinf(static_cast<double>(arg)); });
+				unary_test("isnan", [](half arg) { return isnan(arg) == std::isnan(static_cast<double>(arg)); });
+				unary_test("isnormal", [](half arg) { return isnormal(arg) == std::isnormal(static_cast<double>(arg)) ||
+					(!isnormal(arg) && fpclassify(arg)==FP_SUBNORMAL); });
+				unary_test("signbit", [](half arg) { return signbit(arg) == std::signbit(static_cast<double>(arg)); });
+
+				//test comparison functions
+				binary_test("isgreater", [](half a, half b) { return isgreater(a, b) == std::isgreater(static_cast<double>(a), static_cast<double>(b)); });
+				binary_test("isgreaterequal", [](half a, half b) { return isgreaterequal(a, b) == std::isgreaterequal(static_cast<double>(a), static_cast<double>(b)); });
+				binary_test("isless", [](half a, half b) { return isless(a, b) == std::isless(static_cast<double>(a), static_cast<double>(b)); });
+				binary_test("islessequal", [](half a, half b) { return islessequal(a, b) == std::islessequal(static_cast<double>(a), static_cast<double>(b)); });
+				binary_test("islessgreater", [](half a, half b) { return islessgreater(a, b) == std::islessgreater(static_cast<double>(a), static_cast<double>(b)); });
+				binary_test("isunordered", [](half a, half b) { return isunordered(a, b) == std::isunordered(static_cast<double>(a), static_cast<double>(b)); });
+			#endif
+
+				//test rounding
+				float_test("round_to_nearest", [](float f) -> bool { half a = half_cast<half,std::round_indeterminate>(f),
+					b(nextafter(a, copysign(std::numeric_limits<half>::infinity(), a))), h = half_cast<half,std::round_to_nearest>(f);
+					float af(a), bf(b), hf(h); return half_float::detail::builtin_isnan(f) || (std::abs(hf)>std::abs(f)&&comp(h, b)&&((std::abs(f-af)>std::abs(bf-f) ||
+					(std::abs(f-af)==std::abs(bf-f)&&!(h2b(h)&1)))||isinf(h))) || (std::abs(hf)<=std::abs(f)&&comp(h, a)&&((std::abs(f-af)<std::abs(bf-f) ||
+					(std::abs(f-af)==std::abs(bf-f)&&!(h2b(h)&1)))||isinf(h))); });
+				float_test("round_toward_zero", [](float f) -> bool { half a = half_cast<half,std::round_indeterminate>(f),
+					h = half_cast<half,std::round_toward_zero>(f); float af(a), hf(h); return half_float::detail::builtin_isnan(f) || isinf(a) || af == hf; });
+				float_test("round_toward_infinity", [](float f) -> bool { half a = half_cast<half,std::round_toward_zero>(f),
+					b(nextafter(a, copysign(std::numeric_limits<half>::infinity(), a))), h = half_cast<half,std::round_toward_infinity>(f);
+					float hf(h); return half_float::detail::builtin_isnan(f) || (comp(h, a)&&(signbit(h)||hf==f)) || (comp(h, b)&&!signbit(h)&&hf>f); });
+				float_test("round_toward_neg_infinity", [](float f) -> bool { half a = half_cast<half,std::round_toward_zero>(f),
+					b(nextafter(a, copysign(std::numeric_limits<half>::infinity(), a))), h = half_cast<half,std::round_toward_neg_infinity>(f);
+					float hf(h); return half_float::detail::builtin_isnan(f) || (comp(h, a)&&(!signbit(h)||hf==f)) || (comp(h, b)&&signbit(h)&&hf<f); });
+
+				//test float casting
+				auto rand23 = std::bind(std::uniform_int_distribution<std::uint32_t>(0, (1<<23)-1), std::default_random_engine());
+				unary_test("half_cast<float>", [](half arg) -> bool { float a = half_cast<float>(arg), b = static_cast<float>(arg);
+					return *reinterpret_cast<std::uint32_t*>(&a) == *reinterpret_cast<std::uint32_t*>(&b); });
+				unary_test("half_cast<round_to_nearest>(float)", [&rand23](half arg) -> bool { float f = half_cast<float>(arg);
+					std::uint32_t n=rand23(), m=1<<13; if(fpclassify(arg)==FP_SUBNORMAL) m <<= std::min(std::max(-ilogb(arg)-14, 0), 10);
+					*reinterpret_cast<std::uint32_t*>(&f) |= n&(m-1)&-isfinite(arg); return fpclassify(arg)==FP_ZERO ||
+					comp(half_cast<half,std::round_to_nearest>(f), ((n&(m>>1)) && ((n&((m>>1)-1)) || (h2b(arg)&1)))
+					? nextafter(arg, copysign(std::numeric_limits<half>::infinity(), arg)) : arg); });
+				unary_test("half_cast<round_toward_zero>(float)", [&rand23](half arg) -> bool { float f = half_cast<float>(arg);
+					std::uint32_t n=rand23(), m=1<<13; if(fpclassify(arg)==FP_SUBNORMAL) m <<= std::min(std::max(-ilogb(arg)-14, 0), 10);
+					*reinterpret_cast<std::uint32_t*>(&f) |= n&(m-1)&-isfinite(arg); return comp(half_cast<half,std::round_toward_zero>(f), arg); });
+				unary_test("half_cast<round_toward_infinity>(float)", [&rand23](half arg) -> bool { float f = half_cast<float>(arg);
+					std::uint32_t n=rand23(), m=1<<13; if(fpclassify(arg)==FP_SUBNORMAL) m <<= std::min(std::max(-ilogb(arg)-14, 0), 10);
+					*reinterpret_cast<std::uint32_t*>(&f) |= n&(m-1)&-isfinite(arg); return comp(half_cast<half,std::round_toward_infinity>(f),
+					(!signbit(arg)&&(n&(m-1))) ? nextafter(arg, copysign(std::numeric_limits<half>::infinity(), arg)) : arg); });
+				unary_test("half_cast<round_toward_neg_infinity>(float)", [&rand23](half arg) -> bool { float f = half_cast<float>(arg);
+					std::uint32_t n=rand23(), m=1<<13; if(fpclassify(arg)==FP_SUBNORMAL) m <<= std::min(std::max(-ilogb(arg)-14, 0), 10);
+					*reinterpret_cast<std::uint32_t*>(&f) |= n&(m-1)&-isfinite(arg); return comp(half_cast<half,std::round_toward_neg_infinity>(f),
+					(signbit(arg)&&(n&(m-1))) ? nextafter(arg, copysign(std::numeric_limits<half>::infinity(), arg)) : arg); });
+
+				//test double casting
+				auto rand52 = std::bind(std::uniform_int_distribution<std::uint64_t>(0, (1ULL<<52)-1), std::default_random_engine());
+				unary_test("half_cast<double>", [](half arg) -> bool { double a = half_cast<double>(arg), b = static_cast<float>(arg);
+					return isnan(arg) || *reinterpret_cast<std::uint64_t*>(&a) == *reinterpret_cast<std::uint64_t*>(&b); });
+				unary_test("half_cast<round_to_nearest>(double)", [&rand52](half arg) -> bool { double f = half_cast<double>(arg);
+					std::uint64_t n=rand52(), m=1ULL<<42; if(fpclassify(arg)==FP_SUBNORMAL) m <<= std::min(std::max(-ilogb(arg)-14, 0), 10);
+					*reinterpret_cast<std::uint64_t*>(&f) |= n&(m-1)&-isfinite(arg); return fpclassify(arg)==FP_ZERO ||
+					comp(half_cast<half,std::round_to_nearest>(f), ((n&(m>>1)) && ((n&((m>>1)-1)) || (h2b(arg)&1)))
+					? nextafter(arg, copysign(std::numeric_limits<half>::infinity(), arg)) : arg); });
+				unary_test("half_cast<round_toward_zero>(double)", [&rand52](half arg) -> bool { double f = half_cast<double>(arg);
+					std::uint64_t n=rand52(), m=1ULL<<42; if(fpclassify(arg)==FP_SUBNORMAL) m <<= std::min(std::max(-ilogb(arg)-14, 0), 10);
+					*reinterpret_cast<std::uint64_t*>(&f) |= n&(m-1)&-isfinite(arg); return comp(half_cast<half,std::round_toward_zero>(f), arg); });
+				unary_test("half_cast<round_toward_infinity>(double)", [&rand52](half arg) -> bool { double f = half_cast<double>(arg);
+					std::uint64_t n=rand52(), m=1ULL<<42; if(fpclassify(arg)==FP_SUBNORMAL) m <<= std::min(std::max(-ilogb(arg)-14, 0), 10);
+					*reinterpret_cast<std::uint64_t*>(&f) |= n&(m-1)&-isfinite(arg); return comp(half_cast<half,std::round_toward_infinity>(f),
+					(!signbit(arg)&&(n&(m-1))) ? nextafter(arg, copysign(std::numeric_limits<half>::infinity(), arg)) : arg); });
+				unary_test("half_cast<round_toward_neg_infinity>(double)", [&rand52](half arg) -> bool { double f = half_cast<double>(arg);
+					std::uint64_t n=rand52(), m=1ULL<<42; if(fpclassify(arg)==FP_SUBNORMAL) m <<= std::min(std::max(-ilogb(arg)-14, 0), 10);
+					*reinterpret_cast<std::uint64_t*>(&f) |= n&(m-1)&-isfinite(arg); return comp(half_cast<half,std::round_toward_neg_infinity>(f),
+					(signbit(arg)&&(n&(m-1))) ? nextafter(arg, copysign(std::numeric_limits<half>::infinity(), arg)) : arg); });
+
+				//test casting to int
+			#if HALF_ENABLE_CPP11_CMATH
+				unary_test("half_cast<int>", [](half arg) -> bool { return !isfinite(arg) || half_cast<int>(arg) == static_cast<int>(nearbyint(arg)); });
+			#endif
+				unary_test("half_cast<int,round_to_nearest>", [](half arg) -> bool { float fi, ff = std::abs(std::modf(static_cast<float>(arg), &fi));
+					int i = static_cast<int>(fi); i += (-2*signbit(arg)+1) * (ff>0.5f || (ff==0.5f && i&1));
+					return !isfinite(arg) || half_cast<int,std::round_to_nearest>(arg) == i;
+				});
+				unary_test("half_cast<int,round_toward_zero>", [](half arg) -> bool { return !isfinite(arg) || half_cast<int,std::round_toward_zero>(arg) == static_cast<int>(arg); });
+				unary_test("half_cast<int,round_toward_infinity>", [](half arg) -> bool { float fi, ff = std::modf(static_cast<float>(arg), &fi);
+					return !isfinite(arg) || half_cast<int,std::round_toward_infinity>(arg) == (static_cast<int>(fi)+(ff>0.0f)); });
+				unary_test("half_cast<int,round_toward_neg_infinity>", [](half arg) -> bool { float fi, ff = std::modf(static_cast<float>(arg), &fi);
+					return !isfinite(arg) || half_cast<int,std::round_toward_neg_infinity>(arg) == (static_cast<int>(fi)-(ff<0.0f)); });
+
+				//test casting from int
+				int_test("half_cast<>(int)", [](int i) -> bool { return comp(half_cast<half>(i), half_cast<half>(static_cast<float>(i))); });
+				int_test("half_cast<round_to_nearest>(int)", [](int i) -> bool {
+					return comp(half_cast<half,std::round_to_nearest>(i), half_cast<half,std::round_to_nearest>(static_cast<float>(i))); });
+				int_test("half_cast<round_toward_zero>(int)", [](int i) -> bool {
+					return comp(half_cast<half,std::round_toward_zero>(i), half_cast<half,std::round_toward_zero>(static_cast<float>(i))); });
+				int_test("half_cast<round_toward_infinity>(int)", [](int i) -> bool {
+					return comp(half_cast<half,std::round_toward_infinity>(i), half_cast<half,std::round_toward_infinity>(static_cast<float>(i))); });
+				int_test("half_cast<round_toward_neg_infinity>(int)", [](int i) -> bool {
+					return comp(half_cast<half,std::round_toward_neg_infinity>(i), half_cast<half,std::round_toward_neg_infinity>(static_cast<float>(i))); });
+
+				//test numeric limits
+				unary_test("numeric_limits::min", [](half arg) { return !isnormal(arg) || signbit(arg) || arg>=std::numeric_limits<half>::min(); });
+				unary_test("numeric_limits::lowest", [](half arg) { return !isfinite(arg) || arg>=std::numeric_limits<half>::lowest(); });
+				unary_test("numeric_limits::max", [](half arg) { return !isfinite(arg) || arg<=std::numeric_limits<half>::max(); });
+				unary_test("numeric_limits::denorm_min", [](half arg) { return !isfinite(arg) ||
+					signbit(arg) || arg==static_cast<half>(0.0f) || arg>=std::numeric_limits<half>::denorm_min(); });
+				simple_test("numeric_limits::infinity", []() { return isinf(std::numeric_limits<half>::infinity()) &&
+					!signbit(std::numeric_limits<half>::infinity()); });
+				simple_test("numeric_limits::quiet_NaN", []() { return isnan(std::numeric_limits<half>::quiet_NaN()); });
+				simple_test("numeric_limits::signaling_NaN", []() { return isnan(std::numeric_limits<half>::signaling_NaN()); });
+				simple_test("numeric_limits::epsilon", []() { return nextafter(static_cast<half>(1.0f),
+					std::numeric_limits<half>::infinity())-static_cast<half>(1.0f) == std::numeric_limits<half>::epsilon(); });
+				binary_test("numeric_limits::round_error", [](half a, half b) -> bool { double c = static_cast<double>(a) +
+					static_cast<double>(b); return !isfinite(a) || !isfinite(b) || c>static_cast<double>(std::numeric_limits<half>::max()) ||
+					c<static_cast<double>(std::numeric_limits<half>::lowest()) || std::abs(c-static_cast<double>(
+					static_cast<half>(c)))<=std::ldexp(static_cast<double>(std::numeric_limits<half>::round_error()),
+					ilogb(static_cast<half>(c))-std::numeric_limits<half>::digits+1); });
+
+			#if HALF_ENABLE_CPP11_HASH
+				//test hash
+				binary_test("hash function", [](half a, half b) { return a != b || std::hash<half>()(a) == std::hash<half>()(b); });
+				struct { bool operator()(half a, half b) const { return h2b(a) == h2b(b); } } bincomp;
+				std::unordered_map<half,const half*,std::hash<half>,decltype(bincomp)> map(65536, std::hash<half>(), bincomp);
+				unary_test("hash insert", [&map](const half &arg) { return map.insert(std::make_pair(arg, &arg)).second; });
+				unary_test("hash retrieve", [&map](const half &arg) { return map[arg] == &arg; });
+			#endif
+
+			#if HALF_ENABLE_CPP11_USER_LITERALS
+				//test literals
+				simple_test("literals", []() -> bool { using namespace half_float::literal; return comp(0.0_h, half(0.0f)) && comp(-1.0_h, half(-1.0f)) &&
+					comp(+3.14159265359_h, half(3.14159265359f)) && comp(1e-2_h, half(1e-2f)) && comp(-4.2e3_h, half(-4.2e3f)); });
+			#endif
+		*/
+		if (failed_.empty())
+			log_ << "all tests passed\n";
+		else
+		{
+			log_ << (failed_.size()) << " OF " << tests_ << " FAILED:\n    ";
+			std::copy(failed_.begin(), failed_.end(), std::ostream_iterator<std::string>(log_, "\n    "));
+			log_ << '\n';
+		}
+		return failed_.size();
+	}
+
+	// Builds and shuffles the operand pools intended for the timing benchmarks.
+	//
+	// Every 16-bit pattern below 0x7C00 is visited once and distributed as follows
+	// ("twin" = the same pattern with bit 0x8000 set):
+	//   finite   : the pattern and its twin
+	//   positive : the pattern
+	//   one2one  : patterns <= 0x3C00 and their twins
+	//   one2inf  : patterns >  0x3C00
+	//   neg2inf  : every pattern, plus the twin for patterns <= 0x3C00
+	// NOTE(review): the pool names suggest 0x7C00 is +infinity and 0x3C00 is 1.0 in
+	// the binary16 encoding, assuming b2h() reinterprets raw bits - confirm.
+	//
+	// xs/ys/zs are copies of `finite` taken before any shuffling; `results` is a
+	// scratch buffer of the same length. All benchmark invocations are currently
+	// commented out, so the function only prepares the pools.
+	void performance_test()
+	{
+		std::vector<half> finite, positive, one2one, one2inf, neg2inf;
+		for (std::uint16_t bits = 0; bits < 0x7C00; ++bits)
+		{
+			const half pos = b2h(bits);
+			const half neg = b2h(bits | 0x8000);
+			finite.push_back(pos);
+			finite.push_back(neg);
+			positive.push_back(pos);
+			neg2inf.push_back(pos);
+			if (bits > 0x3C00)
+			{
+				one2inf.push_back(pos);
+				continue;
+			}
+			one2one.push_back(pos);
+			one2one.push_back(neg);
+			neg2inf.push_back(neg);
+		}
+		std::vector<half> xs(finite), ys(finite), zs(finite), results(finite.size());
+		std::default_random_engine g;
+		// Shuffle order is kept fixed so the engine state seen by each pool does not change.
+		std::vector<half> *const pools[] = { &finite, &positive, &one2one, &one2inf, &neg2inf, &xs, &ys, &zs };
+		for (std::vector<half> *pool : pools)
+			std::shuffle(pool->begin(), pool->end(), g);
+		/*
+				OPERATOR_PERFORMANCE_TEST(+, xs, ys, 4);
+				OPERATOR_PERFORMANCE_TEST(-, xs, ys, 4);
+				OPERATOR_PERFORMANCE_TEST(*, xs, ys, 4);
+				OPERATOR_PERFORMANCE_TEST(/, xs, ys, 4);
+
+				BINARY_PERFORMANCE_TEST(fdim, xs, ys, 8);
+				TERNARY_PERFORMANCE_TEST(fma, xs, ys, zs, 64);
+
+				UNARY_PERFORMANCE_TEST(exp, finite, 1000);
+				UNARY_PERFORMANCE_TEST(exp2, finite, 1000);
+				UNARY_PERFORMANCE_TEST(expm1, finite, 1000);
+				UNARY_PERFORMANCE_TEST(log, positive, 1000);
+				UNARY_PERFORMANCE_TEST(log10, positive, 1000);
+				UNARY_PERFORMANCE_TEST(log1p, neg2inf, 1000);
+				UNARY_PERFORMANCE_TEST(log2, positive, 1000);
+
+				UNARY_PERFORMANCE_TEST(sqrt, positive, 1000);
+				UNARY_PERFORMANCE_TEST(cbrt, finite, 1000);
+				BINARY_PERFORMANCE_TEST(pow, xs, ys, 8);
+				BINARY_PERFORMANCE_TEST(hypot, xs, ys, 8);
+
+				UNARY_PERFORMANCE_TEST(sin, finite, 1000);
+				UNARY_PERFORMANCE_TEST(cos, finite, 1000);
+				UNARY_PERFORMANCE_TEST(tan, finite, 1000);
+				UNARY_PERFORMANCE_TEST(asin, one2one, 1000);
+				UNARY_PERFORMANCE_TEST(acos, one2one, 1000);
+				UNARY_PERFORMANCE_TEST(atan, finite, 1000);
+				BINARY_PERFORMANCE_TEST(atan2, xs, ys, 8);
+
+				UNARY_PERFORMANCE_TEST(sinh, finite, 1000);
+				UNARY_PERFORMANCE_TEST(cosh, finite, 1000);
+				UNARY_PERFORMANCE_TEST(tanh, finite, 1000);
+				UNARY_PERFORMANCE_TEST(asinh, finite, 1000);
+				UNARY_PERFORMANCE_TEST(acosh, one2inf, 1000);
+				UNARY_PERFORMANCE_TEST(atanh, one2one, 1000);
+
+				UNARY_PERFORMANCE_TEST(erf, finite, 1000);
+				UNARY_PERFORMANCE_TEST(erfc, finite, 1000);
+				UNARY_PERFORMANCE_TEST(lgamma, finite, 1000);
+				UNARY_PERFORMANCE_TEST(tgamma, finite, 1000);
+		*/
+	}
+
+private:
+	// Flat sequence of half-precision values.
+	using half_vector = std::vector<half>;
+	// Named groups of half values: group name -> values in that group.
+	using test_map = std::map<std::string, half_vector>;
+	// Integer tag stored per group name (presumably the floating-point class of the group - confirm against where it is filled).
+	using class_map = std::map<std::string, int>;
+
+	// Runs a two-argument predicate over every value of every group in halfs_.
+	//
+	// name: label written to the log and, on failure, appended to failed_.
+	// test: callable invoked as test(value, tag), where tag is classes_[group name];
+	//       its result is added to a pass counter, so it should yield bool (or 0/1).
+	// Returns true only when every group passed completely.
+	//
+	// A per-group line ("all passed" or "<n> of <m> FAILED") is written to log_,
+	// and tests_ is incremented once per call.
+	template<typename F> bool class_test(const std::string &name, F &&test)
+	{
+		log_ << "testing " << name << ":\n";
+		unsigned int cleanGroups = 0;
+		for (auto &group : halfs_)
+		{
+			int fpclass = classes_[group.first];
+			unsigned int ok = 0;
+			for (auto &value : group.second)
+				ok += test(value, fpclass);
+			log_ << "    " << group.first << ": ";
+			if (ok != group.second.size())
+			{
+				log_ << (group.second.size() - ok) << " of " << group.second.size() << " FAILED\n";
+				continue;
+			}
+			log_ << "all passed\n";
+			++cleanGroups;
+		}
+		log_ << '\n';
+		++tests_;
+		const bool allClean = cleanGroups == halfs_.size();
+		if (!allClean)
+			failed_.push_back(name);
+		return allClean;
+	}
+
+	// Runs a single argument-less check and records its outcome.
+	//
+	// name: label written to the log and, on failure, appended to failed_.
+	// test: callable invoked once as test(); its result is converted to bool.
+	// Returns the outcome of the check. tests_ is incremented once per call.
+	template<typename F> bool simple_test(const std::string &name, F &&test)
+	{
+		log_ << "testing " << name << ": ";
+		const bool ok = test();
+		const char *verdict = "FAILED";
+		if (ok)
+			verdict = "passed";
+		log_ << verdict << "\n\n";
+		++tests_;
+		if (ok)
+			return true;
+		failed_.push_back(name);
+		return false;
+	}
+
+	// Runs a one-argument predicate over every value of every group in halfs_.
+	//
+	// name: label used in the log, the optional CSV row and the failed_ list.
+	// test: callable invoked as test(value) with a reference to the stored element
+	//       (so predicates may rely on the element's address); its result is added
+	//       to a pass counter, so it should yield bool (or 0/1).
+	// Returns true only when every group passed completely.
+	//
+	// Per-group results go to log_; if csv_ is set, one "name;failures" row is
+	// written. tests_ is incremented once per call.
+	template<typename F> bool unary_test(const std::string &name, F &&test)
+	{
+		log_ << "testing " << name << ":\n";
+		unsigned int cleanGroups = 0, totalFailures = 0;
+		for (auto &group : halfs_)
+		{
+			unsigned int ok = 0;
+			for (auto &value : group.second)
+				ok += test(value);
+			log_ << "    " << group.first << ": ";
+			if (ok == group.second.size())
+			{
+				log_ << "all passed\n";
+				++cleanGroups;
+				continue;
+			}
+			totalFailures += group.second.size() - ok;
+			log_ << (group.second.size() - ok) << " of " << group.second.size() << " FAILED\n";
+		}
+		if (csv_)
+			*csv_ << name << ";" << totalFailures << '\n';
+		if (!totalFailures)
+			log_ << '\n';
+		else
+			log_ << totalFailures << " FAILED\n\n";
+		++tests_;
+		const bool allClean = cleanGroups == halfs_.size();
+		if (!allClean)
+			failed_.push_back(name);
+		return allClean;
+	}
+
+	// Runs a two-argument predicate over pairs of values drawn from every ordered
+	// pair of groups in halfs_.
+	//
+	// name: label used in the log, the optional CSV row and the failed_ list.
+	// test: callable invoked as test(half, half); its result is added to the pass
+	//       counter, so it should yield bool (or 0/1).
+	// Returns true only if every invocation passed.
+	//
+	// With fast_ set, only every 64th element of each group is visited and, for
+	// groups holding at least 64 elements, a random value in [0, 63] is OR-ed into
+	// the bit pattern so that other values inside the stride get sampled as well.
+	// Without fast_ every pair is tested and progress is echoed to std::cout.
+	//
+	// The invocation counters are 64 bits wide on purpose: an exhaustive run can
+	// reach 2^32 invocations (presumably halfs_ covers all 65536 bit patterns -
+	// confirm), which would wrap a 32-bit `unsigned long` (e.g. on LLP64 targets)
+	// and corrupt the "<n> of <m> FAILED" summary.
+	template<typename F> bool binary_test(const std::string &name, F &&test)
+	{
+		unsigned long long tests = 0, count = 0;
+		const unsigned long step = fast_ ? 64 : 1;
+		auto rand = std::bind(std::uniform_int_distribution<std::uint16_t>(0, step - 1), std::default_random_engine());
+		std::set<std::string> failed_tests;
+		log_ << "testing " << name << (fast_ ? ": " : ":\n");
+		for (auto iterB1 = halfs_.begin(); iterB1 != halfs_.end(); ++iterB1)
+		{
+			const unsigned int end1 = iterB1->second.size();
+			for (auto iterB2 = halfs_.begin(); iterB2 != halfs_.end(); ++iterB2)
+			{
+				if (!fast_)
+					std::cout << iterB1->first << " x " << iterB2->first;
+				bool failed = false;
+				const unsigned int end2 = iterB2->second.size();
+				for (unsigned int i = 0; i < end1; i += step)
+				{
+					half a = iterB1->second[i];
+					// Jitter the low bits so sampling is not locked to stride boundaries.
+					if (fast_ && end1 >= step)
+						a = b2h(h2b(a) | rand());
+					for (unsigned int j = 0; j < end2; j += step)
+					{
+						half b = iterB2->second[j];
+						if (fast_ && end2 >= step)
+							b = b2h(h2b(b) | rand());
+						const bool success = test(a, b);
+						count += success;
+						if (!success)
+							failed = true;
+						++tests;
+					}
+				}
+				if (!fast_)
+					std::cout << " done\n";
+				if (failed)
+					failed_tests.insert(iterB1->first + " x " + iterB2->first);
+			}
+		}
+		const bool passed = count == tests;
+		if (csv_)
+			*csv_ << name << ";" << (tests - count) << '\n';
+		if (passed)
+			log_ << "all passed\n\n";
+		else
+		{
+			log_ << (tests - count) << " of " << tests << " FAILED\n";
+			// List every group combination that produced at least one failure.
+			for (const auto &s : failed_tests)
+				log_ << s << " FAILED\n";
+			log_ << '\n';
+			failed_.push_back(name);
+		}
+		++tests_;
+		return passed;
+	}
+
+	// Runs a three-argument predicate over triples of values drawn from every
+	// ordered triple of groups in halfs_.
+	//
+	// name: label used in the log, the optional CSV row and the failed_ list.
+	// test: callable invoked as test(half, half, half); its result is added to the
+	//       pass counter, so it should yield bool (or 0/1).
+	// Returns true only if every invocation passed.
+	//
+	// With fast_ set, only every 256th element of each group is visited and, for
+	// groups holding at least 256 elements, a random value in [0, 255] is OR-ed
+	// into the bit pattern so that other values inside the stride get sampled.
+	//
+	// The invocation counters are 64 bits wide on purpose: the triple loop can
+	// exceed 2^32 invocations when fast_ is off, which would wrap a 32-bit
+	// `unsigned int` and corrupt both the pass/fail decision and the summary.
+	template<typename F> bool ternary_test(const std::string &name, F &&test)
+	{
+		unsigned long long tests = 0, count = 0;
+		const unsigned int step = fast_ ? 256 : 1;
+		auto rand = std::bind(std::uniform_int_distribution<std::uint16_t>(0, step - 1), std::default_random_engine());
+		std::set<std::string> failed_tests;
+		log_ << "testing " << name << ": ";
+		for (auto iterB1 = halfs_.begin(); iterB1 != halfs_.end(); ++iterB1)
+		{
+			const unsigned int end1 = iterB1->second.size();
+			for (auto iterB2 = halfs_.begin(); iterB2 != halfs_.end(); ++iterB2)
+			{
+				const unsigned int end2 = iterB2->second.size();
+				for (auto iterB3 = halfs_.begin(); iterB3 != halfs_.end(); ++iterB3)
+				{
+					bool failed = false;
+					const unsigned int end3 = iterB3->second.size();
+					for (unsigned int i = 0; i < end1; i += step)
+					{
+						half a = iterB1->second[i];
+						// Jitter the low bits so sampling is not locked to stride boundaries.
+						if (fast_ && end1 >= step)
+							a = b2h(h2b(a) | rand());
+						for (unsigned int j = 0; j < end2; j += step)
+						{
+							half b = iterB2->second[j];
+							if (fast_ && end2 >= step)
+								b = b2h(h2b(b) | rand());
+							for (unsigned int k = 0; k < end3; k += step)
+							{
+								half c = iterB3->second[k];
+								if (fast_ && end3 >= step)
+									c = b2h(h2b(c) | rand());
+								const bool success = test(a, b, c);
+								count += success;
+								if (!success)
+									failed = true;
+								++tests;
+							}
+						}
+					}
+					if (failed)
+						failed_tests.insert(iterB1->first + " x " + iterB2->first + " x " + iterB3->first);
+				}
+			}
+		}
+		const bool passed = count == tests;
+		if (csv_)
+			*csv_ << name << ";" << (tests - count) << '\n';
+		if (passed)
+			log_ << "all passed\n\n";
+		else
+		{
+			log_ << (tests - count) << " of " << tests << " failed\n\n";
+			// List every group combination that produced at least one failure.
+			for (const auto &s : failed_tests)
+				log_ << s << " FAILED\n";
+			log_ << '\n';
+			failed_.push_back(name);
+		}
+		++tests_;
+		return passed;
+	}
+
+	// Runs a predicate over 32-bit patterns reinterpreted as float.
+	//
+	// name: label written to the log and, on failure, appended to failed_.
+	// test: callable invoked as test(float); its result is added to the pass
+	//       counter, so it should yield bool (or 0/1).
+	// Returns true only if every invocation passed. tests_ is incremented once.
+	//
+	// With fast_ set, one million uniformly random bit patterns are tested.
+	// Otherwise all 2^32 bit patterns are enumerated in ascending order. The index
+	// is 64 bits wide because the iteration count 2^32 is not representable in
+	// std::uint32_t. The previous exhaustive loop, `for (i = 0; i++ > 0; )`, had a
+	// condition that was false on its first evaluation, so it never ran and the
+	// test was always reported as completely failed.
+	template<typename F> bool float_test(const std::string &name, F &&test)
+	{
+		auto rand32 = std::bind(std::uniform_int_distribution<std::uint32_t>(0, std::numeric_limits<std::uint32_t>::max()), std::default_random_engine());
+		const unsigned long long tests = fast_ ? 1000000ULL : (1ULL << 32);
+		unsigned long long count = 0;
+		log_ << "testing " << name << ": ";
+		for (unsigned long long i = 0; i < tests; ++i)
+		{
+			// fast_: random pattern; exhaustive: the loop index is the pattern.
+			std::uint32_t u = fast_ ? rand32() : static_cast<std::uint32_t>(i);
+			// Bit-level reinterpretation, kept as in the rest of this file.
+			count += test(*reinterpret_cast<float*>(&u));
+		}
+		const bool passed = count == tests;
+		if (passed)
+			log_ << "all passed\n\n";
+		else
+		{
+			log_ << (tests - count) << " of " << tests << " FAILED\n\n";
+			failed_.push_back(name);
+		}
+		++tests_;
+		return passed;
+	}
+
+	// Runs a predicate over every integer in [-(1 << 16), (1 << 16)].
+	//
+	// name: label written to the log and, on failure, appended to failed_.
+	// test: callable invoked as test(int); its result is added to the pass
+	//       counter, so it should yield bool (or 0/1).
+	// Returns true only if all (1 << 17) + 1 invocations passed.
+	// tests_ is incremented once per call.
+	template<typename F> bool int_test(const std::string &name, F &&test)
+	{
+		const int bound = 1 << 16;
+		const unsigned int tests = (1 << 17) + 1;
+		log_ << "testing " << name << ": ";
+		unsigned int count = 0;
+		int i = -bound;
+		while (i <= bound)
+		{
+			count += test(i);
+			++i;
+		}
+		const bool passed = count == tests;
+		if (!passed)
+		{
+			log_ << (tests - count) << " of " << tests << " FAILED\n\n";
+			failed_.push_back(name);
+		}
+		else
+			log_ << "all passed\n\n";
+		++tests_;
+		return passed;
+	}
+
+	// Checks a unary half function against a precomputed reference table.
+	//
+	// name: test label; the table is read from the binary file "reference/<name>".
+	// fn:   callable invoked as fn(half), returning the value under test.
+	// Returns the result of unary_test() for the comparison predicate.
+	// Throws std::runtime_error if the reference file cannot be opened.
+	//
+	// The table holds one pair of halfs per 16-bit pattern and is indexed by
+	// h2b(arg). If rough_ is set or the half rounding style is indeterminate,
+	// matching either member of the pair counts as a pass; otherwise the result
+	// must match the member picked by select<round_style>(). For mismatches the
+	// largest absolute, relative and bit-pattern differences are tracked and
+	// printed to std::cout afterwards.
+	template<typename F> bool unary_reference_test(const std::string &name, F &&fn)
+	{
+		std::vector<std::pair<half, half>> reference(std::numeric_limits<std::uint16_t>::max() + 1);
+		const std::string path = "reference/" + name;
+		std::ifstream in(path, std::ios_base::in | std::ios_base::binary);
+		if (!in)
+			throw std::runtime_error("cannot open reference file for " + name);
+		char *raw = reinterpret_cast<char*>(reference.data());
+		in.read(raw, reference.size() * sizeof(reference.front()));
+		double err = 0.0, rel = 0.0;
+		int bin = 0;
+		bool success = unary_test(name, [&, this](half arg) -> bool {
+			std::pair<half, half> ref = reference[h2b(arg)];
+			half got = fn(arg);
+			half want = select<std::numeric_limits<half>::round_style>(ref);
+			bool matches;
+			if (rough_ || std::numeric_limits<half>::round_style == std::round_indeterminate)
+				matches = comp(got, ref.first) || comp(got, ref.second);
+			else
+				matches = comp(got, want);
+			if (matches)
+				return true;
+			double error = std::abs(static_cast<double>(got) - static_cast<double>(want));
+			// Debug aid (disabled):
+			//	std::cerr << arg << '(' << std::hex << h2b(arg) << ") = " << got << '(' << std::hex << h2b(got) << "), " << want << '(' << h2b(want) << ") -> " << error << '\n' << std::dec;
+			err = std::max(err, error);
+			rel = std::max(rel, error / std::abs(want));
+			bin = std::max(bin, std::abs(h2b(got) - h2b(want)));
+			return false;
+		});
+		if (!(err == 0.0 && rel == 0.0))
+			std::cout << name << " max error: " << err << ", max relative error: " << rel << ", max ulp error: " << /*ilog2*/(bin) << '\n';
+		return success;
+	}
+
+	// Checks a binary half function against precomputed reference records.
+	//
+	// name: test label; records are read from the binary file "reference/<name>".
+	// fn:   callable invoked as fn(half, half), returning the value under test.
+	// Returns the result of simple_test() for the whole record sweep.
+	// Throws std::runtime_error if the reference file cannot be opened.
+	//
+	// Each record stores the two operands and a pair of reference results; the
+	// record count is derived from the file size. If rough_ is set or the half
+	// rounding style is indeterminate, matching either reference result counts as
+	// a pass; otherwise the result must match the one picked by
+	// select<round_style>(). Failure counts and the largest absolute, relative and
+	// bit-pattern differences are printed to std::cout; if csv_ is set, one
+	// "name;failures" row is written.
+	template<typename F> bool binary_reference_test(const std::string &name, F &&fn)
+	{
+		struct record { half x, y; std::pair<half, half> result; };
+		const std::string path = "reference/" + name;
+		// Opened positioned at the end so tellg() yields the file size.
+		std::ifstream in(path, std::ios_base::in | std::ios_base::binary | std::ios_base::ate);
+		if (!in)
+			throw std::runtime_error("cannot open reference file for " + name);
+		unsigned int count = in.tellg() / sizeof(record);
+		unsigned int passed = 0;
+		std::vector<record> reference(count);
+		in.seekg(0, std::ios_base::beg);
+		in.clear();
+		char *raw = reinterpret_cast<char*>(reference.data());
+		in.read(raw, reference.size() * sizeof(reference.front()));
+		double err = 0.0, rel = 0.0;
+		int bin = 0;
+		bool success = simple_test(name, [&, this]() -> bool {
+			for (record entry : reference)
+			{
+				half x = entry.x;
+				half y = entry.y;
+				half got = fn(x, y);
+				half want = select<std::numeric_limits<half>::round_style>(entry.result);
+				bool matches;
+				if (rough_ || std::numeric_limits<half>::round_style == std::round_indeterminate)
+					matches = comp(got, entry.result.first) || comp(got, entry.result.second);
+				else
+					matches = comp(got, want);
+				if (!matches)
+				{
+					double error = std::abs(static_cast<double>(got) - static_cast<double>(want));
+					// Debug aid (disabled):
+					//	std::cerr << x << ", " << y << " = " << got << '(' << std::hex << h2b(got) << "), " << want << '(' << h2b(want) << ") -> " << error << '\n' << std::dec;
+					err = std::max(err, error);
+					rel = std::max(rel, error / std::abs(want));
+					bin = std::max(bin, std::abs(h2b(got) - h2b(want)));
+				}
+				passed += matches;
+			}
+			if (csv_)
+				*csv_ << name << ";" << (count - passed) << '\n';
+			return passed == count;
+		});
+		if (passed != count)
+			std::cout << name << ": " << (count - passed) << " of " << count << " failed\n";
+		if (!(err == 0.0 && rel == 0.0))
+			std::cout << name << " max error: " << err << ", max relative error: " << rel << ", max ulp error: " << /*ilog2*/(bin) << '\n';
+		return success;
+	}
+
+	/// Compares a ternary function against the records stored in "reference/<name>".
+	/// Each record carries three half arguments and a pair of candidate results; the
+	/// number of records is derived from the file size.
+	/// @param name test name, also used as the file name below "reference/"
+	/// @param fn callable invoked with three half arguments, producing the value under test
+	/// @return whatever simple_test() reports for this test
+	/// @throws std::runtime_error if the reference file cannot be opened
+	template<typename F> bool ternary_reference_test(const std::string &name, F &&fn)
+	{
+		struct record { half x, y, z; std::pair<half, half> result; };
+
+		// opened positioned at the end so that tellg() yields the file size
+		std::ifstream in("reference/" + name, std::ios_base::in | std::ios_base::binary | std::ios_base::ate);
+		if (!in)
+			throw std::runtime_error("cannot open reference file for " + name);
+		unsigned int count = in.tellg() / sizeof(record);
+		unsigned int passed = 0;
+		std::vector<record> reference(count);
+		in.seekg(0, std::ios_base::beg);
+		in.clear();
+		in.read(reinterpret_cast<char*>(reference.data()), reference.size() * sizeof(reference.front()));
+
+		// worst deviations seen over all mismatching records
+		double max_abs = 0.0;
+		double max_rel = 0.0;
+		int max_bits = 0;
+
+		bool success = simple_test(name, [&, this]() -> bool {
+			for (record sample : reference)
+			{
+				half x = sample.x, y = sample.y, z = sample.z;
+				half actual = fn(x, y, z);
+				half chosen = select<std::numeric_limits<half>::round_style>(sample.result);
+
+				bool matches;
+				if (rough_ || std::numeric_limits<half>::round_style == std::round_indeterminate)
+					matches = comp(actual, sample.result.first) || comp(actual, sample.result.second);
+				else
+					matches = comp(actual, chosen);
+
+				if (matches)
+				{
+					++passed;
+					continue;
+				}
+				// debugging aid, kept disabled:
+				//	std::cerr << x << ", " << y << ", " << z << " = " << actual << '(' << std::hex << h2b(actual) << "), " << chosen << '(' << h2b(chosen) << ")\n" << std::dec;
+				double diff = std::abs(static_cast<double>(actual) - static_cast<double>(chosen));
+				max_abs = std::max(max_abs, diff);
+				max_rel = std::max(max_rel, diff / std::abs(chosen));
+				max_bits = std::max(max_bits, std::abs(h2b(actual) - h2b(chosen)));
+			}
+			if (csv_)
+				*csv_ << name << ";" << (count - passed) << '\n';
+			return passed == count;
+		});
+
+		if (passed != count)
+			std::cout << name << ": " << (count - passed) << " of " << count << " failed\n";
+		if (max_abs != 0.0 || max_rel != 0.0)
+			std::cout << name << " max error: " << max_abs << ", max relative error: " << max_rel << ", max ulp error: " << /*ilog2*/(max_bits) << '\n';
+		return success;
+	}
+
+	test_map halfs_;
+	class_map classes_;
+	unsigned int tests_;
+	std::vector<std::string> failed_;
+	std::ostream &log_;
+	std::ostream *csv_;
+	bool fast_;
+	bool rough_;
+};
+
+/// Scope timer: records the time of construction and, on destruction, prints the
+/// elapsed wall time in whole milliseconds to std::cout.
+struct timer
+{
+	/// Captures the starting time point.
+	timer() : start_(std::chrono::high_resolution_clock::now()) {}
+
+	/// Prints "time: <n> ms" for the interval since construction.
+	~timer()
+	{
+		using namespace std::chrono;
+		const milliseconds elapsed = duration_cast<milliseconds>(high_resolution_clock::now() - start_);
+		std::cout << "time: " << elapsed.count() << " ms\n";
+	}
+
+private:
+	std::chrono::time_point<std::chrono::high_resolution_clock> start_;	///< moment of construction
+};
+
+/// Entry point of the half-precision test program.
+///
+/// Unless HALF_ARITHMETIC_TYPE is defined, the floating-point rounding mode is first
+/// aligned with std::numeric_limits<half>::round_style. Command line arguments:
+///   -fast     sets the fast flag passed to half_test
+///   -rough    sets the rough flag passed to half_test
+///   *.csv     (longer than 4 characters) opened as the CSV output stream
+///   other     opened as the log file; without one the log goes to std::cout
+/// A later file argument replaces an earlier one of the same kind.
+///
+/// @return the value of half_test::test(), or -1 if a std::exception escapes
+int main(int argc, char *argv[]) try
+{
+#ifndef HALF_ARITHMETIC_TYPE
+	switch (std::numeric_limits<half>::round_style)
+	{
+#ifdef _WIN32
+	// _controlfp(new, mask): the new rounding bits come first and _MCW_RC selects the
+	// rounding-control field. The arguments used to be swapped, which left the
+	// round-to-nearest case as a no-op (mask 0).
+	case std::round_to_nearest: _controlfp(_RC_NEAR, _MCW_RC); break;
+	case std::round_toward_zero: _controlfp(_RC_CHOP, _MCW_RC); break;
+	case std::round_toward_infinity: _controlfp(_RC_UP, _MCW_RC); break;
+	case std::round_toward_neg_infinity: _controlfp(_RC_DOWN, _MCW_RC); break;
+#else
+	case std::round_to_nearest: std::fesetround(FE_TONEAREST); break;
+	case std::round_toward_zero: std::fesetround(FE_TOWARDZERO); break;
+	case std::round_toward_infinity: std::fesetround(FE_UPWARD); break;
+	case std::round_toward_neg_infinity: std::fesetround(FE_DOWNWARD); break;
+#endif
+	}
+#endif
+	// Disabled scratch code (conversion timing and constant-table generators), kept for reference.
+	/*
+		auto rand_abs = std::bind(std::uniform_int_distribution<std::uint32_t>(0x00000000, 0x7F100000), std::default_random_engine());
+		auto rand_sign = std::bind(std::uniform_int_distribution<std::uint32_t>(0, 1), std::default_random_engine());
+		std::vector<float> floats;
+		for(unsigned int i=0; i<1e8; ++i)
+		{
+			auto bits = rand_abs() | (rand_sign()<<31);
+			floats.push_back(*reinterpret_cast<float*>(&bits));
+		}
+		std::shuffle(floats.begin(), floats.end(), std::default_random_engine());
+		std::vector<half> halfs(floats.size());
+		{
+			timer time;
+			for(std::size_t i=0; i<floats.size(); ++i)
+				halfs[i] = half_cast<half,std::round_to_nearest>(floats[i]);
+		}
+		return 0;
+
+		half pi = half_cast<half>(3.1415926535897932384626433832795l);
+		std::cout << "Pi: " << pi << " - 0x" << std::hex << std::setfill('0') << std::setw(4) << h2b(pi) << std::dec
+			<< " - " << std::bitset<16>(static_cast<unsigned long long>(h2b(pi))).to_string() << std::endl;
+		half e = half_cast<half>(2.7182818284590452353602874713527l);
+		std::cout << "e:  " << e << " - 0x" << std::hex << std::setfill('0') << std::setw(4) << h2b(e) << std::dec
+			<< " - " << std::bitset<16>(static_cast<unsigned long long>(h2b(e))).to_string() << std::endl;
+
+		static const long double logs[] = {
+			1.0000000000000000000000000000000000000000000000000000000000000000000000000000L, 0.5849625007211561814537389439478165087598144076924810604557526545410982276485L,
+			0.3219280948873623478703194294893901758648313930245806120547563958159347765589L, 0.1699250014423123629074778878956330175196288153849621209115053090821964552970L,
+			0.0874628412503394082540660108104043540112672823448206881266090643866965081686L, 0.0443941193584534376531019906736094674630459333742491317685543002674288465967L,
+			0.0223678130284545082671320837460849094932677948156179815932199216587899627785L, 0.0112272554232541203378805844158839407281095943600297940811823651462712311786L,
+			0.0056245491938781069198591026740666017211096815383520359072957784732489771013L, 0.0028150156070540381547362547502839489729507927389771959487826944878598909400L,
+			0.0014081943928083889066101665016890524233311715793462235597709051792834906001L, 0.0007042690112466432585379340422201964456668872087249334581924550139514213168L,
+			0.0003521774803010272377989609925281744988670304302127133979341729842842377649L, 0.0001760994864425060348637509459678580940163670081839283659942864068257522373L,
+			0.0000880524301221769086378699983597183301490534085738474534831071719854721939L, 0.0000440268868273167176441087067175806394819146645511899503059774914593663365L,
+			0.0000220136113603404964890728830697555571275493801909791504158295359319433723L, 0.0000110068476674814423006223021573490183469930819844945565597452748333526464L,
+			0.0000055034343306486037230640321058826431606183125807276574241540303833251704L, 0.0000027517197895612831123023958331509538486493412831626219340570294203116559L,
+			0.0000013758605508411382010566802834037147561973553922354232704569052932922954L, 0.0000006879304394358496786728937442939160483304056131990916985043387874690617L,
+			0.0000003439652607217645360118314743718005315334062644619363447395987584138324L, 0.0000001719826406118446361936972479533123619972434705828085978955697643547921L,
+			0.0000000859913228686632156462565208266682841603921494181830811515318381744650L, 0.0000000429956620750168703982940244684787907148132725669106053076409624949917L,
+			0.0000000214978311976797556164155504126645192380395989504741781512309853438587L, 0.0000000107489156388827085092095702361647949603617203979413516082280717515504L,
+			0.0000000053744578294520620044408178949217773318785601260677517784797554422804L, 0.0000000026872289172287079490026152352638891824761667284401180026908031182361L,
+			0.0000000013436144592400232123622589569799954658536700992739887706412976115422L, 0.0000000006718072297764289157920422846078078155859484240808550018085324187007L };
+		std::ofstream out("logs.txt");
+		for(auto val : logs)
+			out << "0x" << std::hex << std::uppercase << std::setfill('0') << std::setw(8) << std::llrint(std::ldexp(val, 27)) << ", \n";
+		return 0;
+
+		using namespace half_float::literal;
+		std::cout << "0x" << std::hex << std::uppercase << std::setfill('0') << std::setw(8) << std::llrint(std::ldexp(0.6072529350088812561694l, 30)) << '\n';
+		std::ofstream out("atans.txt");
+		for(int i=0; i<32; ++i)
+			out << "0x" << std::hex << std::uppercase << std::setfill('0') << std::setw(8) << std::llrint(std::ldexp(std::atan(std::ldexp(1.0l, -i)), 30)) << ", \n";
+		return 0;
+
+		for(std::uint16_t i=0x3C00; i<0x7C00; ++i)
+		{
+			half x = b2h(i), y = half_cast<half,std::round_toward_neg_infinity>(std::erfc(half_cast<double>(x)));
+			std::cout << x << " (" << std::hex << std::uppercase << std::setfill('0') << std::setw(4) << i << std::dec << ")\t= " << y << '\n';
+			if(y == 0.0_h)
+				return 0;
+		}
+
+		std::cout << std::hex << std::uppercase << std::setfill('0') << std::setw(9) << std::llrint(std::ldexp(3.15l, 31-1)) << '\n';
+		std::cout << std::hex << std::uppercase << std::setfill('0') << std::setw(9) << std::llrint(std::ldexp(3.85l, 31-1)) << '\n';
+		std::cout << std::hex << std::uppercase << std::setfill('0') << std::setw(9) << std::llrint(std::ldexp(4.65l, 31-2)) << '\n';
+		return 0;
+
+		for(std::uint16_t i=0xBC00; i<0xFC00; ++i)
+		{
+			half x = b2h(i), y = half_cast<half, std::round_to_nearest>(std::exp2(half_cast<double>(x)));
+			std::cout << x << " (" << std::hex << std::uppercase << std::setfill('0') << std::setw(4) << i << std::dec << ")\t= " << y << '\n';
+			if(y == 0.0_h)
+				return 0;
+		}
+	*/
+	std::vector<std::string> args(argv + 1, argv + argc);
+	std::unique_ptr<std::ostream> file, csv;
+	bool fast = false, rough = false;
+	for (auto &&arg : args)
+	{
+		if (arg == "-fast")
+			fast = true;
+		else if (arg == "-rough")
+			rough = true;
+		else if (arg.length() > 4 && arg.substr(arg.length() - 4) == ".csv")
+			csv.reset(new std::ofstream(arg));
+		else
+			file.reset(new std::ofstream(arg));
+	}
+	half_test test(file ? *file : std::cout, csv.get(), fast, rough);
+
+	test.performance_test();
+
+	// declared after the performance test so only test.test() falls inside the timed scope
+	timer time;
+	return test.test();
+}
+catch (const std::exception &e)
+{
+	std::cerr << "ERROR: " << e.what() << '\n';
+	return -1;
+}
--- a/source/tensor/halfLib/third-party/THHalf.h
+++ b/source/tensor/halfLib/third-party/THHalf.h
+/*
+ * This implementation is extracted from PyTorch:
+ *   Repo: github.com/pytorch/pytorch
+ *   File: torch/lib/TH/THHalf.c
+ *   Commit ID: 92481b59d31199df57420d4b14912348cc780d1d
+ * Functions are made "static inline" for performance
+ */
+
+ /* Copyright 1993-2014 NVIDIA Corporation.  All rights reserved. */
+
+ // Host functions for converting between FP32 and FP16 formats
+/*
+ * Expands the 16-bit half-precision pattern at *src into a 32-bit float
+ * written to *res.
+ *
+ *   src - pointer to the 16 half-precision bits (only read).
+ *   res - pointer to the float that receives the converted value.
+ *
+ * Every NaN input collapses to the single pattern 0x7fffffff (sign cleared,
+ * all mantissa bits set); infinities, zeros and finite values keep their sign.
+ * NOTE(review): the result is stored through a float*-to-unsigned* cast,
+ * which assumes unsigned is 32 bits wide and sidesteps the aliasing rules -
+ * confirm this is acceptable for the compilers in use.
+ */
+static inline void TH_halfbits2float(unsigned short* src, float* res)
+{
+	unsigned h = *src;
+	unsigned sign = ((h >> 15) & 1);  /* bit 15 */
+	unsigned exponent = ((h >> 10) & 0x1f);  /* bits 10-14 */
+	unsigned mantissa = ((h & 0x3ff) << 13);  /* bits 0-9, moved to the top of a 23-bit field */
+
+	if (exponent == 0x1f) {  /* NaN or Inf */
+		mantissa = (mantissa ? (sign = 0, 0x7fffff) : 0);  /* NaN: drop the sign and set every mantissa bit; Inf: mantissa stays 0 */
+		exponent = 0xff;
+	}
+	else if (!exponent) {  /* Denorm or Zero */
+		if (mantissa) {
+			unsigned int msb;
+			exponent = 0x71;  /* 113; decremented once per shift below */
+			do {
+				msb = (mantissa & 0x400000);  /* was bit 22 set before this shift? */
+				mantissa <<= 1;  /* normalize */
+				--exponent;
+			} while (!msb);
+			mantissa &= 0x7fffff;  /* 1.mantissa is implicit */
+		}
+	}
+	else {
+		exponent += 0x70;  /* 0x70 = 112 = 127 - 15 */
+	}
+
+	*(unsigned*)res = ((sign << 31) | (exponent << 23) | mantissa);
+}
+/*
+ * Rounds the 32-bit float at *src to a 16-bit half-precision pattern using
+ * round-to-nearest-even and stores the result in *dest.
+ *
+ *   src  - pointer to the float to convert (only read).
+ *   dest - receives the 16 half-precision bits.
+ *
+ * Every NaN becomes 0x7fff regardless of sign or payload.  Magnitudes whose
+ * bit pattern exceeds 0x477fefff become signed infinity, and magnitudes whose
+ * bit pattern is at most 0x33000000 become signed zero.
+ * NOTE(review): the input is read through a float*-to-unsigned* cast, which
+ * assumes unsigned is 32 bits wide and sidesteps the aliasing rules -
+ * confirm this is acceptable for the compilers in use.
+ */
+static inline void TH_float2halfbits(float* src, unsigned short* dest)
+{
+	unsigned x = *(unsigned*)src;
+	unsigned u = (x & 0x7fffffff), remainder, shift, lsb, lsb_s1, lsb_m1;  // u: magnitude bits with the sign cleared
+	unsigned sign, exponent, mantissa;
+
+	// NaN of either sign: exponent all ones with a non-zero mantissa.
+	if (u > 0x7f800000) {
+		*dest = 0x7fffU;
+		return;
+	}
+
+	sign = ((x >> 16) & 0x8000);  // float sign bit moved to bit 15
+
+	// Infinity and values too large for a finite half become signed infinity;
+	// zero and values too small for the smallest half become signed zero.
+	if (u > 0x477fefff) {
+		*dest = sign | 0x7c00U;
+		return;
+	}
+	if (u < 0x33000001) {
+		*dest = (sign | 0x0000);
+		return;
+	}
+
+	exponent = ((u >> 23) & 0xff);
+	mantissa = (u & 0x7fffff);
+
+	if (exponent > 0x70) {  // result has a non-zero half exponent field
+		shift = 13;  // 23 - 10 mantissa bits to drop
+		exponent -= 0x70;  // 0x70 = 112 = 127 - 15
+	}
+	else {  // result has a zero half exponent field
+		shift = 0x7e - exponent;  // more bits are dropped the smaller the input is
+		exponent = 0;
+		mantissa |= 0x800000;  // make the implicit leading 1 explicit before shifting
+	}
+	lsb = (1 << shift);  // weight of the lowest bit that is kept
+	lsb_s1 = (lsb >> 1);  // half of that weight: the tie point
+	lsb_m1 = (lsb - 1);  // mask of the bits that are dropped
+
+	// Round to nearest even.
+	remainder = (mantissa & lsb_m1);
+	mantissa >>= shift;
+	if (remainder > lsb_s1 || (remainder == lsb_s1 && (mantissa & 0x1))) {
+		++mantissa;
+		if (!(mantissa & 0x3ff)) {  // the increment carried out of the 10 mantissa bits
+			++exponent;
+			mantissa = 0;
+		}
+	}
+
+	*dest = (sign | (exponent << 10) | mantissa);
+}
--- a/source/tensor/halfLib/third-party/eigen-half.h
+++ b/source/tensor/halfLib/third-party/eigen-half.h
+/*
+ * This implementation is extracted from Eigen:
+ *   Repo: bitbucket.org/eigen/eigen
+ *   File: Eigen/src/Core/arch/CUDA/Half.h
+ *   Commit ID: 96e0f73a35de54f675d825bef5339b2f08e77eb4
+ *
+ * Removed a lot of redundant and cuda-specific code.
+ */
+
+#define EIGEN_STRONG_INLINE static inline
+#define EIGEN_DEVICE_FUNC
+
+ // This file is part of Eigen, a lightweight C++ template library
+ // for linear algebra.
+ //
+ // This Source Code Form is subject to the terms of the Mozilla
+ // Public License v. 2.0. If a copy of the MPL was not distributed
+ // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ //
+ // The conversion routines are Copyright (c) Fabian Giesen, 2016.
+ // The original license follows:
+ //
+ // Copyright (c) Fabian Giesen, 2016
+ // All rights reserved.
+ // Redistribution and use in source and binary forms, with or without
+ // modification, are permitted.
+ // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ // AS IS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ // HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+ // Standard 16-bit float type, mostly useful for GPUs. Defines a new
+ // type Eigen::half (inheriting from CUDA's __half struct) with
+ // operator overloads such that it behaves basically as an arithmetic
+ // type. It will be quite slow on CPUs (so it is recommended to stay
+ // in fp32 for CPUs, except for simple parameter conversions, I/O
+ // to disk and the like), but fast on GPUs.
+
+
+#ifndef EIGEN_HALF_CUDA_H
+#define EIGEN_HALF_CUDA_H
+/*
+ * Eigen::half_impl: a minimal 16-bit float storage struct (__half) and the
+ * conversions between it and 32-bit float.  Each conversion selects, at
+ * compile time, a CUDA intrinsic, the _cvtss_sh/_cvtsh_ss intrinsics, or a
+ * portable bit-manipulation fallback, depending on which macros are defined.
+ */
+namespace Eigen {
+
+	namespace half_impl {
+
+		// Make our own __half definition that is similar to CUDA's.
+		struct __half {
+			EIGEN_DEVICE_FUNC __half() : x(0) {}  // all bits zero
+			explicit EIGEN_DEVICE_FUNC __half(unsigned short raw) : x(raw) {}  // stores the given bits unchanged
+			unsigned short x;  // raw 16-bit pattern
+		};
+
+		EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half raw_uint16_to_half(unsigned short x);
+		EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half float_to_half_rtne(float ff);
+		EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float half_to_float(__half h);
+
+		// Conversion routines, including fallbacks for the host or older CUDA.
+		// Note that newer Intel CPUs (Haswell or newer) have vectorized versions of
+		// these in hardware. If we need more performance on older/other CPUs, they are
+		// also possible to vectorize directly.
+		/*
+		 * Wraps a raw 16-bit pattern in a __half without changing any bits.
+		 */
+		EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half raw_uint16_to_half(unsigned short x) {
+			__half h;
+			h.x = x;
+			return h;
+		}
+		/*
+		 * Gives access to the bits of a float as an unsigned integer.
+		 * NOTE(review): the conversions write one member and read the other,
+		 * which relies on compiler support for union type punning - confirm.
+		 */
+		union FP32 {
+			unsigned int u;
+			float f;
+		};
+		/*
+		 * Converts a float to half precision ("rtne" in the name: round to
+		 * nearest even).  In the portable fallback, inputs whose magnitude
+		 * bits are at or above f16max become infinity (0x7c00), NaN inputs
+		 * become 0x7e00, and the input's sign bit is copied to bit 15 of
+		 * the result in every case.
+		 */
+		EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half float_to_half_rtne(float ff) {
+#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
+			return __float2half(ff);
+
+#elif defined(EIGEN_HAS_FP16_C)
+			__half h;
+			h.x = _cvtss_sh(ff, 0);
+			return h;
+
+#else
+			FP32 f; f.f = ff;
+
+			const FP32 f32infty = { 255 << 23 };  // exponent field all ones, mantissa zero
+			const FP32 f16max = { (127 + 16) << 23 };  // bit pattern of 2^16
+			const FP32 denorm_magic = { ((127 - 15) + (23 - 10) + 1) << 23 };  // exponent field 126
+			unsigned int sign_mask = 0x80000000u;
+			__half o;
+			o.x = static_cast<unsigned short>(0x0u);
+
+			unsigned int sign = f.u & sign_mask;
+			f.u ^= sign;  // clear the sign; it is merged back in at the end
+
+			// NOTE all the integer compares in this function can be safely
+			// compiled into signed compares since all operands are below
+			// 0x80000000. Important if you want fast straight SSE2 code
+			// (since there's no unsigned PCMPGTD).
+
+			if (f.u >= f16max.u) {  // result is Inf or NaN (all exponent bits set)
+				o.x = (f.u > f32infty.u) ? 0x7e00 : 0x7c00; // NaN->qNaN and Inf->Inf
+			}
+			else {  // (De)normalized number or zero
+				if (f.u < (113 << 23)) {  // resulting FP16 is subnormal or zero
+				  // use a magic value to align our 10 mantissa bits at the bottom of
+				  // the float. as long as FP addition is round-to-nearest-even this
+				  // just works.
+					f.f += denorm_magic.f;
+
+					// and one integer subtract of the bias later, we have our final float!
+					o.x = static_cast<unsigned short>(f.u - denorm_magic.u);
+				}
+				else {
+					unsigned int mant_odd = (f.u >> 13) & 1; // resulting mantissa is odd
+
+					// update exponent, rounding bias part 1
+					f.u += ((unsigned int)(15 - 127) << 23) + 0xfff;
+					// rounding bias part 2
+					f.u += mant_odd;
+					// take the bits!
+					o.x = static_cast<unsigned short>(f.u >> 13);
+				}
+			}
+
+			o.x |= static_cast<unsigned short>(sign >> 16);  // float sign bit 31 -> half bit 15
+			return o;
+#endif
+		}
+		/*
+		 * Converts a half-precision value to float.  The portable fallback
+		 * shifts exponent and mantissa into float position, adjusts the
+		 * exponent, and treats the all-ones and all-zero exponent fields
+		 * as special cases before restoring the sign.
+		 */
+		EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float half_to_float(__half h) {
+#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
+			return __half2float(h);
+
+#elif defined(EIGEN_HAS_FP16_C)
+			return _cvtsh_ss(h.x);
+
+#else
+			const FP32 magic = { 113 << 23 };  // bit pattern of 2^-14
+			const unsigned int shifted_exp = 0x7c00 << 13; // exponent mask after shift
+			FP32 o;
+
+			o.u = (h.x & 0x7fff) << 13;             // exponent/mantissa bits
+			unsigned int exp = shifted_exp & o.u;   // just the exponent
+			o.u += (127 - 15) << 23;                // exponent adjust
+
+			// handle exponent special cases
+			if (exp == shifted_exp) {     // Inf/NaN?
+				o.u += (128 - 16) << 23;    // extra exp adjust
+			}
+			else if (exp == 0) {        // Zero/Denormal?
+				o.u += 1 << 23;             // extra exp adjust
+				o.f -= magic.f;             // renormalize
+			}
+
+			o.u |= (h.x & 0x8000) << 16;    // sign bit
+			return o.f;
+#endif
+		}
+
+	} // end namespace half_impl
+
+} // end namespace Eigen
+
+#endif // EIGEN_HALF_CUDA_H
--- a/source/tensor/halfLib/third-party/float16-compressor.h
+++ b/source/tensor/halfLib/third-party/float16-compressor.h
+#pragma once
+#include <stdint.h>
+/*
+ * This code snippet posted by user Phernost on
+ * https://stackoverflow.com/questions/1659440/32-bit-to-16-bit-floating-point-conversion
+ *
+ * compress and decompress methods are made "inline" for performance
+ */
+/*
+ * Branch-free conversion between 32-bit float and 16-bit half patterns.
+ *
+ * Both directions rely on the select idiom
+ *     x ^= (y ^ x) & -(cond);
+ * where -(cond) is all ones when cond is true and zero otherwise, so x
+ * becomes y when cond holds and is left unchanged when it does not.
+ *
+ * NOTE(review): signN is initialized with 0x80000000, which does not fit a
+ * positive int32_t, and signC depends on how a negative value is shifted
+ * right - confirm the behavior on the compilers in use.
+ */
+class Float16Compressor
+{
+	union Bits
+	{
+		float f;
+		int32_t si;
+		uint32_t ui;
+	};
+
+	static int const shift = 13; // 23 float mantissa bits - 10 half mantissa bits
+	static int const shiftSign = 16; // float sign bit 31 - half sign bit 15
+
+	static int32_t const infN = 0x7F800000; // flt32 infinity
+	static int32_t const maxN = 0x477FE000; // max flt16 normal as a flt32
+	static int32_t const minN = 0x38800000; // min flt16 normal as a flt32
+	static int32_t const signN = 0x80000000; // flt32 sign bit
+
+	static int32_t const infC = infN >> shift;
+	static int32_t const nanN = (infC + 1) << shift; // minimum flt16 nan as a flt32
+	static int32_t const maxC = maxN >> shift;
+	static int32_t const minC = minN >> shift;
+	static int32_t const signC = signN >> shiftSign; // flt16 sign bit
+
+	static int32_t const mulN = 0x52000000; // (1 << 23) / minN
+	static int32_t const mulC = 0x33800000; // minN / (1 << (23 - shift))
+
+	static int32_t const subC = 0x003FF; // max flt32 subnormal down shifted
+	static int32_t const norC = 0x00400; // min flt32 normal down shifted
+
+	static int32_t const maxD = infC - maxC - 1;
+	static int32_t const minD = minC - subC - 1;
+
+public:
+	/*
+	 * Converts a float to a 16-bit half pattern.  The low 13 mantissa bits
+	 * are removed by a right shift; no rounding increment is applied here.
+	 */
+	inline static uint16_t compress(float value)
+	{
+		Bits v, s;
+		v.f = value;
+		uint32_t sign = v.si & signN;
+		v.si ^= sign; // clear the sign so the compares below see the magnitude
+		sign >>= shiftSign; // logical shift
+		s.si = mulN;
+		s.si = s.f * v.f; // correct subnormals
+		v.si ^= (s.si ^ v.si) & -(minN > v.si); // below minN: take the scaled integer computed above
+		v.si ^= (infN ^ v.si) & -((infN > v.si) & (v.si > maxN)); // between maxN and infN: becomes infN
+		v.si ^= (nanN ^ v.si) & -((nanN > v.si) & (v.si > infN)); // between infN and nanN: becomes nanN
+		v.ui >>= shift; // logical shift
+		v.si ^= ((v.si - maxD) ^ v.si) & -(v.si > maxC); // above maxC: subtract maxD
+		v.si ^= ((v.si - minD) ^ v.si) & -(v.si > subC); // above subC: subtract minD
+		return v.ui | sign;
+	}
+	/*
+	 * Converts a 16-bit half pattern back to a float by applying the
+	 * offsets used in compress() in the opposite direction.
+	 */
+	inline static float decompress(uint16_t value)
+	{
+		Bits v;
+		v.ui = value;
+		int32_t sign = v.si & signC;
+		v.si ^= sign; // clear the sign bit
+		sign <<= shiftSign; // move it to the float sign position
+		v.si ^= ((v.si + minD) ^ v.si) & -(v.si > subC); // above subC: add minD
+		v.si ^= ((v.si + maxD) ^ v.si) & -(v.si > maxC); // above maxC: add maxD
+		Bits s;
+		s.si = mulC;
+		s.f *= v.si; // float product of the mulC constant and the integer value
+		int32_t mask = -(norC > v.si); // all ones when the value is below norC
+		v.si <<= shift;
+		v.si ^= (s.si ^ v.si) & mask; // below norC: use the product instead of the shifted bits
+		v.si |= sign;
+		return v.f;
+	}
+};
\ No newline at end of file
--- a/source/tensor/halfLib/third-party/npy-halffloat.h
+++ b/source/tensor/halfLib/third-party/npy-halffloat.h
+/*
+ * This implementation is extracted from numpy:
+ *   Repo: github.com/numpy/numpy
+ *   File: numpy/core/src/npymath/halffloat.c
+ *   Commit ID: 25c23f1d956104a072a95355ffaa7a38b53710b7
+ * Functions are made "static inline" for performance, and
+ * non-conversion functions are removed, and generation of
+ * exceptions is disabled.
+ */
+
+#include <cstdint>
+typedef uint16_t npy_uint16;
+typedef uint32_t npy_uint32;
+typedef uint64_t npy_uint64;
+
+/*
+ * This chooses between 'ties to even' and 'ties away from zero'.
+ */
+#define NPY_HALF_ROUND_TIES_TO_EVEN 1
+ /*
+  * If these are 1, the conversions try to trigger underflow,
+  * overflow, and invalid exceptions in the FP system when needed.
+  */
+#define NPY_HALF_GENERATE_OVERFLOW 0
+#define NPY_HALF_GENERATE_UNDERFLOW 0
+#define NPY_HALF_GENERATE_INVALID 0
+
+  /*
+   ********************************************************************
+   *                     BIT-LEVEL CONVERSIONS                        *
+   ********************************************************************
+   */
+
+/*
+ * Converts the bit pattern of a 32-bit float into the bit pattern of the
+ * nearest 16-bit half.
+ *
+ *   f       - raw bits of the float to convert.
+ *   returns - raw bits of the half result.
+ *
+ * Rounding is to nearest, with ties resolved according to
+ * NPY_HALF_ROUND_TIES_TO_EVEN.  NaNs stay NaNs (the top significand bits are
+ * propagated), exponent overflow yields signed infinity and exponent
+ * underflow yields a subnormal half or signed zero.
+ */
+static inline npy_uint16 npy_floatbits_to_halfbits(npy_uint32 f)
+{
+	npy_uint32 f_exp, f_sig;
+	npy_uint16 h_sgn, h_exp, h_sig;
+
+	h_sgn = (npy_uint16)((f & 0x80000000u) >> 16);
+	f_exp = (f & 0x7f800000u);
+
+	/* Exponent overflow/NaN converts to signed inf/NaN */
+	if (f_exp >= 0x47800000u) {
+		if (f_exp == 0x7f800000u) {
+			/* Inf or NaN */
+			f_sig = (f & 0x007fffffu);
+			if (f_sig != 0) {
+				/* NaN - propagate the flag in the significand... */
+				npy_uint16 ret = (npy_uint16)(0x7c00u + (f_sig >> 13));
+				/* ...but make sure it stays a NaN */
+				if (ret == 0x7c00u) {
+					ret++;
+				}
+				return h_sgn + ret;
+			}
+			else {
+				/* signed inf */
+				return (npy_uint16)(h_sgn + 0x7c00u);
+			}
+		}
+		else {
+			/* overflow to signed inf */
+#if NPY_HALF_GENERATE_OVERFLOW
+			npy_set_floatstatus_overflow();
+#endif
+			return (npy_uint16)(h_sgn + 0x7c00u);
+		}
+	}
+
+	/* Exponent underflow converts to a subnormal half or signed zero */
+	if (f_exp <= 0x38000000u) {
+		/*
+		 * Signed zeros, subnormal floats, and floats with small
+		 * exponents all convert to signed zero halfs.
+		 */
+		if (f_exp < 0x33000000u) {
+#if NPY_HALF_GENERATE_UNDERFLOW
+			/* If f != 0, it underflowed to 0 */
+			if ((f & 0x7fffffff) != 0) {
+				npy_set_floatstatus_underflow();
+			}
+#endif
+			return h_sgn;
+		}
+		/* Make the subnormal significand */
+		f_exp >>= 23;
+		f_sig = (0x00800000u + (f & 0x007fffffu));
+#if NPY_HALF_GENERATE_UNDERFLOW
+		/* If it's not exactly represented, it underflowed */
+		if ((f_sig&(((npy_uint32)1 << (126 - f_exp)) - 1)) != 0) {
+			npy_set_floatstatus_underflow();
+		}
+#endif
+		/*
+		 * Subnormals need an extra right shift on top of the usual 13:
+		 * between 1 bit (f_exp == 112) and 11 bits (f_exp == 102).
+		 */
+		f_sig >>= (113 - f_exp);
+		/* Handle rounding by adding 1 to the bit beyond half precision */
+#if NPY_HALF_ROUND_TIES_TO_EVEN
+		/*
+		 * If the last bit in the half significand is 0 (already even), and
+		 * the remaining bit pattern is 1000...0, then we do not add one
+		 * to the bit after the half significand.  The extra shift above can
+		 * drop up to 11 low bits of f, so they are checked in the original
+		 * value: if any of them is set the input lies above the midpoint
+		 * and must round up.  In all other cases, we add one as well.
+		 */
+		if (((f_sig & 0x00003fffu) != 0x00001000u) || (f & 0x000007ffu)) {
+			f_sig += 0x00001000u;
+		}
+#else
+		f_sig += 0x00001000u;
+#endif
+		h_sig = (npy_uint16)(f_sig >> 13);
+		/*
+		 * If the rounding causes a bit to spill into h_exp, it will
+		 * increment h_exp from zero to one and h_sig will be zero.
+		 * This is the correct result.
+		 */
+		return (npy_uint16)(h_sgn + h_sig);
+	}
+
+	/* Regular case with no overflow or underflow */
+	h_exp = (npy_uint16)((f_exp - 0x38000000u) >> 13);
+	/* Handle rounding by adding 1 to the bit beyond half precision */
+	f_sig = (f & 0x007fffffu);
+#if NPY_HALF_ROUND_TIES_TO_EVEN
+	/*
+	 * If the last bit in the half significand is 0 (already even), and
+	 * the remaining bit pattern is 1000...0, then we do not add one
+	 * to the bit after the half significand.  In all other cases, we do.
+	 */
+	if ((f_sig & 0x00003fffu) != 0x00001000u) {
+		f_sig += 0x00001000u;
+	}
+#else
+	f_sig += 0x00001000u;
+#endif
+	h_sig = (npy_uint16)(f_sig >> 13);
+	/*
+	 * If the rounding causes a bit to spill into h_exp, it will
+	 * increment h_exp by one and h_sig will be zero.  This is the
+	 * correct result.  h_exp may increment to 15, at greatest, in
+	 * which case the result overflows to a signed inf.
+	 */
+#if NPY_HALF_GENERATE_OVERFLOW
+	h_sig += h_exp;
+	if (h_sig == 0x7c00u) {
+		npy_set_floatstatus_overflow();
+	}
+	return h_sgn + h_sig;
+#else
+	return h_sgn + h_exp + h_sig;
+#endif
+}
+
+/*
+ * Converts the bit pattern of a 64-bit double into the bit pattern of the
+ * nearest 16-bit half.
+ *
+ *   d       - raw bits of the double to convert.
+ *   returns - raw bits of the half result.
+ *
+ * Rounding is to nearest, with ties resolved according to
+ * NPY_HALF_ROUND_TIES_TO_EVEN.  NaNs stay NaNs (the top significand bits are
+ * propagated), exponent overflow yields signed infinity and exponent
+ * underflow yields a subnormal half or signed zero.
+ */
+static inline npy_uint16 npy_doublebits_to_halfbits(npy_uint64 d)
+{
+	npy_uint64 d_exp, d_sig;
+	npy_uint16 h_sgn, h_exp, h_sig;
+
+	h_sgn = (d & 0x8000000000000000ULL) >> 48;
+	d_exp = (d & 0x7ff0000000000000ULL);
+
+	/* Exponent overflow/NaN converts to signed inf/NaN */
+	if (d_exp >= 0x40f0000000000000ULL) {
+		if (d_exp == 0x7ff0000000000000ULL) {
+			/* Inf or NaN */
+			d_sig = (d & 0x000fffffffffffffULL);
+			if (d_sig != 0) {
+				/* NaN - propagate the flag in the significand... */
+				npy_uint16 ret = (npy_uint16)(0x7c00u + (d_sig >> 42));
+				/* ...but make sure it stays a NaN */
+				if (ret == 0x7c00u) {
+					ret++;
+				}
+				return h_sgn + ret;
+			}
+			else {
+				/* signed inf */
+				return h_sgn + 0x7c00u;
+			}
+		}
+		else {
+			/* overflow to signed inf */
+#if NPY_HALF_GENERATE_OVERFLOW
+			npy_set_floatstatus_overflow();
+#endif
+			return h_sgn + 0x7c00u;
+		}
+	}
+
+	/* Exponent underflow converts to subnormal half or signed zero */
+	if (d_exp <= 0x3f00000000000000ULL) {
+		/*
+		 * Signed zeros, subnormal floats, and floats with small
+		 * exponents all convert to signed zero halfs.
+		 */
+		if (d_exp < 0x3e60000000000000ULL) {
+#if NPY_HALF_GENERATE_UNDERFLOW
+			/* If d != 0, it underflowed to 0 */
+			if ((d & 0x7fffffffffffffffULL) != 0) {
+				npy_set_floatstatus_underflow();
+			}
+#endif
+			return h_sgn;
+		}
+		/* Make the subnormal significand */
+		d_exp >>= 52;
+		d_sig = (0x0010000000000000ULL + (d & 0x000fffffffffffffULL));
+#if NPY_HALF_GENERATE_UNDERFLOW
+		/* If it's not exactly represented, it underflowed */
+		if ((d_sig&(((npy_uint64)1 << (1051 - d_exp)) - 1)) != 0) {
+			npy_set_floatstatus_underflow();
+		}
+#endif
+		/*
+		 * Subnormals need an extra right shift on top of the usual 42:
+		 * between 1 bit (d_exp == 1008) and 11 bits (d_exp == 998).
+		 */
+		d_sig >>= (1009 - d_exp);
+		/* Handle rounding by adding 1 to the bit beyond half precision */
+#if NPY_HALF_ROUND_TIES_TO_EVEN
+		/*
+		 * If the last bit in the half significand is 0 (already even), and
+		 * the remaining bit pattern is 1000...0, then we do not add one
+		 * to the bit after the half significand.  The extra shift above can
+		 * drop up to 11 low bits of d, so they are checked in the original
+		 * value: if any of them is set the input lies above the midpoint
+		 * and must round up.  In all other cases, we add one as well.
+		 */
+		if (((d_sig & 0x000007ffffffffffULL) != 0x0000020000000000ULL) || (d & 0x00000000000007ffULL)) {
+			d_sig += 0x0000020000000000ULL;
+		}
+#else
+		d_sig += 0x0000020000000000ULL;
+#endif
+		h_sig = (npy_uint16)(d_sig >> 42);
+		/*
+		 * If the rounding causes a bit to spill into h_exp, it will
+		 * increment h_exp from zero to one and h_sig will be zero.
+		 * This is the correct result.
+		 */
+		return h_sgn + h_sig;
+	}
+
+	/* Regular case with no overflow or underflow */
+	h_exp = (npy_uint16)((d_exp - 0x3f00000000000000ULL) >> 42);
+	/* Handle rounding by adding 1 to the bit beyond half precision */
+	d_sig = (d & 0x000fffffffffffffULL);
+#if NPY_HALF_ROUND_TIES_TO_EVEN
+	/*
+	 * If the last bit in the half significand is 0 (already even), and
+	 * the remaining bit pattern is 1000...0, then we do not add one
+	 * to the bit after the half significand.  In all other cases, we do.
+	 */
+	if ((d_sig & 0x000007ffffffffffULL) != 0x0000020000000000ULL) {
+		d_sig += 0x0000020000000000ULL;
+	}
+#else
+	d_sig += 0x0000020000000000ULL;
+#endif
+	h_sig = (npy_uint16)(d_sig >> 42);
+
+	/*
+	 * If the rounding causes a bit to spill into h_exp, it will
+	 * increment h_exp by one and h_sig will be zero.  This is the
+	 * correct result.  h_exp may increment to 15, at greatest, in
+	 * which case the result overflows to a signed inf.
+	 */
+#if NPY_HALF_GENERATE_OVERFLOW
+	h_sig += h_exp;
+	if (h_sig == 0x7c00u) {
+		npy_set_floatstatus_overflow();
+	}
+	return h_sgn + h_sig;
+#else
+	return h_sgn + h_exp + h_sig;
+#endif
+}
+
+/*
+ * Converts the bit pattern of a 16-bit half into the bit pattern of the
+ * 32-bit float with the same value.
+ *
+ *   h       - raw bits of the half to convert.
+ *   returns - raw bits of the float result.
+ */
+static inline npy_uint32 npy_halfbits_to_floatbits(npy_uint16 h)
+{
+	const npy_uint32 sign_bits = ((npy_uint32)h & 0x8000u) << 16;
+	const npy_uint16 exp_field = (h & 0x7c00u);
+
+	if (exp_field == 0x7c00u) {
+		/* inf or NaN: all-ones exponent and a copy of the significand */
+		return sign_bits + 0x7f800000u + (((npy_uint32)(h & 0x03ffu)) << 13);
+	}
+	if (exp_field != 0x0000u) {
+		/* normalized: just need to adjust the exponent and shift */
+		return sign_bits + (((npy_uint32)(h & 0x7fffu) + 0x1c000u) << 13);
+	}
+
+	/* zero exponent field: signed zero or subnormal */
+	npy_uint16 sig = (h & 0x03ffu);
+	if (sig == 0) {
+		return sign_bits;
+	}
+
+	/* shift left until the leading 1 reaches bit 10, counting the extra shifts */
+	npy_uint16 extra_shifts = 0;
+	for (sig <<= 1; (sig & 0x0400u) == 0; sig <<= 1) {
+		extra_shifts++;
+	}
+	const npy_uint32 exp_bits = ((npy_uint32)(127 - 15 - extra_shifts)) << 23;
+	const npy_uint32 sig_bits = ((npy_uint32)(sig & 0x03ffu)) << 13;
+	return sign_bits + exp_bits + sig_bits;
+}
+
+/*
+ * Converts the bit pattern of a 16-bit half into the bit pattern of the
+ * 64-bit double with the same value.
+ *
+ *   h       - raw bits of the half to convert.
+ *   returns - raw bits of the double result.
+ */
+static inline npy_uint64 npy_halfbits_to_doublebits(npy_uint16 h)
+{
+	const npy_uint64 sign_bits = ((npy_uint64)h & 0x8000u) << 48;
+	const npy_uint16 exp_field = (h & 0x7c00u);
+
+	if (exp_field == 0x7c00u) {
+		/* inf or NaN: all-ones exponent and a copy of the significand */
+		return sign_bits + 0x7ff0000000000000ULL +
+			(((npy_uint64)(h & 0x03ffu)) << 42);
+	}
+	if (exp_field != 0x0000u) {
+		/* normalized: just need to adjust the exponent and shift */
+		return sign_bits + (((npy_uint64)(h & 0x7fffu) + 0xfc000u) << 42);
+	}
+
+	/* zero exponent field: signed zero or subnormal */
+	npy_uint16 sig = (h & 0x03ffu);
+	if (sig == 0) {
+		return sign_bits;
+	}
+
+	/* shift left until the leading 1 reaches bit 10, counting the extra shifts */
+	npy_uint16 extra_shifts = 0;
+	for (sig <<= 1; (sig & 0x0400u) == 0; sig <<= 1) {
+		extra_shifts++;
+	}
+	const npy_uint64 exp_bits = ((npy_uint64)(1023 - 15 - extra_shifts)) << 52;
+	const npy_uint64 sig_bits = ((npy_uint64)(sig & 0x03ffu)) << 42;
+	return sign_bits + exp_bits + sig_bits;
+}