Commit 41dbf0a9 by ltb

add cpu half test library , dir is source/tensor/halfLib

parent 1da50ae2
#include <iostream>
#include <assert.h>
#include <direct.h>
#include "../core/utilities/FlushToMem.h"
#include "../core/getandset/ConvertDataType.h"
#include "../XTensor.h"
#include "umHalf.h"
using namespace nts;
//#define VALIDATE(x) if (!(x)){std::cout << "Failed: " << #x << std::endl;assert((x));}
/*
 * Smoke test for dumping and re-reading an X_FLOAT16 tensor.
 *
 * Steps performed:
 *   1. build "<cwd>\source\tensor\HalfFloat\dump" as the dump file name,
 *   2. initialise a dim x dim X_FLOAT tensor via SetDataRand(-2.0, 2.0)
 *      and convert it to X_FLOAT16,
 *   3. dump the half tensor to stderr and to the dump file,
 *   4. read the file back into a second X_FLOAT16 tensor and dump that to
 *      stderr as well, so both printouts can be compared.
 *
 * Returns 0 on success and 1 when the working directory cannot be
 * determined, the resulting path does not fit the buffer, or the dump
 * file cannot be opened.
 */
int main(int argc, char* argv[])
{
    static const char dumpSuffix[] = "\\source\\tensor\\HalfFloat\\dump";
    const int pathCapacity = 1024;
    char path[pathCapacity];

    /* Use a caller-owned buffer: the block returned by getcwd(NULL, 0) is
       not guaranteed to have room for the suffix appended below, and it
       would have to be freed afterwards. */
    if (getcwd(path, pathCapacity) == NULL) {
        fprintf(stderr, "cannot determine the current working directory\n");
        return 1;
    }
    /* sizeof(dumpSuffix) already counts the terminating NUL. */
    if (strlen(path) + sizeof(dumpSuffix) > sizeof(path)) {
        fprintf(stderr, "dump path is too long\n");
        return 1;
    }
    strcat(path, dumpSuffix);

    XTensor a;
    XTensor halfa;
    int dim = 4;
    int devId = 0;
    InitTensor2DV2(&a,dim,dim,X_FLOAT,devId);
    a.SetDataRand(-2.0,2.0);
    halfa = ConvertDataType(a, X_FLOAT16);
    halfa.Dump(&halfa, stderr, "halfa:");
    GPUToCPUFlush(&halfa);

    /* Write the half tensor to disk. */
    FILE * file = fopen(path, "wb");
    if (file == NULL) {
        fprintf(stderr, "cannot open %s for writing\n", path);
        return 1;
    }
    halfa.Dump(file, "halfa:");
    fclose(file);

    /* Read it back into a fresh tensor of the same shape and type. */
    XTensor halfb;
    InitTensor2DV2(&halfb, dim, dim, X_FLOAT16, devId);
    FILE *read = fopen(path, "rb");
    if (read == NULL) {
        fprintf(stderr, "cannot open %s for reading\n", path);
        return 1;
    }
    halfb.Read(read, "halfa:");
    fclose(read);
    halfb.Dump(&halfb, stderr, "halfb:");

    /* Disabled arithmetic checks for the CPU half type, kept for reference.
       They need the VALIDATE macro that is commented out above main. */
    //half h = 1.f, h2 = 2.f;
    //--h2;
    //++h2;
    //--h;
    //++h;
    //h2 -= 1.f;
    //float f = h2, f2 = h;
    //VALIDATE(1.f == f && f == f2);
    //half dddd = 15.5;
    //float hhhh = 15.5;
    //printf("%x\n", dddd);
    //printf("%x\n", hhhh);
    //h = h2;
    //h2 = 15.5f;
    //f = h2, f2 = h;
    //VALIDATE(15.5f == f && 1.f == f2);
    //h2 *= h;
    //f = h2, f2 = h;
    //VALIDATE(15.5f == f && 1.f == f2);
    //h2 /= h;
    //f = h2, f2 = h;
    //VALIDATE(15.5f == f && 1.f == f2);
    //h2 += h;
    //f = h2, f2 = h;
    //VALIDATE(16.5f == f && 1.f == f2);
    //h++; h++; h++;
    //h2 = -h2;
    //h2 += 17.5f;
    //h2 *= h;
    //f = h2, f2 = h;
    //VALIDATE(4.f == f && 4.f == f2);
    //VALIDATE(h == h2);
    //VALIDATE(h <= h2);
    //--h;
    //VALIDATE(h <= h2);
    //h -= 250.f;
    //VALIDATE(h < h2);
    //h += 500.f;
    //VALIDATE(h > h2);
    //VALIDATE(h >= h2);
    //f = h2, f2 = h;
    //VALIDATE(h * h2 == (half)(f * f2));
    //// addition
    //// ****************************************************************************
    //// identical exponents
    //for (float f = 0.f; f < 1000.f; ++f)
    //{
    // half one = f;
    // half two = f;
    // half three = one + two;
    // f2 = three;
    // VALIDATE(f*2.f == f2);
    //}
    //// different exponents
    //for (float f = 0.f, fp = 1000.f; f < 500.f; ++f, --fp)
    //{
    // half one = f;
    // half two = fp;
    // half three = one + two;
    // f2 = three;
    // VALIDATE(f + fp == f2);
    //}
    //// very small numbers - this is already beyond the accuracy of 16 bit floats.
    //for (float f = 0.003f; f < 1000.f; f += 0.0005f)
    //{
    // half one = f;
    // half two = f;
    // half three = one + two;
    // f2 = three;
    // float m = f * 2.f;
    // VALIDATE(f2 > (m - 0.05*m) && f2 < (m + 0.05*m));
    //}
    //// subtraction
    //// ****************************************************************************
    //// identical exponents
    //for (float f = 0.f; f < 1000.f; ++f)
    //{
    // half one = f;
    // half two = f;
    // half three = one - two;
    // f2 = three;
    // VALIDATE(0.f == f2);
    //}
    //// different exponents
    //for (float f = 0.f, fp = 1000.f; f < 500.f; ++f, --fp)
    //{
    // half one = f;
    // half two = fp;
    // half three = one - two;
    // f2 = three;
    // VALIDATE(f - fp == f2);
    //}
    return 0;
}
https://github.com/acgessler/half_float
C++ implementation of a 16-bit floating-point type mimicking most of the IEEE 754 behaviour. Compatible with the half data type used as a texture format by OpenGL/Direct3D.
\ No newline at end of file
halfa: order=2 dimsize=4,4 dtype=X_FLOAT16 dense=1.000000
be2c 3ffd bf2c 3c52 a8f6 3a6a afcf 3eca 3e47 3852 bf6e 3bc8 bff5 bc12 b266 31a4
#include <iostream>
#include <assert.h>
#include <direct.h>
#include "../../core/utilities/FlushToMem.h"
#include "../../core/getandset/ConvertDataType.h"
#include "../../XTensor.h"
#include "../../XGlobal.h"
#include "umHalf.h"
using namespace nts;
/*
 * Smoke test for dumping and re-reading an X_FLOAT16 tensor.
 *
 * Steps performed:
 *   1. build "<cwd>\source\tensor\halfLib\HalfFloat\dump" as the dump file
 *      name,
 *   2. initialise a dim x dim X_FLOAT tensor via SetDataRand(-2.0, 2.0)
 *      and convert it to X_FLOAT16,
 *   3. dump the half tensor to stderr and to the dump file,
 *   4. read the file back into a second X_FLOAT16 tensor and dump that to
 *      stderr as well, so both printouts can be compared.
 *
 * Returns 0 on success and 1 when the working directory cannot be
 * determined, the resulting path does not fit the buffer, or the dump
 * file cannot be opened.
 */
int main(int argc, char* argv[])
{
    static const char dumpSuffix[] = "\\source\\tensor\\halfLib\\HalfFloat\\dump";
    const int pathCapacity = 1024;
    char path[pathCapacity];

    /* Use a caller-owned buffer: the block returned by getcwd(NULL, 0) is
       not guaranteed to have room for the suffix appended below, and it
       would have to be freed afterwards. */
    if (getcwd(path, pathCapacity) == NULL) {
        fprintf(stderr, "cannot determine the current working directory\n");
        return 1;
    }
    /* sizeof(dumpSuffix) already counts the terminating NUL. */
    if (strlen(path) + sizeof(dumpSuffix) > sizeof(path)) {
        fprintf(stderr, "dump path is too long\n");
        return 1;
    }
    strcat(path, dumpSuffix);

    XTensor a;
    XTensor halfa;
    int dim = 4;
    int devId = 0;
    InitTensor2DV2(&a, dim, dim, X_FLOAT, devId);
    a.SetDataRand(-2.0, 2.0);
    halfa = ConvertDataType(a, X_FLOAT16);

    printf("============save model================\n");
    halfa.Dump(&halfa, stderr, "halfa:");
    GPUToCPUFlush(&halfa);
    FILE * file = fopen(path, "wb");
    if (file == NULL) {
        fprintf(stderr, "cannot open %s for writing\n", path);
        return 1;
    }
    halfa.Dump(file, "halfa:");
    //a.Dump(file, "a");
    fclose(file);

    XTensor halfb;
    InitTensor2DV2(&halfb, dim, dim, X_FLOAT16, devId);
    /* b is only needed by the disabled float round trip below. */
    XTensor b;
    InitTensor2DV2(&b, dim, dim, X_FLOAT, devId);

    printf("==============read model=============\n");
    FILE *read = fopen(path, "rb");
    if (read == NULL) {
        fprintf(stderr, "cannot open %s for reading\n", path);
        return 1;
    }
    halfb.Read(read, "halfa:");
    //b.Read(read, "a");
    fclose(read);
    halfb.Dump(&halfb, stderr, "halfb:");
    return 0;
}
\ No newline at end of file
// ISO C9x compliant stdint.h for Microsoft Visual Studio
// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124
//
// Copyright (c) 2006 Alexander Chemeris
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. The name of the author may be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
///////////////////////////////////////////////////////////////////////////////
#ifndef _MSC_VER // [
#error "Use this header only with Microsoft Visual C++ compilers!"
#endif // _MSC_VER ]
#ifndef _MSC_STDINT_H_ // [
#define _MSC_STDINT_H_
#if _MSC_VER > 1000
#pragma once
#endif
#include <limits.h>
// For Visual Studio 6 in C++ mode wrap <wchar.h> include with 'extern "C++" {}'
// or compiler give many errors like this:
// error C2733: second C linkage of overloaded function 'wmemchr' not allowed
#if (_MSC_VER < 1300) && defined(__cplusplus)
extern "C++" {
#endif
# include <wchar.h>
#if (_MSC_VER < 1300) && defined(__cplusplus)
}
#endif
// 7.18.1 Integer types
// 7.18.1.1 Exact-width integer types
// Each name maps directly onto the compiler's sized keyword
// (__int8 ... __int64) of the same width.
typedef __int8 int8_t;
typedef __int16 int16_t;
typedef __int32 int32_t;
typedef __int64 int64_t;
typedef unsigned __int8 uint8_t;
typedef unsigned __int16 uint16_t;
typedef unsigned __int32 uint32_t;
typedef unsigned __int64 uint64_t;
// 7.18.1.2 Minimum-width integer types
// Aliases of the exact-width types above.
typedef int8_t int_least8_t;
typedef int16_t int_least16_t;
typedef int32_t int_least32_t;
typedef int64_t int_least64_t;
typedef uint8_t uint_least8_t;
typedef uint16_t uint_least16_t;
typedef uint32_t uint_least32_t;
typedef uint64_t uint_least64_t;
// 7.18.1.3 Fastest minimum-width integer types
// Also aliases of the exact-width types; no wider "fast" type is chosen.
typedef int8_t int_fast8_t;
typedef int16_t int_fast16_t;
typedef int32_t int_fast32_t;
typedef int64_t int_fast64_t;
typedef uint8_t uint_fast8_t;
typedef uint16_t uint_fast16_t;
typedef uint32_t uint_fast32_t;
typedef uint64_t uint_fast64_t;
// 7.18.1.4 Integer types capable of holding object pointers
// 64 bits wide when _WIN64 is defined, plain int / unsigned int otherwise.
#ifdef _WIN64 // [
typedef __int64 intptr_t;
typedef unsigned __int64 uintptr_t;
#else // _WIN64 ][
typedef int intptr_t;
typedef unsigned int uintptr_t;
#endif // _WIN64 ]
// 7.18.1.5 Greatest-width integer types
typedef int64_t intmax_t;
typedef uint64_t uintmax_t;
// 7.18.2 Limits of specified-width integer types
// The limit macros are always defined for C. In C++ they are only defined
// when the including file defines __STDC_LIMIT_MACROS before this header.
// The _I8_MIN / _UI64_MAX style constants on the right-hand sides are not
// defined here; presumably they come from the <limits.h> included above.
#if !defined(__cplusplus) || defined(__STDC_LIMIT_MACROS) // [ See footnote 220 at page 257 and footnote 221 at page 259
// 7.18.2.1 Limits of exact-width integer types
#define INT8_MIN ((int8_t)_I8_MIN)
#define INT8_MAX _I8_MAX
#define INT16_MIN ((int16_t)_I16_MIN)
#define INT16_MAX _I16_MAX
#define INT32_MIN ((int32_t)_I32_MIN)
#define INT32_MAX _I32_MAX
#define INT64_MIN ((int64_t)_I64_MIN)
#define INT64_MAX _I64_MAX
#define UINT8_MAX _UI8_MAX
#define UINT16_MAX _UI16_MAX
#define UINT32_MAX _UI32_MAX
#define UINT64_MAX _UI64_MAX
// 7.18.2.2 Limits of minimum-width integer types
// Identical to the exact-width limits because the least types are aliases.
#define INT_LEAST8_MIN INT8_MIN
#define INT_LEAST8_MAX INT8_MAX
#define INT_LEAST16_MIN INT16_MIN
#define INT_LEAST16_MAX INT16_MAX
#define INT_LEAST32_MIN INT32_MIN
#define INT_LEAST32_MAX INT32_MAX
#define INT_LEAST64_MIN INT64_MIN
#define INT_LEAST64_MAX INT64_MAX
#define UINT_LEAST8_MAX UINT8_MAX
#define UINT_LEAST16_MAX UINT16_MAX
#define UINT_LEAST32_MAX UINT32_MAX
#define UINT_LEAST64_MAX UINT64_MAX
// 7.18.2.3 Limits of fastest minimum-width integer types
// Identical to the exact-width limits because the fast types are aliases.
#define INT_FAST8_MIN INT8_MIN
#define INT_FAST8_MAX INT8_MAX
#define INT_FAST16_MIN INT16_MIN
#define INT_FAST16_MAX INT16_MAX
#define INT_FAST32_MIN INT32_MIN
#define INT_FAST32_MAX INT32_MAX
#define INT_FAST64_MIN INT64_MIN
#define INT_FAST64_MAX INT64_MAX
#define UINT_FAST8_MAX UINT8_MAX
#define UINT_FAST16_MAX UINT16_MAX
#define UINT_FAST32_MAX UINT32_MAX
#define UINT_FAST64_MAX UINT64_MAX
// 7.18.2.4 Limits of integer types capable of holding object pointers
// Follows the pointer width selected by _WIN64.
#ifdef _WIN64 // [
# define INTPTR_MIN INT64_MIN
# define INTPTR_MAX INT64_MAX
# define UINTPTR_MAX UINT64_MAX
#else // _WIN64 ][
# define INTPTR_MIN INT32_MIN
# define INTPTR_MAX INT32_MAX
# define UINTPTR_MAX UINT32_MAX
#endif // _WIN64 ]
// 7.18.2.5 Limits of greatest-width integer types
#define INTMAX_MIN INT64_MIN
#define INTMAX_MAX INT64_MAX
#define UINTMAX_MAX UINT64_MAX
// 7.18.3 Limits of other integer types
#ifdef _WIN64 // [
# define PTRDIFF_MIN _I64_MIN
# define PTRDIFF_MAX _I64_MAX
#else // _WIN64 ][
# define PTRDIFF_MIN _I32_MIN
# define PTRDIFF_MAX _I32_MAX
#endif // _WIN64 ]
#define SIG_ATOMIC_MIN INT_MIN
#define SIG_ATOMIC_MAX INT_MAX
// SIZE_MAX is only supplied when no earlier header has defined it.
#ifndef SIZE_MAX // [
# ifdef _WIN64 // [
# define SIZE_MAX _UI64_MAX
# else // _WIN64 ][
# define SIZE_MAX _UI32_MAX
# endif // _WIN64 ]
#endif // SIZE_MAX ]
// WCHAR_MIN and WCHAR_MAX are also defined in <wchar.h>
// so the fallbacks below only apply when they are still undefined.
#ifndef WCHAR_MIN // [
# define WCHAR_MIN 0
#endif // WCHAR_MIN ]
#ifndef WCHAR_MAX // [
# define WCHAR_MAX _UI16_MAX
#endif // WCHAR_MAX ]
#define WINT_MIN 0
#define WINT_MAX _UI16_MAX
#endif // __STDC_LIMIT_MACROS ]
// 7.18.4 Macros for integer constants
// Always defined for C. In C++ they are only defined when the including
// file defines __STDC_CONSTANT_MACROS before this header.
#if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) // [ See footnote 224 at page 260
// 7.18.4.1 Macros for minimum-width integer constants
// Each macro token-pastes a sized literal suffix (i8, ui16, ...) onto its
// argument, so the argument must be a plain integer literal.
#define INT8_C(val) val##i8
#define INT16_C(val) val##i16
#define INT32_C(val) val##i32
#define INT64_C(val) val##i64
#define UINT8_C(val) val##ui8
#define UINT16_C(val) val##ui16
#define UINT32_C(val) val##ui32
#define UINT64_C(val) val##ui64
// 7.18.4.2 Macros for greatest-width integer constants
// intmax_t / uintmax_t are the 64 bit types, so reuse the 64 bit macros.
#define INTMAX_C INT64_C
#define UINTMAX_C UINT64_C
#endif // __STDC_CONSTANT_MACROS ]
#endif // _MSC_STDINT_H_ ]
\ No newline at end of file
///////////////////////////////////////////////////////////////////////////////////
/*
Copyright (c) 2006-2008,
Chris "Krishty" Maiwald, Alexander "Aramis" Gessler
All rights reserved.
Redistribution and use of this software in source and binary forms,
with or without modification, are permitted provided that the following
conditions are met:
* Redistributions of source code must retain the above
copyright notice, this list of conditions and the
following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the
following disclaimer in the documentation and/or other
materials provided with the distribution.
* Neither the name of the class, nor the names of its
contributors may be used to endorse or promote products
derived from this software without specific prior
written permission of the Development Team.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
///////////////////////////////////////////////////////////////////////////////////
#ifndef UM_HALF_H_INCLUDED
#define UM_HALF_H_INCLUDED
#include <limits>
#include <algorithm>
//#ifdef _MSC_VER
//#include "stdint.h"
//#else
//#include <stdint.h>
//#endif
#include<stdint.h>
#undef min
#undef max
///////////////////////////////////////////////////////////////////////////////////
/** 1. Represents a half-precision floating point value (16 bits) that behaves
* nearly conformant to the IEEE 754 standard for floating-point computations.
*
* Not all operators have special implementations, most perform time-consuming
* conversions from half to float and back again.
* Differences to IEEE 754:
* - no difference between qnan and snan
* - no traps
* - no well-defined rounding mode
*/
///////////////////////////////////////////////////////////////////////////////////
class HalfFloat
{
    // The half/half arithmetic operators are friends of the class.
    friend HalfFloat operator+ (HalfFloat, HalfFloat);
    friend HalfFloat operator- (HalfFloat, HalfFloat);
    friend HalfFloat operator* (HalfFloat, HalfFloat);
    friend HalfFloat operator/ (HalfFloat, HalfFloat);

public:
    /** Layout constants of the 16 bit format: 10 fraction bits, 5 exponent
     *  bits, and an exponent field that is biased by #BIAS.
     */
    enum
    {
        BITS_MANTISSA = 10,
        BITS_EXPONENT = 5,
        MAX_EXPONENT_VALUE = 31,
        BIAS = MAX_EXPONENT_VALUE / 2,
        MAX_EXPONENT = BIAS,
        MIN_EXPONENT = -BIAS,
        MAX_EXPONENT10 = 9,
        MIN_EXPONENT10 = -9
    };

public:
    /** Default constructor. The value is left uninitialized.
     */
    HalfFloat() {}

    /** Copy constructor: takes over the bit pattern of @p other.
     */
    HalfFloat(const HalfFloat& other)
        : bits(other.GetBits())
    {}

    /** Builds a half from raw field values: fraction @p _m, exponent @p _e
     *  and sign @p _s. No validation is performed.
     *  @note The exponent is unsigned and biased by #BIAS
     */
    inline HalfFloat(uint16_t _m, uint16_t _e, uint16_t _s);

    /** Conversion from a single-precision float
     */
    inline HalfFloat(float other);

    /** Conversion from a double-precision float
     */
    inline HalfFloat(const double);

    /** Conversion from half to float
     */
    inline operator float() const;

    /** Conversion from half to double
     */
    inline operator double() const;

    /** Assignment from another half, a float or a double.
     */
    inline HalfFloat& operator= (HalfFloat other);
    inline HalfFloat& operator= (float other);
    inline HalfFloat& operator= (const double other);

    /** Equality comparison
     */
    inline bool operator== (HalfFloat other) const;
    inline bool operator!= (HalfFloat other) const;

    /** Relational comparison against another half or against a float
     */
    inline bool operator< (HalfFloat other) const;
    inline bool operator> (HalfFloat other) const;
    inline bool operator<= (HalfFloat other) const;
    inline bool operator>= (HalfFloat other) const;
    inline bool operator< (float other) const;
    inline bool operator> (float other) const;
    inline bool operator<= (float other) const;
    inline bool operator>= (float other) const;

    /** Compound assignment with a half or a float right-hand side
     */
    inline HalfFloat& operator += (HalfFloat other);
    inline HalfFloat& operator -= (HalfFloat other);
    inline HalfFloat& operator *= (HalfFloat other);
    inline HalfFloat& operator /= (HalfFloat other);
    inline HalfFloat& operator += (float other);
    inline HalfFloat& operator -= (float other);
    inline HalfFloat& operator *= (float other);
    inline HalfFloat& operator /= (float other);

    /** Prefix and postfix increment
     */
    inline HalfFloat& operator++();
    inline HalfFloat operator++(int);

    /** Prefix and postfix decrement
     */
    inline HalfFloat& operator--();
    inline HalfFloat operator--(int);

    /** Unary minus
     */
    inline HalfFloat operator-() const;

    /** Direct access to the 16 bit pattern of the value
     */
    inline uint16_t GetBits() const;
    inline uint16_t& GetBits();

    /** Classification of the stored value
     */
    inline bool IsNaN() const;
    inline bool IsInfinity() const;
    inline bool IsDenorm() const;

    /** Returns the sign of the floating-point value -
     *  true stands for positive.
     */
    inline bool GetSign() const;

public:
    /** The stored value, viewed either as one 16 bit word or as its
     *  fraction / exponent / sign fields.
     */
    union
    {
        uint16_t bits; // all 16 bits at once
        struct
        {
            uint16_t Frac : 10; // fraction (mantissa)
            uint16_t Exp : 5; // biased exponent
            uint16_t Sign : 1; // sign bit
        } IEEE;
    };

    /** Field view of a single-precision float, used by the conversions. */
    union IEEESingle
    {
        float Float;
        struct
        {
            uint32_t Frac : 23;
            uint32_t Exp : 8;
            uint32_t Sign : 1;
        } IEEE;
    };

    /** Field view of a double-precision float, used by the conversions. */
    union IEEEDouble
    {
        double Double;
        struct
        {
            uint64_t Frac : 52;
            uint64_t Exp : 11;
            uint64_t Sign : 1;
        } IEEE;
    };

    // Enums can not store 64 bit values, so the double-precision exponent
    // constants are static members instead.
    static const uint64_t IEEEDouble_MaxExpontent = 0x7FF;
    static const uint64_t IEEEDouble_ExponentBias = IEEEDouble_MaxExpontent / 2;
};
/** 2. Binary operations
 *
 * Arithmetic on two halfs yields a half. Mixed half/float arithmetic, in
 * either operand order, yields a float. Only the prototypes are given here;
 * the definitions are presumably supplied by umHalf.inl, which this header
 * includes further down.
*/
inline HalfFloat operator+ (HalfFloat one, HalfFloat two);
inline HalfFloat operator- (HalfFloat one, HalfFloat two);
inline HalfFloat operator* (HalfFloat one, HalfFloat two);
inline HalfFloat operator/ (HalfFloat one, HalfFloat two);
inline float operator+ (HalfFloat one, float two);
inline float operator- (HalfFloat one, float two);
inline float operator* (HalfFloat one, float two);
inline float operator/ (HalfFloat one, float two);
inline float operator+ (float one, HalfFloat two);
inline float operator- (float one, HalfFloat two);
inline float operator* (float one, HalfFloat two);
inline float operator/ (float one, HalfFloat two);
///////////////////////////////////////////////////////////////////////////////////
/** 3. Specialization of std::numeric_limits for type half.
*/
///////////////////////////////////////////////////////////////////////////////////
namespace std {
    /** Specialization of std::numeric_limits for HalfFloat.
     *
     *  Values are built with the (fraction, exponent, sign) constructor of
     *  HalfFloat, where the exponent is the raw biased field.
     *
     *  NOTE(review): digits, digits10, min_exponent and max_exponent keep the
     *  original "conservative" figures; they do not follow the exact
     *  numeric_limits conventions for a binary16 type - confirm before
     *  relying on them.
     */
    template <>
    class numeric_limits<HalfFloat> {
    public:
        // General -- meaningful for all specializations.
        static const bool is_specialized = true;

        /** Smallest positive normalized value: exponent field 1, fraction 0. */
        static HalfFloat min()
        {
            return HalfFloat(0, 1, 0);
        }

        /** Largest finite value: all fraction bits set, exponent field 30. */
        static HalfFloat max()
        {
            return HalfFloat(~0, HalfFloat::MAX_EXPONENT_VALUE - 1, 0);
        }

        static const int radix = 2;
        static const int digits = 10; // conservative assumption
        static const int digits10 = 2; // conservative assumption
        static const bool is_signed = true;
        // A floating-point type is not an integer type.
        static const bool is_integer = false;
        static const bool is_exact = false;
        static const bool traps = false;
        static const bool is_modulo = false;
        static const bool is_bounded = true;

        // Floating point specific.

        /** Difference between 1.0 and the next representable value. */
        static HalfFloat epsilon()
        {
            return HalfFloat(0.00097656f);
        } // from OpenEXR, needs to be confirmed

        static HalfFloat round_error()
        {
            return HalfFloat(0.00097656f / 2);
        }

        static const int min_exponent10 = HalfFloat::MIN_EXPONENT10;
        static const int max_exponent10 = HalfFloat::MAX_EXPONENT10;
        static const int min_exponent = HalfFloat::MIN_EXPONENT;
        static const int max_exponent = HalfFloat::MAX_EXPONENT;
        static const bool has_infinity = true;
        static const bool has_quiet_NaN = true;
        static const bool has_signaling_NaN = true;
        static const bool is_iec559 = false;
        // numeric_limits declares has_denorm as float_denorm_style, not bool.
        static const float_denorm_style has_denorm = denorm_present;
        static const bool tinyness_before = false;
        static const float_round_style round_style = round_to_nearest;

        /** Smallest positive denormal: fraction 1, exponent field 0, sign 0. */
        static HalfFloat denorm_min()
        {
            return HalfFloat(1, 0, 0);
        }

        /** Positive infinity: exponent field all ones, fraction 0. */
        static HalfFloat infinity()
        {
            return HalfFloat(0, HalfFloat::MAX_EXPONENT_VALUE, 0);
        }

        /** A NaN: exponent field all ones, non-zero fraction. */
        static HalfFloat quiet_NaN()
        {
            return HalfFloat(1, HalfFloat::MAX_EXPONENT_VALUE, 0);
        }

        /** Same bit pattern as quiet_NaN(); the type does not distinguish
         *  quiet from signaling NaNs.
         */
        static HalfFloat signaling_NaN()
        {
            return HalfFloat(1, HalfFloat::MAX_EXPONENT_VALUE, 0);
        }
    };
} // end namespace std
#include "./umHalf.inl"
#ifndef UM_HALF_NO_TYPEDEFS
typedef HalfFloat float16;
typedef HalfFloat halfCPU;
#endif
#endif // !! UM_HALF_H_INCLUDED
///////////////////////////////////////////////////////////////////////////////////
/*
Copyright (c) 2006-2008, Alexander Gessler
All rights reserved.
Redistribution and use of this software in source and binary forms,
with or without modification, are permitted provided that the following
conditions are met:
* Redistributions of source code must retain the above
copyright notice, this list of conditions and the
following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the
following disclaimer in the documentation and/or other
materials provided with the distribution.
* Neither the name of the ASSIMP team, nor the names of its
contributors may be used to endorse or promote products
derived from this software without specific prior
written permission of the ASSIMP Development Team.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
///////////////////////////////////////////////////////////////////////////////////
#ifndef UM_HALF_INL_INCLUDED
#define UM_HALF_INL_INCLUDED
#ifdef _MSC_VER
#include <intrin.h>
#pragma intrinsic(_BitScanReverse)
#endif
// ------------------------------------------------------------------------------------------------
/** Converts a single-precision float to half precision.
 *
 *  The sign is always copied. Zero and single-precision denormals become
 *  zero, infinity stays infinity, and any NaN becomes a NaN with fraction 1.
 *  Finite values with an unbiased exponent below -24 become zero, values
 *  below -14 become half denormals, and values above 15 become infinity.
 *  Fraction bits that do not fit are dropped by shifting (no rounding).
 */
inline HalfFloat::HalfFloat(float other)
{
    IEEESingle src;
    src.Float = other;

    IEEE.Sign = src.IEEE.Sign;

    // zero or single-precision denormal
    if (src.IEEE.Exp == 0)
    {
        IEEE.Exp = 0;
        IEEE.Frac = 0;
        return;
    }

    // NaN or INF
    if (src.IEEE.Exp == 0xff)
    {
        IEEE.Exp = 31;
        IEEE.Frac = (src.IEEE.Frac != 0) ? 1 : 0;
        return;
    }

    // regular number: remove the single-precision bias
    const int unbiased = src.IEEE.Exp - 127;

    if (unbiased < -24)
    {
        // too small even for a denormal, this maps to 0
        IEEE.Exp = 0;
        IEEE.Frac = 0;
    }
    else if (unbiased < -14)
    {
        // this maps to a denorm; shift is in the range 1..10 here
        const unsigned int shift = (unsigned int)(-14 - unbiased);
        IEEE.Exp = 0;
        // The implicit leading one moves to bit (10 - shift) and the float
        // fraction follows behind it. For shift == 10 only that one is left.
        IEEE.Frac = (1024u >> shift) + (src.IEEE.Frac >> (13 + shift));
    }
    else if (unbiased > 15)
    {
        // too large, map this value to infinity
        IEEE.Exp = 31;
        IEEE.Frac = 0;
    }
    else
    {
        // normalized half: re-bias the exponent, keep the top 10 fraction bits
        IEEE.Exp = unbiased + 15;
        IEEE.Frac = (src.IEEE.Frac >> 13);
    }
}
// ------------------------------------------------------------------------------------------------
/** Converts a double-precision float to half precision.
 *
 *  The sign is always copied. Zero and double-precision subnormals become
 *  zero, infinity stays infinity, and any NaN becomes a NaN with fraction 1.
 *  Finite values with an unbiased exponent below -24 become zero, values
 *  below -14 become half subnormals, and values above 15 become infinity.
 *  Fraction bits that do not fit are dropped by shifting (no rounding).
 */
inline HalfFloat::HalfFloat(const double p_Reference)
{
    // Copy the argument into the union instead of casting a reference to it:
    // reading a double through a reinterpret_cast'ed union reference breaks
    // the strict aliasing rules. This mirrors the float constructor.
    IEEEDouble l_Reference;
    l_Reference.Double = p_Reference;
    // Copy the sign bit.
    this->IEEE.Sign = l_Reference.IEEE.Sign;
    // Check for special values: Is the exponent zero?
    if (0 == l_Reference.IEEE.Exp)
    {
        // A zero exponent indicates either a zero or a subnormal number. A subnormal double can
        // not be represented as a half, so either one will be saved as a zero.
        this->IEEE.Exp = 0;
        this->IEEE.Frac = 0;
    }
    // Is the exponent all one?
    else if (IEEEDouble_MaxExpontent == l_Reference.IEEE.Exp)
    {
        this->IEEE.Exp = MAX_EXPONENT_VALUE;
        // A zero fraction indicates an infinite value.
        if (0 == l_Reference.IEEE.Frac)
            this->IEEE.Frac = 0;
        // A nonzero fraction indicates NaN. Such a fraction contains further information, e.g. to
        // distinguish a QNaN from a SNaN. However, we can not just shift-copy the fraction:
        // if the bits that survive the shift were all zero we would save an infinite value, so we
        // abandon the fraction information and set it to a nonzero value.
        else
            this->IEEE.Frac = 1;
    }
    // A usual value?
    else
    {
        // First, we have to adjust the exponent. It is stored as an unsigned int, to reconstruct
        // its original value we have to subtract its bias (half of its range).
        const int64_t l_AdjustedExponent = l_Reference.IEEE.Exp - IEEEDouble_ExponentBias;
        // Very small values will be rounded to zero.
        if (-24 > l_AdjustedExponent)
        {
            this->IEEE.Frac = 0;
            this->IEEE.Exp = 0;
        }
        // Some small values can be stored as subnormal values.
        else if (-14 > l_AdjustedExponent)
        {
            // The exponent of subnormal values is always zero.
            this->IEEE.Exp = 0;
            // The exponent will now be stored in the fraction.
            const int16_t l_NewExponent = int16_t(-14 - l_AdjustedExponent); // 2 ^ -l_NewExponent
            this->IEEE.Frac = (1024 >> l_NewExponent) + int16_t(l_Reference.IEEE.Frac >> (42 + l_NewExponent));
        }
        // Very large numbers will be rounded to infinity.
        else if (15 < l_AdjustedExponent)
        {
            // Exponent all one, fraction zero.
            this->IEEE.Exp = MAX_EXPONENT_VALUE;
            this->IEEE.Frac = 0;
        }
        // All remaining numbers can be converted directly.
        else
        {
            // We reconstructed the exponent by subtracting the bias. To store it as an unsigned
            // int, we need to add the bias again.
            this->IEEE.Exp = l_AdjustedExponent + BIAS;
            // When storing the fraction, we abandon its least significant bits by right-shifting.
            // The fraction of a double is 42 bits wider than that of a half, so we shift 42 bits.
            this->IEEE.Frac = (l_Reference.IEEE.Frac >> 42);
        }
    } // else usual number
}
// ------------------------------------------------------------------------------------------------
/** Builds a half from raw field values without any validation.
 *  @param _m fraction bits
 *  @param _e exponent field (unsigned, biased by #BIAS)
 *  @param _s sign bit
 */
inline HalfFloat::HalfFloat(uint16_t _m, uint16_t _e, uint16_t _s)
{
    IEEE.Sign = _s;
    IEEE.Exp = _e;
    IEEE.Frac = _m;
}
// ------------------------------------------------------------------------------------------------
/** Converts the half to a single-precision float.
 *
 *  Infinity and NaN keep their class (a NaN gets fraction 1), zero keeps
 *  its sign, normalized values are re-biased, and half denormals are
 *  computed arithmetically as sign * fraction/1024 * 2^-14.
 */
HalfFloat::operator float() const
{
    IEEESingle out;
    out.IEEE.Sign = IEEE.Sign;

    if (31 == IEEE.Exp)
    {
        // infinity or NaN
        out.IEEE.Exp = 0xff;
        out.IEEE.Frac = (IEEE.Frac != 0) ? 1 : 0;
    }
    else if (IEEE.Exp)
    {
        // normalized: 112 is the difference of the two biases (127 - 15)
        out.IEEE.Exp = IEEE.Exp + 112;
        out.IEEE.Frac = (IEEE.Frac << 13);
    }
    else if (IEEE.Frac)
    {
        // half denormal; the assignment replaces the whole float, sign included
        const float half_denorm = (1.0f / 16384.0f);
        float mantissa = ((float)(IEEE.Frac)) / 1024.0f;
        float sgn = (IEEE.Sign) ? -1.0f : 1.0f;
        out.Float = sgn * mantissa*half_denorm;
    }
    else
    {
        // signed zero
        out.IEEE.Frac = 0;
        out.IEEE.Exp = 0;
    }
    return out.Float;
}
// ------------------------------------------------------------------------------------------------
/** Converts the half to a double-precision float.
 *
 *  Zero keeps its sign, subnormals are computed arithmetically, infinity
 *  stays infinity, a NaN keeps its fraction bits (shifted into the wider
 *  field), and normalized values are re-biased.
 */
inline HalfFloat::operator double(void) const
{
    IEEEDouble l_Result;
    // Copy the sign bit.
    l_Result.IEEE.Sign = this->IEEE.Sign;

    if (0 == this->IEEE.Exp)
    {
        if (0 == this->IEEE.Frac)
        {
            // In a zero, both the exponent and the fraction are zero.
            l_Result.IEEE.Exp = 0;
            l_Result.IEEE.Frac = 0;
        }
        else
        {
            // Zero exponent with a nonzero fraction: the number is subnormal.
            // sign * 2^-14 * fraction
            l_Result.Double = (this->IEEE.Sign ? -1.0 : +1.0) / 16384.0 * (double(this->IEEE.Frac) / 1024.0);
        }
    }
    else if (MAX_EXPONENT_VALUE == this->IEEE.Exp)
    {
        // Exponent all one: infinity or NaN.
        l_Result.IEEE.Exp = IEEEDouble_MaxExpontent;
        if (0 == this->IEEE.Frac)
        {
            // A zero fraction indicates an infinite value.
            l_Result.IEEE.Frac = 0;
        }
        else
        {
            // A nonzero fraction indicates a NaN. A double fraction is 42 bits wider than a
            // half fraction, so the fraction is re-used by left-shifting it.
            l_Result.IEEE.Frac = uint64_t(this->IEEE.Frac) << 42;
        }
    }
    else
    {
        // A usual value: remove the half bias and apply the double bias.
        l_Result.IEEE.Exp = uint64_t(this->IEEE.Exp) - BIAS + IEEEDouble_ExponentBias;
        // A double fraction is 42 bits wider than a half fraction, so we can just left-shift it.
        l_Result.IEEE.Frac = uint64_t(this->IEEE.Frac) << 42;
    }
    return l_Result.Double;
}
// ------------------------------------------------------------------------------------------------
/** True when the exponent field is all ones and the fraction is nonzero. */
inline bool HalfFloat::IsNaN() const
{
    return IEEE.Exp == MAX_EXPONENT_VALUE && IEEE.Frac != 0;
}
// ------------------------------------------------------------------------------------------------
/** True when the exponent field is all ones and the fraction is zero. */
inline bool HalfFloat::IsInfinity() const
{
    return IEEE.Exp == MAX_EXPONENT_VALUE && IEEE.Frac == 0;
}
// ------------------------------------------------------------------------------------------------
/** True when the exponent field is zero.
 *  Only the exponent is inspected, so positive and negative zero are
 *  reported as denormal too.
 */
inline bool HalfFloat::IsDenorm() const
{
    return !IEEE.Exp;
}
// ------------------------------------------------------------------------------------------------
/** Returns true when the sign bit is clear, i.e. the value is positive. */
inline bool HalfFloat::GetSign() const
{
    return !IEEE.Sign;
}
// ------------------------------------------------------------------------------------------------
/** Takes over the bit pattern of @p other and returns *this. */
inline HalfFloat& HalfFloat::operator= (HalfFloat other)
{
    this->bits = other.GetBits();
    return *this;
}
// ------------------------------------------------------------------------------------------------
/** Converts @p other to half precision, stores it and returns *this. */
inline HalfFloat& HalfFloat::operator= (float other)
{
    return *this = HalfFloat(other);
}
// ------------------------------------------------------------------------------------------------
/** Converts @p p_Reference to half precision, stores it and returns *this. */
inline HalfFloat& HalfFloat::operator= (const double p_Reference)
{
    const HalfFloat l_Converted(p_Reference);
    *this = l_Converted;
    return *this;
}
// ------------------------------------------------------------------------------------------------
/** Equality comparison.
 *  +0 and -0 compare equal; a NaN never compares equal, not even to itself.
 */
inline bool HalfFloat::operator== (HalfFloat other) const
{
    // +0 and -0 are considered to be equal. Mask the sign bit off instead of
    // shifting it out: `bits << 1` is computed as int after integral
    // promotion, so the sign bit would survive the shift and -0 would never
    // be recognised as a zero.
    if (((bits | other.bits) & 0x7FFF) == 0)
        return true;
    // Identical bit patterns are equal unless they encode a NaN.
    return bits == other.bits && !this->IsNaN();
}
// ------------------------------------------------------------------------------------------------
/** Inequality comparison.
 *  +0 and -0 are not unequal; a NaN is unequal to everything, itself included.
 */
inline bool HalfFloat::operator!= (HalfFloat other) const
{
    // +0 and -0 are considered to be equal. Mask the sign bit off instead of
    // shifting it out: `bits << 1` is computed as int after integral
    // promotion, so the sign bit would survive the shift and -0 would never
    // be recognised as a zero.
    if (((bits | other.bits) & 0x7FFF) == 0)
        return false;
    return bits != other.bits || this->IsNaN();
}
// ------------------------------------------------------------------------------------------------
/** Less-than comparison. Any comparison involving a NaN yields false. */
inline bool HalfFloat::operator< (HalfFloat other) const
{
    // NaN comparisons are always false
    if (this->IsNaN() || other.IsNaN())
        return false;
    // The encoding is sign-magnitude with the segment order s,e,m, so the
    // low 15 bits order the magnitudes. Comparing the raw bits as int16_t
    // is not enough: it reverses the order of two negative values and puts
    // -0 below +0. Build a signed key from magnitude and sign instead.
    const int lhsMagnitude = bits & 0x7FFF;
    const int rhsMagnitude = other.bits & 0x7FFF;
    const int lhsKey = (bits & 0x8000) ? -lhsMagnitude : lhsMagnitude;
    const int rhsKey = (other.bits & 0x8000) ? -rhsMagnitude : rhsMagnitude;
    return lhsKey < rhsKey;
}
// ------------------------------------------------------------------------------------------------
/** Greater-than comparison. Any comparison involving a NaN yields false. */
inline bool HalfFloat::operator> (HalfFloat other) const
{
    // NaN comparisons are always false
    if (this->IsNaN() || other.IsNaN())
        return false;
    // The encoding is sign-magnitude with the segment order s,e,m, so the
    // low 15 bits order the magnitudes. Comparing the raw bits as int16_t
    // is not enough: it reverses the order of two negative values and puts
    // +0 above -0. Build a signed key from magnitude and sign instead.
    const int lhsMagnitude = bits & 0x7FFF;
    const int rhsMagnitude = other.bits & 0x7FFF;
    const int lhsKey = (bits & 0x8000) ? -lhsMagnitude : lhsMagnitude;
    const int rhsKey = (other.bits & 0x8000) ? -rhsMagnitude : rhsMagnitude;
    return lhsKey > rhsKey;
}
// ------------------------------------------------------------------------------------------------
/** Less-or-equal comparison. Any comparison involving a NaN yields false. */
inline bool HalfFloat::operator<= (HalfFloat other) const
{
    // operator> is false for NaN operands, so negating it alone would
    // report NaN <= x as true.
    if (this->IsNaN() || other.IsNaN())
        return false;
    return !(*this > other);
}
// ------------------------------------------------------------------------------------------------
/** Greater-or-equal comparison. Any comparison involving a NaN yields false. */
inline bool HalfFloat::operator>= (HalfFloat other) const
{
    // operator< is false for NaN operands, so negating it alone would
    // report NaN >= x as true.
    if (this->IsNaN() || other.IsNaN())
        return false;
    return !(*this < other);
}
// ------------------------------------------------------------------------------------------------
/** Adds @p other using the half/half operator+ and stores the sum. */
inline HalfFloat& HalfFloat::operator += (HalfFloat other)
{
    const HalfFloat sum = *this + other;
    return *this = sum;
}
// ------------------------------------------------------------------------------------------------
/** Subtracts @p other using the half/half operator- and stores the result. */
inline HalfFloat& HalfFloat::operator -= (HalfFloat other)
{
    const HalfFloat difference = *this - other;
    return *this = difference;
}
// ------------------------------------------------------------------------------------------------
inline HalfFloat& HalfFloat::operator *= (HalfFloat other)
{
	// The product is computed in single precision and converted back on assignment.
	const float product = static_cast<float>(*this) * static_cast<float>(other);
	*this = product;
	return *this;
}
// ------------------------------------------------------------------------------------------------
inline HalfFloat& HalfFloat::operator /= (HalfFloat other)
{
	// The quotient is computed in single precision and converted back on assignment.
	const float quotient = static_cast<float>(*this) / static_cast<float>(other);
	*this = quotient;
	return *this;
}
// ------------------------------------------------------------------------------------------------
inline HalfFloat& HalfFloat::operator += (float other)
{
	// The float operand is converted to half first, then added with the half operator+.
	const HalfFloat addend = static_cast<HalfFloat>(other);
	*this = *this + addend;
	return *this;
}
// ------------------------------------------------------------------------------------------------
inline HalfFloat& HalfFloat::operator -= (float other)
{
	// The float operand is converted to half first, then subtracted with the half operator-.
	const HalfFloat subtrahend = static_cast<HalfFloat>(other);
	*this = *this - subtrahend;
	return *this;
}
// ------------------------------------------------------------------------------------------------
inline HalfFloat& HalfFloat::operator *= (float other)
{
	// Multiplies in single precision; the result is converted back on assignment.
	const float product = static_cast<float>(*this) * other;
	*this = product;
	return *this;
}
// ------------------------------------------------------------------------------------------------
inline HalfFloat& HalfFloat::operator /= (float other)
{
	// Divides in single precision; the result is converted back on assignment.
	const float quotient = static_cast<float>(*this) / other;
	*this = quotient;
	return *this;
}
// ------------------------------------------------------------------------------------------------
inline HalfFloat& HalfFloat::operator++()
{
	// Prefix increment: adds 1.0 and returns this object.
	// An exponent field equal to BIAS encodes an exponent of 0; combined with a zero
	// mantissa and a clear sign bit this is exactly 1.0.
	*this += HalfFloat(0, BIAS, 0);
	return *this;
}
// ------------------------------------------------------------------------------------------------
inline HalfFloat HalfFloat::operator++(int)
{
	// Postfix increment: adds 1.0 and returns the value held before the increment.
	const HalfFloat previous(*this);
	*this += HalfFloat(0, BIAS, 0);
	return previous;
}
// ------------------------------------------------------------------------------------------------
inline HalfFloat& HalfFloat::operator--()
{
	// Prefix decrement: subtracts 1.0 (zero mantissa, exponent field BIAS) and returns this object.
	*this -= HalfFloat(0, BIAS, 0);
	return *this;
}
// ------------------------------------------------------------------------------------------------
inline HalfFloat HalfFloat::operator--(int)
{
	// Postfix decrement: subtracts 1.0 and returns the value held before the decrement.
	const HalfFloat previous(*this);
	*this -= HalfFloat(0, BIAS, 0);
	return previous;
}
// ------------------------------------------------------------------------------------------------
inline HalfFloat HalfFloat::operator-() const
{
	// Unary minus: same mantissa and exponent, complemented sign field.
	// NOTE(review): ~IEEE.Sign is evaluated as an int; this presumably relies on the
	// constructor truncating the value back to a single sign bit - confirm.
	const HalfFloat negated(IEEE.Frac, IEEE.Exp, ~IEEE.Sign);
	return negated;
}
// ------------------------------------------------------------------------------------------------
inline uint16_t HalfFloat::GetBits() const
{
return bits;
}
// ------------------------------------------------------------------------------------------------
inline uint16_t& HalfFloat::GetBits()
{
return bits;
}
// ------------------------------------------------------------------------------------------------
inline HalfFloat operator+ (HalfFloat one, HalfFloat two)
{
	// Adds two half values.
	// Unless HALFFLOAT_NO_CUSTOM_IMPLEMENTATIONS is defined the sum is computed with
	// integer arithmetic on the mantissas; bits that do not fit into the result
	// mantissa are truncated. Otherwise the sum is computed in single precision.
#if (!defined HALFFLOAT_NO_CUSTOM_IMPLEMENTATIONS)
	// ---- NaN and infinity ----
	const bool oneSpecial = (one.IEEE.Exp == HalfFloat::MAX_EXPONENT_VALUE);
	const bool twoSpecial = (two.IEEE.Exp == HalfFloat::MAX_EXPONENT_VALUE);
	if (oneSpecial || twoSpecial)
	{
		// if one of the components is NaN the result becomes NaN, too.
		const bool oneNaN = oneSpecial && (0 != one.IEEE.Frac);
		const bool twoNaN = twoSpecial && (0 != two.IEEE.Frac);
		if (oneNaN || twoNaN)
			return HalfFloat(1, HalfFloat::MAX_EXPONENT_VALUE, 0);
		// infinities of opposite sign cancel to NaN
		if (oneSpecial && twoSpecial && one.IEEE.Sign != two.IEEE.Sign)
			return HalfFloat(1, HalfFloat::MAX_EXPONENT_VALUE, 0);
		// otherwise the result is the infinite operand, keeping ITS sign (or-ing the
		// two sign bits would turn +inf + (-1) into -inf)
		return oneSpecial ? one : two;
	}
	// ---- align the operands ----
	// make 'one' the operand with the larger exponent so that only left shifts by a
	// non-negative amount are needed
	if (one.IEEE.Exp < two.IEEE.Exp)
		std::swap(one, two);
	const long hiddenBit = 1L << HalfFloat::BITS_MANTISSA;
	long m1 = one.IEEE.Frac;
	long m2 = two.IEEE.Frac;
	int e1 = one.IEEE.Exp;
	int e2 = two.IEEE.Exp;
	// normalized values carry a hidden bit; denormals have none and share the scale
	// of exponent field 1
	if (0 != e1) m1 |= hiddenBit; else e1 = 1;
	if (0 != e2) m2 |= hiddenBit; else e2 = 1;
	int expDiff = e1 - e2;
	// If the shifted mantissa would not fit into a long, 'two' is far below the
	// precision of 'one' and cannot change the (truncated) result.
	if (expDiff >= (int)((sizeof(long) << 3) - (HalfFloat::BITS_MANTISSA + 1)))
		return one;
	m1 <<= expDiff;
	// ---- add ----
	// convert from sign-bit to two's complement representation
	if (one.IEEE.Sign) m1 = -m1;
	if (two.IEEE.Sign) m2 = -m2;
	m1 += m2;
	HalfFloat out;
	if (m1 < 0)
	{
		out.IEEE.Sign = 1;
		m1 = -m1;
	}
	else out.IEEE.Sign = 0;
	if (0 == m1) return HalfFloat(0, 0, 0);
	// ---- renormalize the result to fit in a half ----
	// index of the most significant set bit of the magnitude
	int top;
#ifdef _MSC_VER
	unsigned long msbIndex = 0;
	_BitScanReverse(&msbIndex, (unsigned long)m1);
	top = (int)msbIndex;
#else
	// __builtin_clzl counts leading zeros; convert the count into a bit index
	top = (int)(sizeof(unsigned long) << 3) - 1 - __builtin_clzl((unsigned long)m1);
#endif
	// m1 is scaled to exponent e2; moving its top bit to the hidden-bit position
	// changes the exponent by the same amount
	const int shift = top - HalfFloat::BITS_MANTISSA;
	const int newExp = e2 + shift;
	if (newExp >= HalfFloat::MAX_EXPONENT_VALUE)
	{
		// arithmetic overflow. return INF and keep the sign
		return HalfFloat(0, HalfFloat::MAX_EXPONENT_VALUE, out.IEEE.Sign);
	}
	if (newExp <= 0)
	{
		// this maps to a denorm: rescale from exponent e2 to the denormal scale
		// (exponent 1); the result is known to fit into the mantissa bits
		m1 <<= (e2 - 1);
		out.IEEE.Exp = 0;
	}
	else
	{
		// rebuild the normalized representation; the hidden bit is masked off below
		if (shift < 0) m1 <<= (-shift);
		else m1 >>= shift; // m1 >= 0
		out.IEEE.Exp = newExp;
	}
	out.IEEE.Frac = m1 & (hiddenBit - 1);
	return out;
#else
	return HalfFloat((float)one + (float)two);
#endif
}
// ------------------------------------------------------------------------------------------------
inline HalfFloat operator- (HalfFloat one, HalfFloat two)
{
	// Subtraction is implemented as addition of the negated second operand.
	const HalfFloat negated = -two;
	return HalfFloat(one + negated);
}
// ------------------------------------------------------------------------------------------------
inline HalfFloat operator* (HalfFloat one, HalfFloat two)
{
	// Multiplies in single precision and converts the product back to half.
	const float product = static_cast<float>(one) * static_cast<float>(two);
	return HalfFloat(product);
}
// ------------------------------------------------------------------------------------------------
inline HalfFloat operator/ (HalfFloat one, HalfFloat two)
{
	// Divides in single precision and converts the quotient back to half.
	const float quotient = static_cast<float>(one) / static_cast<float>(two);
	return HalfFloat(quotient);
}
// ------------------------------------------------------------------------------------------------
inline float operator+ (HalfFloat one, float two)
{
	// Mixed half/float addition: the half is widened and the result stays a float.
	const float lhs = static_cast<float>(one);
	return lhs + two;
}
// ------------------------------------------------------------------------------------------------
inline float operator- (HalfFloat one, float two)
{
	// Mixed half/float subtraction: the half is widened and the result stays a float.
	const float lhs = static_cast<float>(one);
	return lhs - two;
}
// ------------------------------------------------------------------------------------------------
inline float operator* (HalfFloat one, float two)
{
	// Mixed half/float multiplication: the half is widened and the result stays a float.
	const float lhs = static_cast<float>(one);
	return lhs * two;
}
// ------------------------------------------------------------------------------------------------
inline float operator/ (HalfFloat one, float two)
{
	// Mixed half/float division: the half is widened and the result stays a float.
	const float lhs = static_cast<float>(one);
	return lhs / two;
}
// ------------------------------------------------------------------------------------------------
inline float operator+ (float one, HalfFloat two)
{
	// Addition is commutative, so forward to the (HalfFloat, float) overload.
	const float sum = two + one;
	return sum;
}
// ------------------------------------------------------------------------------------------------
inline float operator- (float one, HalfFloat two)
{
	// Mixed float/half subtraction: returns one - two as a float.
	// Subtraction is not commutative, so the operands must not be swapped the way
	// the addition and multiplication overloads do.
	return one - static_cast<float>(two);
}
// ------------------------------------------------------------------------------------------------
inline float operator* (float one, HalfFloat two)
{
	// Multiplication is commutative, so forward to the (HalfFloat, float) overload.
	const float product = two * one;
	return product;
}
// ------------------------------------------------------------------------------------------------
inline float operator/ (float one, HalfFloat two)
{
	// Mixed float/half division: returns one / two as a float.
	// Division is not commutative, so the operands must not be swapped the way
	// the addition and multiplication overloads do.
	return one / static_cast<float>(two);
}
#endif //!! UM_HALF_INL_INCLUDED
halfa: order=2 dimsize=4,4 dtype=X_FLOAT16 dense=1.000000
bc68 342d ae59 bcd7 b46a 3c1c 2c25 beb9 bcaf 3d72 3fc2 38d0 bd6b bce4 3854 ad13
This source diff could not be displayed because it is too large. You can view the blob instead.
#include <stdio.h>
#include <direct.h>
#include "../../core/CHeader.h"
#include "../../core/utilities/FlushToMem.h"
#include "../../core/getandset/ConvertDataType.h"
#include "../../XTensor.h"
#include "../../XGlobal.h"
using namespace nts;
// Round-trips a random FP16 tensor through a dump file and compares a float BMMul
// with a half BMMul by dumping both results to stderr.
// Returns 0 on success and 1 when the dump path cannot be built or opened.
int main(int argc, const char ** argv) {
	// Build the dump path in a buffer we own. getcwd(NULL, 0) returns a buffer sized
	// for the current directory only, so appending to it writes past its end.
	static const char suffix[] = "\\source\\tensor\\halfLib\\half\\dump";
	char path[FILENAME_MAX];
	if (getcwd(path, (int)sizeof(path)) == NULL) {
		fprintf(stderr, "cannot determine the current directory\n");
		return 1;
	}
	if (strlen(path) + sizeof(suffix) > sizeof(path)) {
		fprintf(stderr, "dump path is too long\n");
		return 1;
	}
	strcat(path, suffix);
	int dim = 4;
	int devId = 0;
	XTensor a;
	XTensor b;
	XTensor c;
	XTensor halfa;
	XTensor halfb;
	XTensor halfc;
	InitTensor2DV2(&a, dim, dim, X_FLOAT, devId);
	InitTensor2DV2(&c, dim, dim, X_FLOAT, devId);
	InitTensor2DV2(&halfb, dim, dim, X_FLOAT16, devId);
	a.SetDataRand(-2.0, 2.0);
	c.SetDataRand(-2.0, 2.0);
	halfa = ConvertDataType(a, X_FLOAT16);
	halfc = ConvertDataType(c, X_FLOAT16);
	printf("============save model================\n");
	halfa.Dump(&halfa, stderr, "halfa:");
	GPUToCPUFlush(&halfa);
	FILE * file = fopen(path, "wb");
	if (file == NULL) {
		fprintf(stderr, "cannot open %s for writing\n", path);
		return 1;
	}
	halfa.Dump(file, "halfa:");
	//a.Dump(file, "a");
	fclose(file);
	printf("==============read model=============\n");
	FILE *read = fopen(path, "rb");
	if (read == NULL) {
		fprintf(stderr, "cannot open %s for reading\n", path);
		return 1;
	}
	halfb.Read(read, "halfa:");
	//b.Read(read, "a");
	fclose(read);
	halfb.Dump(&halfb, stderr, "halfb:");
	printf("==============BMMUL=============\n");
	b = BMMul(a, X_NOTRANS, c, X_NOTRANS);
	b.Dump(stderr,"b:");
	printf("==============BMMUL-float=============\n");
	halfa= BMMul(halfb, X_NOTRANS, halfc, X_NOTRANS);
	halfa.Dump(&halfa, stderr, "halfla:");
	return 0;
}
\ No newline at end of file
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
#include <cuda_fp16.h>
//#ifndef HALF_ARITHMETIC_TYPE
//#define HALF_ARITHMETIC_TYPE
//#endif // !HALF_ARITHMETIC_TYPE
#include "half.hpp"
using half_float::halfFloat;
typedef half_float::halfFloat halfC;
// Element-wise product C[i] = A[i] * B[i]; each thread handles the element at its
// global x index. No bounds check is performed.
__global__ void matrixMulKernel(__half *C, __half *A, __half *B) {
	const int idx = threadIdx.x + blockDim.x * blockIdx.x;
	C[idx] = A[idx] * B[idx];
}
// Fills the first 'size' elements of 'data' with 'val'.
void constantInit(halfC *data, int size, halfC val) {
	halfC *const end = data + size;
	for (halfC *cursor = data; cursor < end; ++cursor) {
		*cursor = val;
	}
}
// Multiplies two constant half vectors element-wise on the GPU and on the CPU and
// prints the raw 16-bit patterns of both results side by side ("gpu--cpu").
void matrixMul() {
	const unsigned int N = 128;
	const unsigned int threadsPerBlock = 16;
	const unsigned int size = N * sizeof(halfC);
	halfC *h_A = (halfC*)malloc(size);
	halfC *h_B = (halfC*)malloc(size);
	halfC *h_C = (halfC*)malloc(size);
	halfC *h_D = (halfC*)malloc(size);
	__half *d_A = NULL, *d_B = NULL, *d_C = NULL;
	cudaError_t status = cudaSuccess;
	if (h_A == NULL || h_B == NULL || h_C == NULL || h_D == NULL) {
		fprintf(stderr, "host allocation failed\n");
	}
	else {
		// Initialize host memory
		const halfC valB = (halfC)0.01f;
		constantInit(h_A, N, (halfC)1.0f);
		constantInit(h_B, N, valB);
		status = cudaMalloc((void**)&d_A, size);
		if (status == cudaSuccess) status = cudaMalloc((void**)&d_B, size);
		if (status == cudaSuccess) status = cudaMalloc((void**)&d_C, size);
		//copy host memory to device
		if (status == cudaSuccess) status = cudaMemcpy(d_A, h_A, size, cudaMemcpyHostToDevice);
		if (status == cudaSuccess) status = cudaMemcpy(d_B, h_B, size, cudaMemcpyHostToDevice);
		if (status == cudaSuccess) {
			// The kernel only uses the x index, so a 1-D launch with exactly N threads
			// covers every element once.
			dim3 block(threadsPerBlock);
			dim3 grid(N / threadsPerBlock);
			// Execute the kernel
			matrixMulKernel << <grid, block >> > (d_C, d_A, d_B);
			status = cudaGetLastError();
		}
		// Copy the memory from device to host
		if (status == cudaSuccess) status = cudaMemcpy(h_C, d_C, size, cudaMemcpyDeviceToHost);
		if (status != cudaSuccess) {
			fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(status));
		}
		else {
			// CPU reference
			for (unsigned int k = 0; k < N; k++) {
				h_D[k] = h_A[k] * h_B[k];
			}
			for (unsigned int i = 0; i < N; i++) {
				// Class objects must not be passed through printf's varargs; copy out
				// the leading 16 bits, which is the layout the device copies above
				// already assume.
				unsigned short gpuBits = 0, cpuBits = 0;
				memcpy(&gpuBits, &h_C[i], sizeof(gpuBits));
				memcpy(&cpuBits, &h_D[i], sizeof(cpuBits));
				printf("%hx--%hx ", gpuBits, cpuBits);
				if ((i + 1) % 8 == 0)
					printf("\n");
			}
		}
	}
	// Free
	free(h_A);
	free(h_B);
	free(h_C);
	free(h_D);
	cudaFree(d_A);
	cudaFree(d_B);
	cudaFree(d_C);
}
// Entry point: runs the half-precision multiplication comparison once.
int main() {
	matrixMul();
	return 0;
}
//
//#define THREAD_NUM 256
//#define MATRIX_SIZE 4
//const halfC blocks_num = MATRIX_SIZE * (MATRIX_SIZE + THREAD_NUM - 1) / THREAD_NUM;
//
//__global__ static void matMultCUDA(const __half* a, const __half* b, __half* c, halfC n, clock_t* time)
//{
//
// //表示目前的 thread 是第几个 thread(由 0 开始计算)
// const halfC tid = threadIdx.x;
//
// //表示目前的 thread 属于第几个 block(由 0 开始计算)
// const halfC bid = blockIdx.x;
//
// //从 bid 和 tid 计算出这个 thread 应该计算的 row 和 column
// const halfC idx = bid * THREAD_NUM + tid;
// const halfC row = idx / n;
// const halfC column = idx % n;
//
// halfC i;
//
// //记录运算开始的时间
// clock_t start;
//
// //只在 thread 0(即 threadIdx.x = 0 的时候)进行记录,每个 block 都会记录开始时间及结束时间
// if (tid == 0)
// time[bid] = clock();
//
// //计算矩阵乘法
// if (row < n && column < n)
// {
// __half t = __half(0.0);
// for (i = 0; i < n; i++)
// {
// t += a[row * n + i] * b[i * n + column];
// }
// c[row * n + column] = t;
// }
//
// //计算时间,记录结果,只在 thread 0(即 threadIdx.x = 0 的时候)进行,每个 block 都会记录开始时间及结束时间
// if (tid == 0)
// {
// time[bid + blocks_num] = clock();
// }
//}
//
//bool InitCuda() {
// halfC count;
// halfC device;
// cudaGetDeviceCount(&count);
// if (count == 0) {
// fprhalfCf(stderr, "There is no device !\n");
// }
// else
// device = 1;
// cudaSetDevice(device);
// return true;
//}
//template <class T >
//void matgen(T *a, halfC n) {
// halfC i, j;
// for (i = 0; i < n; i++) {
// for (j = 0; j < n; j++) {
// a[i * n + j] = (T)rand() / (0x7FFF) + (halfC)rand() / (0x7FFF * 0x7FFF);
// }
// }
//}
//
//
//halfC main(halfC argc, char **argv) {
//
// //CUDA 初始化
// if (!InitCuda())
// return 0;
//
// //定义矩阵
// halfC *a, *b, *c, *d;
//
// halfC n = MATRIX_SIZE;
//
// //分配内存
// a = (halfC*)malloc(sizeof(halfC)* n * n);
// b = (halfC*)malloc(sizeof(halfC)* n * n);
// c = (halfC*)malloc(sizeof(halfC)* n * n);
// d = (halfC*)malloc(sizeof(halfC)* n * n);
//
// //设置随机数种子
// srand(0);
//
// //随机生成矩阵
// matgen(a, n);
// matgen(b, n);
//
// for (halfC i = 0; i < n; i++)
// {
// for (halfC j = 0; j < n; j++)
// {
// prhalfCf("%x ", a[i * n + j]);
// }
// prhalfCf("\n");
// }
//
// ///*把数据复制到显卡内存中*/
// __half *cuda_a, *cuda_b, *cuda_c;
//
// clock_t* time;
//
// //cudaMalloc 取得一块显卡内存
// cudaMalloc((void**)&cuda_a, sizeof(__half)* n * n);
// cudaMalloc((void**)&cuda_b, sizeof(__half)* n * n);
// cudaMalloc((void**)&cuda_c, sizeof(__half)* n * n);
//
// cudaMalloc((void**)&time, sizeof(clock_t)* blocks_num * 2);
//
// //cudaMemcpy 将产生的矩阵复制到显卡内存中
// //cudaMemcpyHostToDevice - 从内存复制到显卡内存
// //cudaMemcpyDeviceToHost - 从显卡内存复制到内存
// cudaMemcpy(cuda_a, a, sizeof(__half)* n * n, cudaMemcpyHostToDevice);
// cudaMemcpy(cuda_b, b, sizeof(__half)* n * n, cudaMemcpyHostToDevice);
//
// // 在CUDA 中执行函数 语法:函数名称<<<block 数目, thread 数目, shared memory 大小>>>(参数...);
// matMultCUDA << < blocks_num, THREAD_NUM, 0 >> > (cuda_a, cuda_b, cuda_c, n, time);
//
// /*把结果从显示芯片复制回主内存*/
//
// clock_t time_use[blocks_num * 2];
//
// //cudaMemcpy 将结果从显存中复制回内存
// cudaMemcpy(c, cuda_c, sizeof(halfC)* n * n, cudaMemcpyDeviceToHost);
// cudaMemcpy(&time_use, time, sizeof(clock_t)* blocks_num * 2, cudaMemcpyDeviceToHost);
//
// for (halfC i = 0; i < n; i++)
// {
// for (halfC j = 0; j < n; j++)
// {
// prhalfCf("%x ", c[i * n + j]);
// }
// prhalfCf("\n");
// }
//
// //Free cuda
// cudaFree(cuda_a);
// cudaFree(cuda_b);
// cudaFree(cuda_c);
// cudaFree(time);
////把每个 block 最早的开始时间,和最晚的结束时间相减,取得总运行时间
//clock_t min_start, max_end;
//min_start = time_use[0];
//max_end = time_use[blocks_num];
//for (halfC i = 1; i < blocks_num; i++)
//{
// if (min_start > time_use[i]) min_start = time_use[i];
// if (max_end < time_use[i + blocks_num]) max_end = time_use[i + blocks_num];
//}
////核函数运行时间
//clock_t final_time = max_end - min_start;
////CPU矩阵乘法,存入矩阵d
//for (halfC i = 0; i < n; i++)
//{
// for (halfC j = 0; j < n; j++)
// {
// double t = 0;
// for (halfC k = 0; k < n; k++){
// t += a[i * n + k] * b[k * n + j];
// }
// d[i * n + j] = t;
// }
//}
////验证正确性与精确性
//halfC max_err = (halfC)0.0;
//halfC average_err = (halfC)0;
//for (halfC i = 0; i < n; i++)
//{
// for (halfC j = 0; j < n; j++)
// {
// if (d[i * n + j] != 0)
// {
// //fabs求浮点数x的绝对值
// halfC err = fabs((c[i * n + j] - d[i * n + j]) / d[i * n + j]);
// if (max_err < err) max_err = err;
// average_err += err;
// }
// }
//}
//prhalfCf("Max error: %g Average error: %g\n", max_err, average_err / (n * n));
//prhalfCf("gputime: %d\n", final_time);
//
// return 0;
//}
\ No newline at end of file
// test - Test application for half-precision floating point functionality.
//
// Copyright (c) 2012-2019 Christian Rau <rauy@users.sourceforge.net>
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
// WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
// COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//#define HALF_ENABLE_F16C_INTRINSICS 1
//#define HALF_ARITHMETIC_TYPE float
#define HALF_ROUND_STYLE 1
#include "half.hpp"
#include <utility>
#include <vector>
#include <string>
#include <map>
#include <set>
#include <iostream>
#include <iomanip>
#include <memory>
#include <algorithm>
#include <numeric>
#include <iterator>
#include <functional>
#include <fstream>
#include <random>
#include <bitset>
#include <limits>
#include <chrono>
#include <typeinfo>
#include <stdexcept>
#include <cstdint>
#include <cmath>
#if HALF_ENABLE_CPP11_HASH
#include <unordered_map>
#endif
#if HALF_ENABLE_CPP11_CMATH && !defined(HALF_ARITHMETIC_TYPE)
#include <cfenv>
#pragma STDC FENV_ACCESS ON
#endif
// Returns the number of right shifts needed to bring a positive 'i' down to zero,
// i.e. its bit length (1 -> 1, 8 -> 4). Zero and negative inputs yield 0.
int ilog2(int i)
{
	unsigned int shifts = 0;
	while (i > 0)
	{
		i >>= 1;
		++shifts;
	}
	return shifts;
}
// Times N passes of func over every element of x, storing each result in results[h].
// The elapsed milliseconds are written to log_ and, when csv_ is non-null, to *csv_.
// Expects 'results', 'log_' and 'csv_' to be in scope at the expansion site.
#define UNARY_PERFORMANCE_TEST(func, x, N) { \
auto start = std::chrono::high_resolution_clock::now(); \
for(unsigned int i=0; i<N; ++i) for(unsigned int h=0; h<x.size(); ++h) results[h] = func(x[h]); \
auto tm = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::high_resolution_clock::now()-start).count(); \
log_ << #func << "\tx " << N << ":\t" << tm << "\n\n"; if(csv_) *csv_ << #func << ';' << tm << '\n'; }
// Times func(x[i], y[j]) over every N-th element of x and of y, storing each result
// in results[j]. The elapsed milliseconds are written to log_ and, when csv_ is
// non-null, to *csv_. N is used unparenthesized, so pass a plain value.
// Expects 'results', 'log_' and 'csv_' to be in scope at the expansion site.
#define BINARY_PERFORMANCE_TEST(func, x, y, N) { \
auto start = std::chrono::high_resolution_clock::now(); \
for(unsigned int i=0; i<x.size(); i+=N) for(unsigned int j=0; j<y.size(); j+=N) results[j] = func(x[i], y[j]); \
auto tm = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::high_resolution_clock::now()-start).count(); \
log_ << #func << "\t@ 1/" << (N*N) << ":\t" << tm << "\n\n"; if(csv_) *csv_ << #func << ';' << tm << '\n'; }
// Times the infix expression x[i] op y[j] over every N-th element of x and of y,
// storing each result in results[j]. The elapsed milliseconds are written to log_
// and, when csv_ is non-null, to *csv_. N is used unparenthesized, so pass a plain value.
// Expects 'results', 'log_' and 'csv_' to be in scope at the expansion site.
#define OPERATOR_PERFORMANCE_TEST(op, x, y, N) { \
auto start = std::chrono::high_resolution_clock::now(); \
for(unsigned int i=0; i<x.size(); i+=N) for(unsigned int j=0; j<y.size(); j+=N) results[j] = x[i] op y[j]; \
auto tm = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::high_resolution_clock::now()-start).count(); \
log_ << #op << "\t@ 1/" << (N*N) << ":\t" << tm << "\n\n"; if(csv_) *csv_ << #op << ';' << tm << '\n'; }
// Times func(x[i], y[j], z[k]) over every N-th element of x, y and z, storing each
// result in results[k]. The elapsed milliseconds are written to log_ and, when csv_
// is non-null, to *csv_. N is used unparenthesized, so pass a plain value.
// Expects 'results', 'log_' and 'csv_' to be in scope at the expansion site.
#define TERNARY_PERFORMANCE_TEST(func, x, y, z, N) { \
auto start = std::chrono::high_resolution_clock::now(); \
for(unsigned int i=0; i<x.size(); i+=N) for(unsigned int j=0; j<y.size(); j+=N) for(unsigned int k=0; k<z.size(); k+=N) results[k] = func(x[i], y[j], z[k]); \
auto tm = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::high_resolution_clock::now()-start).count(); \
log_ << #func << "\t@ 1/" << (N*N*N) << ":\t" << tm << "\n\n"; if(csv_) *csv_ << #func << ';' << tm << '\n'; }
using half_float::half;
using half_float::half_cast;
#if HALF_ENABLE_CPP11_USER_LITERALS
using namespace half_float::literal;
#endif
// Reinterprets a 16-bit pattern as a half without any numeric conversion.
half b2h(std::uint16_t bits)
{
	half *view = reinterpret_cast<half*>(&bits);
	return *view;
}
// Returns the 16-bit pattern stored at the start of a half, without numeric conversion.
std::uint16_t h2b(half h)
{
	std::uint16_t *view = reinterpret_cast<std::uint16_t*>(&h);
	return *view;
}
// Bit-exact comparison that additionally treats any two NaNs as equal.
bool comp(half a, half b)
{
	if (isnan(a) && isnan(b))
		return true;
	return h2b(a) == h2b(b);
}
// Value comparison (via operator==) that additionally treats any two NaNs as equal.
bool compz(half a, half b)
{
	if (isnan(a) && isnan(b))
		return true;
	return a == b;
}
// Picks one element of the pair depending on the rounding style R:
//  - round_toward_zero:         second if |first| > |second|
//  - round_toward_infinity:     second if second > first
//  - round_toward_neg_infinity: second if second <= first
// In every other case the first element is returned.
template<std::float_round_style R> half select(const std::pair<half, half> &hh)
{
	const bool takeSecond =
		(R == std::round_toward_zero && abs(hh.first) > abs(hh.second)) ||
		(R == std::round_toward_infinity && hh.second > hh.first) ||
		(R == std::round_toward_neg_infinity && hh.second <= hh.first);
	if (takeSecond)
		return hh.second;
	return hh.first;
}
class half_test
{
public:
half_test(std::ostream &log, std::ostream *csv, bool fast, bool rough)
: tests_(0), log_(log), csv_(csv), fast_(fast), rough_(rough)
{
//prepare halfs
half_vector batch;
std::uint16_t u = 0;
halfs_.insert(std::make_pair("positive zero", half_vector(1, b2h(u++))));
for (; u < 0x400; ++u)
batch.push_back(b2h(u));
halfs_.insert(std::make_pair("positive subn", std::move(batch)));
batch.clear();
for (; u < 0x7C00; ++u)
batch.push_back(b2h(u));
halfs_.insert(std::make_pair("positive norm", std::move(batch)));
batch.clear();
halfs_.insert(std::make_pair("positive inft", half_vector(1, b2h(u++))));
for (; u < 0x8000; ++u)
batch.push_back(b2h(u));
halfs_.insert(std::make_pair("positive NaN", std::move(batch)));
batch.clear();
halfs_.insert(std::make_pair("negative zero", half_vector(1, b2h(u++))));
for (; u < 0x8400; ++u)
batch.push_back(b2h(u));
halfs_.insert(std::make_pair("negative subn", std::move(batch)));
batch.clear();
for (; u < 0xFC00; ++u)
batch.push_back(b2h(u));
halfs_.insert(std::make_pair("negative norm", std::move(batch)));
batch.clear();
halfs_.insert(std::make_pair("negative inft", half_vector(1, b2h(u++))));
for (; u != 0; ++u)
batch.push_back(b2h(u));
halfs_.insert(std::make_pair("negative NaN", std::move(batch)));
//set classes
classes_["positive zero"] = FP_ZERO;
classes_["positive subn"] = FP_SUBNORMAL;
classes_["positive norm"] = FP_NORMAL;
classes_["positive inft"] = FP_INFINITE;
classes_["positive NaN"] = FP_NAN;
classes_["negative zero"] = FP_ZERO;
classes_["negative subn"] = FP_SUBNORMAL;
classes_["negative norm"] = FP_NORMAL;
classes_["negative inft"] = FP_INFINITE;
classes_["negative NaN"] = FP_NAN;
}
unsigned int test()
{
/*
//test size
simple_test("size", []() { return sizeof(half)*CHAR_BIT >= 16; });
//test conversion
unary_test("float conversion", [](half arg) { return comp(half_cast<half>(half_cast<float>(arg)), arg); });
unary_test("double conversion", [](half arg) { return comp(half_cast<half>(half_cast<double>(arg)), arg); });
unary_test("long double conversion", [](half arg) { return comp(half_cast<half>(half_cast<long double>(arg)), arg); });
//test classification
class_test("fpclassify", [](half arg, int cls) { return fpclassify(arg) == cls; });
class_test("isfinite", [](half arg, int cls) { return isfinite(arg) == (cls!=FP_INFINITE&&cls!=FP_NAN); });
class_test("isinf", [](half arg, int cls) { return isinf(arg) == (cls==FP_INFINITE); });
class_test("isnan", [](half arg, int cls) { return isnan(arg) == (cls==FP_NAN); });
class_test("isnormal", [](half arg, int cls) { return isnormal(arg) == (cls==FP_NORMAL); });
unary_test("signbit", [](half arg) -> bool { double f = arg; return isnan(arg) || f==0.0 || (signbit(arg)==(f<0.0)); });
//test operators
unary_test("prefix increment", [](half arg) -> bool { double f = half_cast<double>(arg);
return comp(static_cast<half>(++f), ++arg) && comp(half_cast<half>(f), arg); });
unary_test("prefix decrement", [](half arg) -> bool { double f = half_cast<double>(arg);
return comp(static_cast<half>(--f), --arg) && comp(half_cast<half>(f), arg); });
unary_test("postfix increment", [](half arg) -> bool { double f = half_cast<double>(arg);
return comp(static_cast<half>(f++), arg++) && comp(half_cast<half>(f), arg); });
unary_test("postfix decrement", [](half arg) -> bool { double f = half_cast<double>(arg);
return comp(static_cast<half>(f--), arg--) && comp(half_cast<half>(f), arg); });
unary_test("unary plus", [](half arg) { return comp(+arg, arg); });
unary_test("unary minus", [](half arg) { return comp(-arg, half_cast<half>(-half_cast<double>(arg))); });
binary_test("addition", [](half a, half b) { return comp(a+b, half_cast<half>(half_cast<double>(a)+half_cast<double>(b))); });
binary_test("subtraction", [](half a, half b) { return comp(a-b, half_cast<half>(half_cast<double>(a)-half_cast<double>(b))); });
binary_test("multiplication", [](half a, half b) { return comp(a*b, half_cast<half>(half_cast<double>(a)*half_cast<double>(b))); });
binary_test("division", [](half a, half b) { return comp(a/b, half_cast<half>(half_cast<double>(a)/half_cast<double>(b))); });
binary_test("equal", [](half a, half b) { return (a==b) == (half_cast<double>(a)==half_cast<double>(b)); });
binary_test("not equal", [](half a, half b) { return (a!=b) == (half_cast<double>(a)!=half_cast<double>(b)); });
binary_test("less", [](half a, half b) { return (a<b) == (half_cast<double>(a)<half_cast<double>(b)); });
binary_test("greater", [](half a, half b) { return (a>b) == (half_cast<double>(a)>half_cast<double>(b)); });
binary_test("less equal", [](half a, half b) { return (a<=b) == (half_cast<double>(a)<=half_cast<double>(b)); });
binary_test("greater equal", [](half a, half b) { return (a>=b) == (half_cast<double>(a)>=half_cast<double>(b)); });
//test basic functions
unary_test("abs", [](half arg) { return comp(abs(arg), half_cast<half>(std::abs(half_cast<double>(arg)))); });
unary_test("fabs", [](half arg) { return comp(fabs(arg), half_cast<half>(std::fabs(half_cast<double>(arg)))); });
binary_test("fmod", [](half x, half y) { return comp(fmod(x, y), half_cast<half>(std::fmod(half_cast<double>(x), half_cast<double>(y)))); });
binary_test("fdim", [](half a, half b) -> bool { half c = fdim(a, b); return isnan(a) || isnan(b) ||
(isinf(a) && isinf(b) && signbit(a)==signbit(b)) || ((a>b) && comp(c, a-b)) || ((a<=b) && comp(c, half_cast<half>(0.0))); });
ternary_test("fma", [](half x, half y, half z) { return comp(fma(x, y, z), half_cast<half>(half_cast<double>(x)*half_cast<double>(y)+half_cast<double>(z))); });
// ternary_reference_test("fma", half_float::fma);
//test exponential functions
unary_reference_test("exp", half_float::exp);
unary_reference_test("exp2", half_float::exp2);
unary_reference_test("expm1", half_float::expm1);
unary_reference_test("log", half_float::log);
unary_reference_test("log10", half_float::log10);
unary_reference_test("log1p", half_float::log1p);
unary_reference_test("log2", half_float::log2);
//test power functions
unary_reference_test("sqrt", half_float::sqrt);
unary_reference_test("cbrt", half_float::cbrt);
binary_reference_test("pow", half_float::pow);
binary_reference_test<half(half,half)>("hypot", half_float::hypot);
// ternary_reference_test<half(half,half,half)>("hypot3", half_float::hypot);
//test trig functions
unary_reference_test("sin", half_float::sin);
unary_reference_test("cos", half_float::cos);
unary_reference_test("tan", half_float::tan);
unary_reference_test("asin", half_float::asin);
unary_reference_test("acos", half_float::acos);
unary_reference_test("atan", half_float::atan);
binary_reference_test("atan2", half_float::atan2);
//test hyp functions
unary_reference_test("sinh", half_float::sinh);
unary_reference_test("cosh", half_float::cosh);
unary_reference_test("tanh", half_float::tanh);
unary_reference_test("asinh", half_float::asinh);
unary_reference_test("acosh", half_float::acosh);
unary_reference_test("atanh", half_float::atanh);
//test err functions
unary_reference_test("erf", half_float::erf);
unary_reference_test("erfc", half_float::erfc);
unary_reference_test("lgamma", half_float::lgamma);
unary_reference_test("tgamma", half_float::tgamma);
//test round functions
unary_test("ceil", [](half arg) { return comp(ceil(arg), half_cast<half>(std::ceil(half_cast<double>(arg)))); });
unary_test("floor", [](half arg) { return comp(floor(arg), half_cast<half>(std::floor(half_cast<double>(arg)))); });
unary_test("trunc", [](half arg) { return !isfinite(arg) || compz(trunc(arg), half_cast<half>(static_cast<int>(arg))); });
unary_test("round", [](half arg) { return !isfinite(arg) || compz(round(arg),
half_cast<half>(static_cast<int>(static_cast<double>(arg)+(signbit(arg) ? -0.5 : 0.5)))); });
unary_test("lround", [](half arg) { return !isfinite(arg) || lround(arg) ==
static_cast<long>(static_cast<double>(arg)+(signbit(arg) ? -0.5 : 0.5)); });
unary_test("nearbyint", [](half arg) { return !isfinite(arg) || compz(nearbyint(arg), half_cast<half>(half_cast<int>(arg))); });
unary_test("rint", [](half arg) { return !isfinite(arg) || compz(rint(arg), half_cast<half>(half_cast<int>(arg))); });
unary_test("lrint", [](half arg) { return !isfinite(arg) || lrint(arg) == half_cast<long>(arg); });
#if HALF_ENABLE_CPP11_LONG_LONG
unary_test("llround", [](half arg) { return !isfinite(arg) || llround(arg) ==
static_cast<long long>(static_cast<double>(arg)+(signbit(arg) ? -0.5 : 0.5)); });
unary_test("llrint", [](half arg) { return !isfinite(arg) || llrint(arg) == half_cast<long long>(arg); });
#endif
//test float functions
unary_test("frexp", [](half arg) -> bool { int eh, ef; bool eq = comp(frexp(arg, &eh),
static_cast<half>(std::frexp(static_cast<double>(arg), &ef))); return eq && (!isfinite(arg) || eh==ef); });
unary_test("ldexp", [](half arg) -> bool { unsigned int passed = 0; for(int i=-50; i<50; ++i) passed +=
comp(ldexp(arg, i), static_cast<half>(std::ldexp(static_cast<double>(arg), i))); return passed==100; });
unary_test("modf", [](half arg) -> bool { half h; double f; return comp(modf(arg, &h), static_cast<half>(
std::modf(static_cast<double>(arg), &f))) && comp(h, static_cast<half>(f)); });
binary_test("nextafter", [](half a, half b) -> bool { half c = nextafter(a, b); std::int16_t d = std::abs(
static_cast<std::int16_t>(h2b(a)-h2b(c))); return ((isnan(a) || isnan(b)) && isnan(c)) ||
(compz(a, b) && compz(b, c)) || ((d==1||d==0x7FFF) && (a<b)==(a<c)); });
binary_test("nexttoward", [](half a, half b) -> bool { half c = nexttoward(a, static_cast<long double>(b)); std::int16_t d = std::abs(
static_cast<std::int16_t>(h2b(a)-h2b(c))); return ((isnan(a) || isnan(b)) && isnan(c)) ||
(compz(a, b) && compz(b, c)) || ((d==1||d==0x7FFF) && (a<b)==(a<c)); });
binary_test("copysign", [](half a, half b) -> bool { half h = copysign(a, b); return comp(abs(h), abs(a)) && signbit(h)==signbit(b); });
#if HALF_ENABLE_CPP11_CMATH
//test basic functions
binary_test("remainder", [](half x, half y) { return comp(remainder(x, y), half_cast<half>(std::remainder(half_cast<double>(x), half_cast<double>(y)))); });
binary_test("remquo", [](half a, half b) -> bool { int qh = 0, qf = 0; return comp(remquo(a, b, &qh),
half_cast<half>(std::remquo(static_cast<double>(a), static_cast<double>(b), &qf))) && (qh&7)==(qf&7); });
binary_test("fmin", [](half x, half y) { return comp(fmin(x, y), half_cast<half>(std::fmin(half_cast<double>(x), half_cast<double>(y)))); });
binary_test("fmax", [](half x, half y) { return comp(fmax(x, y), half_cast<half>(std::fmax(half_cast<double>(x), half_cast<double>(y)))); });
binary_test("fdim", [](half x, half y) { return comp(fdim(x, y), half_cast<half>(std::fdim(half_cast<double>(x), half_cast<double>(y)))); });
ternary_test("fma", [](half x, half y, half z) { return comp(fma(x, y, z), half_cast<half>(std::fma(half_cast<double>(x), half_cast<double>(y), half_cast<double>(z)))); });
//test round functions
unary_test("trunc", [](half arg) { return comp(trunc(arg), half_cast<half>(std::trunc(half_cast<double>(arg)))); });
unary_test("round", [](half arg) { return comp(round(arg), half_cast<half>(std::round(half_cast<double>(arg)))); });
unary_test("lround", [](half arg) { return !isfinite(arg) || lround(arg) == std::lround(static_cast<double>(arg)); });
unary_test("llround", [](half arg) { return !isfinite(arg) || llround(arg) == std::llround(static_cast<double>(arg)); });
#if HALF_ROUND_STYLE == 1
unary_test("nearbyint", [](half arg) { return comp(nearbyint(arg), half_cast<half>(std::nearbyint(half_cast<double>(arg)))); });
unary_test("rint", [](half arg) { return comp(rint(arg), half_cast<half>(std::rint(half_cast<double>(arg)))); });
unary_test("lrint", [](half arg) { return !isfinite(arg) || half_float::lrint(arg) == std::lrint(static_cast<double>(arg)); });
unary_test("llrint", [](half arg) { return !isfinite(arg) || llrint(arg) == std::llrint(static_cast<double>(arg)); });
#endif
//test float functions
unary_test("scalbn", [](half arg) -> bool { unsigned int passed = 0; for(int i=-50; i<50; ++i) passed +=
comp(scalbn(arg, i), static_cast<half>(std::scalbn(static_cast<double>(arg), i))); return passed==100; });
unary_test("scalbln", [](half arg) -> bool { unsigned int passed = 0; for(long i=-50; i<50; ++i) passed +=
comp(scalbln(arg, i), static_cast<half>(std::scalbln(static_cast<double>(arg), i))); return passed==100; });
unary_test("ilogb", [](half arg) { return ilogb(arg) == std::ilogb(static_cast<double>(arg)); });
unary_test("logb", [](half arg) { return comp(logb(arg), static_cast<half>(std::logb(static_cast<double>(arg)))); });
binary_test("copysign", [](half a, half b) { return comp(copysign(a, b),
static_cast<half>(std::copysign(static_cast<double>(a), static_cast<double>(b)))); });
//test classification functions
unary_test("fpclassify", [](half arg) -> bool { int ch=fpclassify(arg), cf=std::fpclassify(
static_cast<double>(arg)); return ch==cf || (ch==FP_SUBNORMAL && cf==FP_NORMAL); });
unary_test("isfinite", [](half arg) { return isfinite(arg) == std::isfinite(static_cast<double>(arg)); });
unary_test("isinf", [](half arg) { return isinf(arg) == std::isinf(static_cast<double>(arg)); });
unary_test("isnan", [](half arg) { return isnan(arg) == std::isnan(static_cast<double>(arg)); });
unary_test("isnormal", [](half arg) { return isnormal(arg) == std::isnormal(static_cast<double>(arg)) ||
(!isnormal(arg) && fpclassify(arg)==FP_SUBNORMAL); });
unary_test("signbit", [](half arg) { return signbit(arg) == std::signbit(static_cast<double>(arg)); });
//test comparison functions
binary_test("isgreater", [](half a, half b) { return isgreater(a, b) == std::isgreater(static_cast<double>(a), static_cast<double>(b)); });
binary_test("isgreaterequal", [](half a, half b) { return isgreaterequal(a, b) == std::isgreaterequal(static_cast<double>(a), static_cast<double>(b)); });
binary_test("isless", [](half a, half b) { return isless(a, b) == std::isless(static_cast<double>(a), static_cast<double>(b)); });
binary_test("islessequal", [](half a, half b) { return islessequal(a, b) == std::islessequal(static_cast<double>(a), static_cast<double>(b)); });
binary_test("islessgreater", [](half a, half b) { return islessgreater(a, b) == std::islessgreater(static_cast<double>(a), static_cast<double>(b)); });
binary_test("isunordered", [](half a, half b) { return isunordered(a, b) == std::isunordered(static_cast<double>(a), static_cast<double>(b)); });
#endif
//test rounding
float_test("round_to_nearest", [](float f) -> bool { half a = half_cast<half,std::round_indeterminate>(f),
b(nextafter(a, copysign(std::numeric_limits<half>::infinity(), a))), h = half_cast<half,std::round_to_nearest>(f);
float af(a), bf(b), hf(h); return half_float::detail::builtin_isnan(f) || (std::abs(hf)>std::abs(f)&&comp(h, b)&&((std::abs(f-af)>std::abs(bf-f) ||
(std::abs(f-af)==std::abs(bf-f)&&!(h2b(h)&1)))||isinf(h))) || (std::abs(hf)<=std::abs(f)&&comp(h, a)&&((std::abs(f-af)<std::abs(bf-f) ||
(std::abs(f-af)==std::abs(bf-f)&&!(h2b(h)&1)))||isinf(h))); });
float_test("round_toward_zero", [](float f) -> bool { half a = half_cast<half,std::round_indeterminate>(f),
h = half_cast<half,std::round_toward_zero>(f); float af(a), hf(h); return half_float::detail::builtin_isnan(f) || isinf(a) || af == hf; });
float_test("round_toward_infinity", [](float f) -> bool { half a = half_cast<half,std::round_toward_zero>(f),
b(nextafter(a, copysign(std::numeric_limits<half>::infinity(), a))), h = half_cast<half,std::round_toward_infinity>(f);
float hf(h); return half_float::detail::builtin_isnan(f) || (comp(h, a)&&(signbit(h)||hf==f)) || (comp(h, b)&&!signbit(h)&&hf>f); });
float_test("round_toward_neg_infinity", [](float f) -> bool { half a = half_cast<half,std::round_toward_zero>(f),
b(nextafter(a, copysign(std::numeric_limits<half>::infinity(), a))), h = half_cast<half,std::round_toward_neg_infinity>(f);
float hf(h); return half_float::detail::builtin_isnan(f) || (comp(h, a)&&(!signbit(h)||hf==f)) || (comp(h, b)&&signbit(h)&&hf<f); });
//test float casting
auto rand23 = std::bind(std::uniform_int_distribution<std::uint32_t>(0, (1<<23)-1), std::default_random_engine());
unary_test("half_cast<float>", [](half arg) -> bool { float a = half_cast<float>(arg), b = static_cast<float>(arg);
return *reinterpret_cast<std::uint32_t*>(&a) == *reinterpret_cast<std::uint32_t*>(&b); });
unary_test("half_cast<round_to_nearest>(float)", [&rand23](half arg) -> bool { float f = half_cast<float>(arg);
std::uint32_t n=rand23(), m=1<<13; if(fpclassify(arg)==FP_SUBNORMAL) m <<= std::min(std::max(-ilogb(arg)-14, 0), 10);
*reinterpret_cast<std::uint32_t*>(&f) |= n&(m-1)&-isfinite(arg); return fpclassify(arg)==FP_ZERO ||
comp(half_cast<half,std::round_to_nearest>(f), ((n&(m>>1)) && ((n&((m>>1)-1)) || (h2b(arg)&1)))
? nextafter(arg, copysign(std::numeric_limits<half>::infinity(), arg)) : arg); });
unary_test("half_cast<round_toward_zero>(float)", [&rand23](half arg) -> bool { float f = half_cast<float>(arg);
std::uint32_t n=rand23(), m=1<<13; if(fpclassify(arg)==FP_SUBNORMAL) m <<= std::min(std::max(-ilogb(arg)-14, 0), 10);
*reinterpret_cast<std::uint32_t*>(&f) |= n&(m-1)&-isfinite(arg); return comp(half_cast<half,std::round_toward_zero>(f), arg); });
unary_test("half_cast<round_toward_infinity>(float)", [&rand23](half arg) -> bool { float f = half_cast<float>(arg);
std::uint32_t n=rand23(), m=1<<13; if(fpclassify(arg)==FP_SUBNORMAL) m <<= std::min(std::max(-ilogb(arg)-14, 0), 10);
*reinterpret_cast<std::uint32_t*>(&f) |= n&(m-1)&-isfinite(arg); return comp(half_cast<half,std::round_toward_infinity>(f),
(!signbit(arg)&&(n&(m-1))) ? nextafter(arg, copysign(std::numeric_limits<half>::infinity(), arg)) : arg); });
unary_test("half_cast<round_toward_neg_infinity>(float)", [&rand23](half arg) -> bool { float f = half_cast<float>(arg);
std::uint32_t n=rand23(), m=1<<13; if(fpclassify(arg)==FP_SUBNORMAL) m <<= std::min(std::max(-ilogb(arg)-14, 0), 10);
*reinterpret_cast<std::uint32_t*>(&f) |= n&(m-1)&-isfinite(arg); return comp(half_cast<half,std::round_toward_neg_infinity>(f),
(signbit(arg)&&(n&(m-1))) ? nextafter(arg, copysign(std::numeric_limits<half>::infinity(), arg)) : arg); });
//test double casting
auto rand52 = std::bind(std::uniform_int_distribution<std::uint64_t>(0, (1ULL<<52)-1), std::default_random_engine());
unary_test("half_cast<double>", [](half arg) -> bool { double a = half_cast<double>(arg), b = static_cast<float>(arg);
return isnan(arg) || *reinterpret_cast<std::uint64_t*>(&a) == *reinterpret_cast<std::uint64_t*>(&b); });
unary_test("half_cast<round_to_nearest>(double)", [&rand52](half arg) -> bool { double f = half_cast<double>(arg);
std::uint64_t n=rand52(), m=1ULL<<42; if(fpclassify(arg)==FP_SUBNORMAL) m <<= std::min(std::max(-ilogb(arg)-14, 0), 10);
*reinterpret_cast<std::uint64_t*>(&f) |= n&(m-1)&-isfinite(arg); return fpclassify(arg)==FP_ZERO ||
comp(half_cast<half,std::round_to_nearest>(f), ((n&(m>>1)) && ((n&((m>>1)-1)) || (h2b(arg)&1)))
? nextafter(arg, copysign(std::numeric_limits<half>::infinity(), arg)) : arg); });
unary_test("half_cast<round_toward_zero>(double)", [&rand52](half arg) -> bool { double f = half_cast<double>(arg);
std::uint64_t n=rand52(), m=1ULL<<42; if(fpclassify(arg)==FP_SUBNORMAL) m <<= std::min(std::max(-ilogb(arg)-14, 0), 10);
*reinterpret_cast<std::uint64_t*>(&f) |= n&(m-1)&-isfinite(arg); return comp(half_cast<half,std::round_toward_zero>(f), arg); });
unary_test("half_cast<round_toward_infinity>(double)", [&rand52](half arg) -> bool { double f = half_cast<double>(arg);
std::uint64_t n=rand52(), m=1ULL<<42; if(fpclassify(arg)==FP_SUBNORMAL) m <<= std::min(std::max(-ilogb(arg)-14, 0), 10);
*reinterpret_cast<std::uint64_t*>(&f) |= n&(m-1)&-isfinite(arg); return comp(half_cast<half,std::round_toward_infinity>(f),
(!signbit(arg)&&(n&(m-1))) ? nextafter(arg, copysign(std::numeric_limits<half>::infinity(), arg)) : arg); });
unary_test("half_cast<round_toward_neg_infinity>(double)", [&rand52](half arg) -> bool { double f = half_cast<double>(arg);
std::uint64_t n=rand52(), m=1ULL<<42; if(fpclassify(arg)==FP_SUBNORMAL) m <<= std::min(std::max(-ilogb(arg)-14, 0), 10);
*reinterpret_cast<std::uint64_t*>(&f) |= n&(m-1)&-isfinite(arg); return comp(half_cast<half,std::round_toward_neg_infinity>(f),
(signbit(arg)&&(n&(m-1))) ? nextafter(arg, copysign(std::numeric_limits<half>::infinity(), arg)) : arg); });
//test casting to int
#if HALF_ENABLE_CPP11_CMATH
unary_test("half_cast<int>", [](half arg) -> bool { return !isfinite(arg) || half_cast<int>(arg) == static_cast<int>(nearbyint(arg)); });
#endif
unary_test("half_cast<int,round_to_nearest>", [](half arg) -> bool { float fi, ff = std::abs(std::modf(static_cast<float>(arg), &fi));
int i = static_cast<int>(fi); i += (-2*signbit(arg)+1) * (ff>0.5f || (ff==0.5f && i&1));
return !isfinite(arg) || half_cast<int,std::round_to_nearest>(arg) == i;
});
unary_test("half_cast<int,round_toward_zero>", [](half arg) -> bool { return !isfinite(arg) || half_cast<int,std::round_toward_zero>(arg) == static_cast<int>(arg); });
unary_test("half_cast<int,round_toward_infinity>", [](half arg) -> bool { float fi, ff = std::modf(static_cast<float>(arg), &fi);
return !isfinite(arg) || half_cast<int,std::round_toward_infinity>(arg) == (static_cast<int>(fi)+(ff>0.0f)); });
unary_test("half_cast<int,round_toward_neg_infinity>", [](half arg) -> bool { float fi, ff = std::modf(static_cast<float>(arg), &fi);
return !isfinite(arg) || half_cast<int,std::round_toward_neg_infinity>(arg) == (static_cast<int>(fi)-(ff<0.0f)); });
//test casting from int
int_test("half_cast<>(int)", [](int i) -> bool { return comp(half_cast<half>(i), half_cast<half>(static_cast<float>(i))); });
int_test("half_cast<round_to_nearest>(int)", [](int i) -> bool {
return comp(half_cast<half,std::round_to_nearest>(i), half_cast<half,std::round_to_nearest>(static_cast<float>(i))); });
int_test("half_cast<round_toward_zero>(int)", [](int i) -> bool {
return comp(half_cast<half,std::round_toward_zero>(i), half_cast<half,std::round_toward_zero>(static_cast<float>(i))); });
int_test("half_cast<round_toward_infinity>(int)", [](int i) -> bool {
return comp(half_cast<half,std::round_toward_infinity>(i), half_cast<half,std::round_toward_infinity>(static_cast<float>(i))); });
int_test("half_cast<round_toward_neg_infinity>(int)", [](int i) -> bool {
return comp(half_cast<half,std::round_toward_neg_infinity>(i), half_cast<half,std::round_toward_neg_infinity>(static_cast<float>(i))); });
//test numeric limits
unary_test("numeric_limits::min", [](half arg) { return !isnormal(arg) || signbit(arg) || arg>=std::numeric_limits<half>::min(); });
unary_test("numeric_limits::lowest", [](half arg) { return !isfinite(arg) || arg>=std::numeric_limits<half>::lowest(); });
unary_test("numeric_limits::max", [](half arg) { return !isfinite(arg) || arg<=std::numeric_limits<half>::max(); });
unary_test("numeric_limits::denorm_min", [](half arg) { return !isfinite(arg) ||
signbit(arg) || arg==static_cast<half>(0.0f) || arg>=std::numeric_limits<half>::denorm_min(); });
simple_test("numeric_limits::infinity", []() { return isinf(std::numeric_limits<half>::infinity()) &&
!signbit(std::numeric_limits<half>::infinity()); });
simple_test("numeric_limits::quiet_NaN", []() { return isnan(std::numeric_limits<half>::quiet_NaN()); });
simple_test("numeric_limits::signaling_NaN", []() { return isnan(std::numeric_limits<half>::signaling_NaN()); });
simple_test("numeric_limits::epsilon", []() { return nextafter(static_cast<half>(1.0f),
std::numeric_limits<half>::infinity())-static_cast<half>(1.0f) == std::numeric_limits<half>::epsilon(); });
binary_test("numeric_limits::round_error", [](half a, half b) -> bool { double c = static_cast<double>(a) +
static_cast<double>(b); return !isfinite(a) || !isfinite(b) || c>static_cast<double>(std::numeric_limits<half>::max()) ||
c<static_cast<double>(std::numeric_limits<half>::lowest()) || std::abs(c-static_cast<double>(
static_cast<half>(c)))<=std::ldexp(static_cast<double>(std::numeric_limits<half>::round_error()),
ilogb(static_cast<half>(c))-std::numeric_limits<half>::digits+1); });
#if HALF_ENABLE_CPP11_HASH
//test hash
binary_test("hash function", [](half a, half b) { return a != b || std::hash<half>()(a) == std::hash<half>()(b); });
struct { bool operator()(half a, half b) const { return h2b(a) == h2b(b); } } bincomp;
std::unordered_map<half,const half*,std::hash<half>,decltype(bincomp)> map(65536, std::hash<half>(), bincomp);
unary_test("hash insert", [&map](const half &arg) { return map.insert(std::make_pair(arg, &arg)).second; });
unary_test("hash retrieve", [&map](const half &arg) { return map[arg] == &arg; });
#endif
#if HALF_ENABLE_CPP11_USER_LITERALS
//test literals
simple_test("literals", []() -> bool { using namespace half_float::literal; return comp(0.0_h, half(0.0f)) && comp(-1.0_h, half(-1.0f)) &&
comp(+3.14159265359_h, half(3.14159265359f)) && comp(1e-2_h, half(1e-2f)) && comp(-4.2e3_h, half(-4.2e3f)); });
#endif
*/
if (failed_.empty())
log_ << "all tests passed\n";
else
{
log_ << (failed_.size()) << " OF " << tests_ << " FAILED:\n ";
std::copy(failed_.begin(), failed_.end(), std::ostream_iterator<std::string>(log_, "\n "));
log_ << '\n';
}
return failed_.size();
}
void performance_test()
{
std::vector<half> finite, positive, one2one, one2inf, neg2inf;
for (std::uint16_t u = 0; u < 0x7C00; ++u)
{
finite.push_back(b2h(u));
finite.push_back(b2h(u | 0x8000));
positive.push_back(b2h(u));
neg2inf.push_back(b2h(u));
if (u <= 0x3C00)
{
one2one.push_back(b2h(u));
one2one.push_back(b2h(u | 0x8000));
neg2inf.push_back(b2h(u | 0x8000));
}
else
one2inf.push_back(b2h(u));
}
std::vector<half> xs(finite), ys(finite), zs(finite), results(finite.size());
std::default_random_engine g;
std::shuffle(finite.begin(), finite.end(), g);
std::shuffle(positive.begin(), positive.end(), g);
std::shuffle(one2one.begin(), one2one.end(), g);
std::shuffle(one2inf.begin(), one2inf.end(), g);
std::shuffle(neg2inf.begin(), neg2inf.end(), g);
std::shuffle(xs.begin(), xs.end(), g);
std::shuffle(ys.begin(), ys.end(), g);
std::shuffle(zs.begin(), zs.end(), g);
/*
OPERATOR_PERFORMANCE_TEST(+, xs, ys, 4);
OPERATOR_PERFORMANCE_TEST(-, xs, ys, 4);
OPERATOR_PERFORMANCE_TEST(*, xs, ys, 4);
OPERATOR_PERFORMANCE_TEST(/, xs, ys, 4);
BINARY_PERFORMANCE_TEST(fdim, xs, ys, 8);
TERNARY_PERFORMANCE_TEST(fma, xs, ys, zs, 64);
UNARY_PERFORMANCE_TEST(exp, finite, 1000);
UNARY_PERFORMANCE_TEST(exp2, finite, 1000);
UNARY_PERFORMANCE_TEST(expm1, finite, 1000);
UNARY_PERFORMANCE_TEST(log, positive, 1000);
UNARY_PERFORMANCE_TEST(log10, positive, 1000);
UNARY_PERFORMANCE_TEST(log1p, neg2inf, 1000);
UNARY_PERFORMANCE_TEST(log2, positive, 1000);
UNARY_PERFORMANCE_TEST(sqrt, positive, 1000);
UNARY_PERFORMANCE_TEST(cbrt, finite, 1000);
BINARY_PERFORMANCE_TEST(pow, xs, ys, 8);
BINARY_PERFORMANCE_TEST(hypot, xs, ys, 8);
UNARY_PERFORMANCE_TEST(sin, finite, 1000);
UNARY_PERFORMANCE_TEST(cos, finite, 1000);
UNARY_PERFORMANCE_TEST(tan, finite, 1000);
UNARY_PERFORMANCE_TEST(asin, one2one, 1000);
UNARY_PERFORMANCE_TEST(acos, one2one, 1000);
UNARY_PERFORMANCE_TEST(atan, finite, 1000);
BINARY_PERFORMANCE_TEST(atan2, xs, ys, 8);
UNARY_PERFORMANCE_TEST(sinh, finite, 1000);
UNARY_PERFORMANCE_TEST(cosh, finite, 1000);
UNARY_PERFORMANCE_TEST(tanh, finite, 1000);
UNARY_PERFORMANCE_TEST(asinh, finite, 1000);
UNARY_PERFORMANCE_TEST(acosh, one2inf, 1000);
UNARY_PERFORMANCE_TEST(atanh, one2one, 1000);
UNARY_PERFORMANCE_TEST(erf, finite, 1000);
UNARY_PERFORMANCE_TEST(erfc, finite, 1000);
UNARY_PERFORMANCE_TEST(lgamma, finite, 1000);
UNARY_PERFORMANCE_TEST(tgamma, finite, 1000);
*/
}
private:
// A list of half values belonging to one named group.
typedef std::vector<half> half_vector;
// Maps a group name to the half values in that group.
typedef std::map<std::string, half_vector> test_map;
// Maps a group name to an int; class_test passes it to the predicate as the group's classification.
typedef std::map<std::string, int> class_map;
/// Runs a classification predicate over every value of every group in halfs_.
/// @param name label written to the log and recorded in failed_ on failure
/// @param test callable invoked as test(value, fpclass), where fpclass is classes_[group name];
///             its result is added to the per-group pass counter
/// @return true when every group passed completely, false otherwise
template<typename F> bool class_test(const std::string &name, F &&test)
{
    log_ << "testing " << name << ":\n";
    unsigned int clean_groups = 0;
    for (auto &group : halfs_)
    {
        // Look up the classification associated with this group's name.
        int fpclass = classes_[group.first];
        unsigned int ok = 0;
        for (auto &value : group.second)
            ok += test(value, fpclass);
        log_ << " " << group.first << ": ";
        if (ok != group.second.size())
        {
            log_ << (group.second.size() - ok) << " of " << group.second.size() << " FAILED\n";
            continue;
        }
        log_ << "all passed\n";
        ++clean_groups;
    }
    log_ << '\n';
    ++tests_;
    const bool all_clean = clean_groups == halfs_.size();
    if (!all_clean)
        failed_.push_back(name);
    return all_clean;
}
/// Runs a single parameterless check and records its outcome.
/// @param name label written to the log and recorded in failed_ on failure
/// @param test callable invoked once as test(); its result converts to bool
/// @return the outcome of the check
template<typename F> bool simple_test(const std::string &name, F &&test)
{
    log_ << "testing " << name << ": ";
    const bool ok = test();
    if (ok)
        log_ << "passed" << "\n\n";
    else
        log_ << "FAILED" << "\n\n";
    ++tests_;
    if (ok)
        return true;
    failed_.push_back(name);
    return false;
}
template<typename F> bool unary_test(const std::string &name, F &&test)
{
unsigned int count = 0, failed = 0;
log_ << "testing " << name << ":\n";
for (auto iterB = halfs_.begin(); iterB != halfs_.end(); ++iterB)
{
unsigned int passed = 0;
for (auto iterH = iterB->second.begin(); iterH != iterB->second.end(); ++iterH)
passed += test(*iterH);
log_ << " " << iterB->first << ": ";
if (passed == iterB->second.size())
{
log_ << "all passed\n";
++count;
}
else
{
failed += iterB->second.size() - passed;
log_ << (iterB->second.size() - passed) << " of " << iterB->second.size() << " FAILED\n";
}
}
if (csv_)
*csv_ << name << ";" << failed << '\n';
if (failed)
log_ << failed << " FAILED\n\n";
else
log_ << '\n';
++tests_;
if (count == halfs_.size())
return true;
failed_.push_back(name);
return false;
}
/// Runs a two-argument predicate over the cross product of all groups in halfs_.
/// In fast mode only every 64th element of each group is visited and, for groups holding at least
/// 64 values, a random value below 64 is OR-ed into the sampled bit pattern.
/// Outside fast mode, progress ("A x B ... done") is printed to std::cout per group pair.
/// @param name label written to the log/CSV and recorded in failed_ on failure
/// @param test callable invoked as test(a, b); its result converts to bool
/// @return true when every evaluated pair passed
template<typename F> bool binary_test(const std::string &name, F &&test)
{
    // 64-bit counters: the exhaustive cross product may exceed 32 bits when the groups are large,
    // and unsigned long is only 32 bits wide on LLP64 platforms such as Windows.
    unsigned long long tests = 0, count = 0;
    unsigned long step = fast_ ? 64 : 1;
    auto rand = std::bind(std::uniform_int_distribution<std::uint16_t>(0, step - 1), std::default_random_engine());
    std::set<std::string> failed_tests;
    log_ << "testing " << name << (fast_ ? ": " : ":\n");
    for (auto iterB1 = halfs_.begin(); iterB1 != halfs_.end(); ++iterB1)
    {
        unsigned int end1 = /*(iterB1->first.find("NaN")==std::string::npos) ?*/ iterB1->second.size() /*: 1*/;
        for (auto iterB2 = halfs_.begin(); iterB2 != halfs_.end(); ++iterB2)
        {
            if (!fast_)
                std::cout << iterB1->first << " x " << iterB2->first;
            bool failed = false;
            unsigned int end2 = /*(iterB2->first.find("NaN")==std::string::npos) ?*/ iterB2->second.size() /*: 1*/;
            for (unsigned int i = 0; i < end1; i += step)
            {
                half a = iterB1->second[i];
                // Randomize the bits skipped by the stride so repeated fast runs are not limited to aligned patterns.
                if (fast_ && end1 >= step)
                    a = b2h(h2b(a) | rand());
                for (unsigned int j = 0; j < end2; j += step)
                {
                    half b = iterB2->second[j];
                    if (fast_ && end2 >= step)
                        b = b2h(h2b(b) | rand());
                    bool success = test(a, b);
                    count += success;
                    failed = failed || !success;
                    ++tests;
                }
            }
            if (!fast_)
                std::cout << " done\n";
            // Remember which group combination produced at least one failure.
            if (failed)
                failed_tests.insert(iterB1->first + " x " + iterB2->first);
        }
    }
    bool passed = count == tests;
    if (csv_)
        *csv_ << name << ";" << (tests - count) << '\n';
    if (passed)
        log_ << "all passed\n\n";
    else
    {
        log_ << (tests - count) << " of " << tests << " FAILED\n";
        for (auto &&s : failed_tests)
            log_ << s << " FAILED\n";
        log_ << '\n';
        failed_.push_back(name);
    }
    ++tests_;
    return passed;
}
/// Runs a three-argument predicate over the triple cross product of all groups in halfs_.
/// In fast mode only every 256th element of each group is visited and, for groups holding at least
/// 256 values, a random value below 256 is OR-ed into the sampled bit pattern.
/// @param name label written to the log/CSV and recorded in failed_ on failure
/// @param test callable invoked as test(a, b, c); its result converts to bool
/// @return true when every evaluated triple passed
template<typename F> bool ternary_test(const std::string &name, F &&test)
{
    // 64-bit counters: the exhaustive triple cross product can exceed what a 32-bit unsigned int can count.
    unsigned long long tests = 0, count = 0;
    unsigned int step = fast_ ? 256 : 1;
    auto rand = std::bind(std::uniform_int_distribution<std::uint16_t>(0, step - 1), std::default_random_engine());
    std::set<std::string> failed_tests;
    log_ << "testing " << name << ": ";
    for (auto iterB1 = halfs_.begin(); iterB1 != halfs_.end(); ++iterB1)
    {
        unsigned int end1 = /*(iterB1->first.find("NaN")==std::string::npos) ?*/ iterB1->second.size() /*: 1*/;
        for (auto iterB2 = halfs_.begin(); iterB2 != halfs_.end(); ++iterB2)
        {
            unsigned int end2 = /*(iterB2->first.find("NaN")==std::string::npos) ?*/ iterB2->second.size() /*: 1*/;
            for (auto iterB3 = halfs_.begin(); iterB3 != halfs_.end(); ++iterB3)
            {
                bool failed = false;
                unsigned int end3 = /*(iterB3->first.find("NaN")==std::string::npos) ?*/ iterB3->second.size() /*: 1*/;
                for (unsigned int i = 0; i < end1; i += step)
                {
                    half a = iterB1->second[i];
                    // Randomize the bits skipped by the stride (fast mode only).
                    if (fast_ && end1 >= step)
                        a = b2h(h2b(a) | rand());
                    for (unsigned int j = 0; j < end2; j += step)
                    {
                        half b = iterB2->second[j];
                        if (fast_ && end2 >= step)
                            b = b2h(h2b(b) | rand());
                        for (unsigned int k = 0; k < end3; k += step)
                        {
                            half c = iterB3->second[k];
                            if (fast_ && end3 >= step)
                                c = b2h(h2b(c) | rand());
                            bool success = test(a, b, c);
                            count += success;
                            failed = failed || !success;
                            ++tests;
                        }
                    }
                }
                // Remember which group combination produced at least one failure.
                if (failed)
                    failed_tests.insert(iterB1->first + " x " + iterB2->first + " x " + iterB3->first);
            }
        }
    }
    bool passed = count == tests;
    if (csv_)
        *csv_ << name << ";" << (tests - count) << '\n';
    if (passed)
        log_ << "all passed\n\n";
    else
    {
        log_ << (tests - count) << " of " << tests << " failed\n\n";
        for (auto &&s : failed_tests)
            log_ << s << " FAILED\n";
        log_ << '\n';
        failed_.push_back(name);
    }
    ++tests_;
    return passed;
}
/// Runs a predicate over single-precision bit patterns reinterpreted as float.
/// Fast mode draws one million random 32-bit patterns; otherwise every one of the 2^32 patterns is visited once.
/// @param name label written to the log and recorded in failed_ on failure
/// @param test callable invoked as test(float); its result is added to the pass counter
/// @return true when every evaluated pattern passed
template<typename F> bool float_test(const std::string &name, F &&test)
{
    auto rand32 = std::bind(std::uniform_int_distribution<std::uint32_t>(0, std::numeric_limits<std::uint32_t>::max()), std::default_random_engine());
    unsigned long long count = 0, tests = fast_ ? 1000000ULL : (1ULL << 32);
    log_ << "testing " << name << ": ";
    if (fast_)
    {
        for (unsigned long long i = 0; i < tests; ++i)
        {
            std::uint32_t u = rand32();
            count += test(*reinterpret_cast<float*>(&u));
        }
    }
    else
    {
        // Visit all 2^32 patterns exactly once: start at 0 and stop when the counter wraps back to 0.
        // (A for-loop guarded by `i++ > 0` would exit before its first iteration because 0 > 0 is false.)
        std::uint32_t bits = 0;
        do
            count += test(*reinterpret_cast<float*>(&bits));
        while (++bits != 0);
    }
    bool passed = count == tests;
    if (passed)
        log_ << "all passed\n\n";
    else
    {
        log_ << (tests - count) << " of " << tests << " FAILED\n\n";
        failed_.push_back(name);
    }
    ++tests_;
    return passed;
}
template<typename F> bool int_test(const std::string &name, F &&test)
{
unsigned int count = 0, tests = (1 << 17) + 1;
log_ << "testing " << name << ": ";
for (int i = -(1 << 16); i <= (1 << 16); ++i)
count += test(i);
bool passed = count == tests;
if (passed)
log_ << "all passed\n\n";
else
{
log_ << (tests - count) << " of " << tests << " FAILED\n\n";
failed_.push_back(name);
}
++tests_;
return passed;
}
// Checks a one-argument function against precomputed results stored in the binary file "reference/<name>".
// The file is read as an array of (half, half) pairs indexed by the argument's bit pattern (h2b(arg)).
// name: test label and reference file name; fn: function under test, invoked as fn(half).
// Returns the result of the underlying unary_test; throws std::runtime_error when the file cannot be opened.
template<typename F> bool unary_reference_test(const std::string &name, F &&fn)
{
// One entry for every possible 16-bit pattern.
std::vector<std::pair<half, half>> reference(std::numeric_limits<std::uint16_t>::max() + 1);
std::ifstream in("reference/" + name, std::ios_base::in | std::ios_base::binary);
if (!in)
throw std::runtime_error("cannot open reference file for " + name);
// NOTE(review): the outcome of this read is not checked; a short file leaves the remaining entries at their initial values.
in.read(reinterpret_cast<char*>(reference.data()), reference.size() * sizeof(reference.front()));
// Largest absolute error, relative error and bit-pattern distance seen among mismatches.
double err = 0.0, rel = 0.0; int bin = 0;
bool success = unary_test(name, [&, this](half arg) -> bool {
auto ref = reference[h2b(arg)];
// b is the member of the reference pair chosen by select<> for the configured rounding style.
half a = fn(arg), b = select<std::numeric_limits<half>::round_style>(ref);
// In rough mode, or with indeterminate rounding, matching either member of the pair is accepted.
bool equal = (rough_ || std::numeric_limits<half>::round_style == std::round_indeterminate) ? (comp(a, ref.first) || comp(a, ref.second)) : comp(a, b);
if (!equal)
{
double error = std::abs(static_cast<double>(a) - static_cast<double>(b));
// if(std::abs(h2b(a)-h2b(b)) > 1)
// if(std::isinf(error/std::abs(b)))
// std::cerr << arg << '(' << std::hex << h2b(arg) << ") = " << a << '(' << std::hex << h2b(a) << "), " << b << '(' << h2b(b) << ") -> " << error << '\n' << std::dec;
err = std::max(err, error); rel = std::max(rel, error / std::abs(b)); bin = std::max(bin, std::abs(h2b(a) - h2b(b)));
}
return equal;
});
// Report the error statistics on std::cout only when at least one mismatch contributed to them.
if (err != 0.0 || rel != 0.0)
std::cout << name << " max error: " << err << ", max relative error: " << rel << ", max ulp error: " << /*ilog2*/(bin) << '\n';
return success;
}
// Checks a two-argument function against precomputed records stored in the binary file "reference/<name>".
// Each record holds the two operands and a pair of reference results; the record count is derived from the file size.
// name: test label and reference file name; fn: function under test, invoked as fn(half, half).
// Returns the result of the underlying simple_test; throws std::runtime_error when the file cannot be opened.
template<typename F> bool binary_reference_test(const std::string &name, F &&fn)
{
struct record { half x, y; std::pair<half, half> result; };
// Opened positioned at the end (ate) so tellg() yields the file size.
std::ifstream in("reference/" + name, std::ios_base::in | std::ios_base::binary | std::ios_base::ate);
if (!in)
throw std::runtime_error("cannot open reference file for " + name);
unsigned int passed = 0, count = in.tellg() / sizeof(record);
std::vector<record> reference(count);
// Rewind to the start and reset the stream state before reading all records.
in.seekg(0, std::ios_base::beg);
in.clear();
// NOTE(review): the outcome of this read is not checked.
in.read(reinterpret_cast<char*>(reference.data()), reference.size() * sizeof(reference.front()));
// Largest absolute error, relative error and bit-pattern distance seen among mismatches.
double err = 0.0, rel = 0.0; int bin = 0;
bool success = simple_test(name, [&, this]() -> bool {
for (unsigned int i = 0; i < count; ++i)
{
auto ref = reference[i];
// b is the member of the reference pair chosen by select<> for the configured rounding style.
half x = ref.x, y = ref.y, a = fn(x, y), b = select<std::numeric_limits<half>::round_style>(ref.result);
// In rough mode, or with indeterminate rounding, matching either member of the pair is accepted.
bool equal = (rough_ || std::numeric_limits<half>::round_style == std::round_indeterminate) ? (comp(a, ref.result.first) || comp(a, ref.result.second)) : comp(a, b);
if (!equal)
{
double error = std::abs(static_cast<double>(a) - static_cast<double>(b));
// if(std::abs(h2b(a)-h2b(b)) > 1)
// std::cerr << x << ", " << y << " = " << a << '(' << std::hex << h2b(a) << "), " << b << '(' << h2b(b) << ") -> " << error << '\n' << std::dec;
err = std::max(err, error); rel = std::max(rel, error / std::abs(b)); bin = std::max(bin, std::abs(h2b(a) - h2b(b)));
}
passed += equal;
}
// One CSV row per test: name and number of failing records.
if (csv_)
*csv_ << name << ";" << (count - passed) << '\n';
return passed == count;
});
// Failure count and error statistics are additionally reported on std::cout.
if (passed != count)
std::cout << name << ": " << (count - passed) << " of " << count << " failed\n";
if (err != 0.0 || rel != 0.0)
std::cout << name << " max error: " << err << ", max relative error: " << rel << ", max ulp error: " << /*ilog2*/(bin) << '\n';
return success;
}
// Checks a three-argument function against precomputed records stored in the binary file "reference/<name>".
// Each record holds the three operands and a pair of reference results; the record count is derived from the file size.
// name: test label and reference file name; fn: function under test, invoked as fn(half, half, half).
// Returns the result of the underlying simple_test; throws std::runtime_error when the file cannot be opened.
template<typename F> bool ternary_reference_test(const std::string &name, F &&fn)
{
struct record { half x, y, z; std::pair<half, half> result; };
// Opened positioned at the end (ate) so tellg() yields the file size.
std::ifstream in("reference/" + name, std::ios_base::in | std::ios_base::binary | std::ios_base::ate);
if (!in)
throw std::runtime_error("cannot open reference file for " + name);
unsigned int passed = 0, count = in.tellg() / sizeof(record);
std::vector<record> reference(count);
// Rewind to the start and reset the stream state before reading all records.
in.seekg(0, std::ios_base::beg);
in.clear();
// NOTE(review): the outcome of this read is not checked.
in.read(reinterpret_cast<char*>(reference.data()), reference.size() * sizeof(reference.front()));
// Largest absolute error, relative error and bit-pattern distance seen among mismatches.
double err = 0.0, rel = 0.0; int bin = 0;
bool success = simple_test(name, [&, this]() -> bool {
for (unsigned int i = 0; i < count; ++i)
{
auto ref = reference[i];
// b is the member of the reference pair chosen by select<> for the configured rounding style.
half x = ref.x, y = ref.y, z = ref.z, a = fn(x, y, z), b = select<std::numeric_limits<half>::round_style>(ref.result);
// In rough mode, or with indeterminate rounding, matching either member of the pair is accepted.
bool equal = (rough_ || std::numeric_limits<half>::round_style == std::round_indeterminate) ? (comp(a, ref.result.first) || comp(a, ref.result.second)) : comp(a, b);
if (!equal)
{
double error = std::abs(static_cast<double>(a) - static_cast<double>(b));
// std::cerr << x << ", " << y << ", " << z << " = " << a << '(' << std::hex << h2b(a) << "), " << b << '(' << h2b(b) << ") -> " << error << '\n' << std::dec;
err = std::max(err, error); rel = std::max(rel, error / std::abs(b)); bin = std::max(bin, std::abs(h2b(a) - h2b(b)));
}
passed += equal;
}
// One CSV row per test: name and number of failing records.
if (csv_)
*csv_ << name << ";" << (count - passed) << '\n';
return passed == count;
});
// Failure count and error statistics are additionally reported on std::cout.
if (passed != count)
std::cout << name << ": " << (count - passed) << " of " << count << " failed\n";
if (err != 0.0 || rel != 0.0)
std::cout << name << " max error: " << err << ", max relative error: " << rel << ", max ulp error: " << /*ilog2*/(bin) << '\n';
return success;
}
// Named groups of half values that the *_test helpers iterate over.
test_map halfs_;
// Per-group int looked up by group name in class_test and handed to its predicate.
class_map classes_;
// Number of tests executed so far; incremented once by every *_test helper.
unsigned int tests_;
// Names of the tests that did not pass completely.
std::vector<std::string> failed_;
// Stream receiving the human-readable test log.
std::ostream &log_;
// Optional stream receiving "name;failures" rows; skipped when null.
std::ostream *csv_;
// When set, binary/ternary/float tests sample their input space instead of enumerating it.
bool fast_;
// When set, reference tests accept a match with either member of the reference result pair.
bool rough_;
};
/// Scope timer: captures the clock at construction and, on destruction,
/// prints the elapsed time in whole milliseconds to std::cout.
struct timer
{
    timer() : start_(clock_type::now()) {}
    ~timer()
    {
        const auto elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(clock_type::now() - start_);
        std::cout << "time: " << elapsed.count() << " ms\n";
    }
private:
    typedef std::chrono::high_resolution_clock clock_type;
    // Moment the timer was constructed.
    std::chrono::time_point<clock_type> start_;
};
/// Entry point of the half-precision test driver.
/// Command-line arguments:
///   -fast    sample the input space instead of enumerating it
///   -rough   accept either reference result in the reference tests
///   *.csv    write per-test failure counts to this file
///   other    write the test log to this file instead of std::cout
/// @return the value returned by half_test::test(), or -1 when a std::exception escapes
int main(int argc, char *argv[]) try
{
#ifndef HALF_ARITHMETIC_TYPE
    // Align the floating-point environment's rounding mode with half's configured rounding style.
    switch (std::numeric_limits<half>::round_style)
    {
#ifdef _WIN32
    // _controlfp takes the new control bits first and the mask of bits to modify second.
    case std::round_to_nearest: _controlfp(_RC_NEAR, _MCW_RC); break;
    case std::round_toward_zero: _controlfp(_RC_CHOP, _MCW_RC); break;
    case std::round_toward_infinity: _controlfp(_RC_UP, _MCW_RC); break;
    case std::round_toward_neg_infinity: _controlfp(_RC_DOWN, _MCW_RC); break;
#else
    case std::round_to_nearest: std::fesetround(FE_TONEAREST); break;
    case std::round_toward_zero: std::fesetround(FE_TOWARDZERO); break;
    case std::round_toward_infinity: std::fesetround(FE_UPWARD); break;
    case std::round_toward_neg_infinity: std::fesetround(FE_DOWNWARD); break;
#endif
    // round_indeterminate: leave the rounding mode untouched.
    default: break;
    }
#endif
    // Skip the program name; guard against an empty argv so the iterator range stays valid.
    std::vector<std::string> args(argv + (argc > 0 ? 1 : 0), argv + argc);
    std::unique_ptr<std::ostream> file, csv;
    bool fast = false, rough = false;
    for (auto &&arg : args)
    {
        if (arg == "-fast")
            fast = true;
        else if (arg == "-rough")
            rough = true;
        else if (arg.length() > 4 && arg.substr(arg.length() - 4) == ".csv")
            csv.reset(new std::ofstream(arg));
        else
            file.reset(new std::ofstream(arg));
    }
    half_test test(file ? *file : std::cout, csv.get(), fast, rough);
    test.performance_test();
    // Constructed after the performance pass so that only test() is timed.
    timer time;
    return test.test();
}
catch (const std::exception &e)
{
    std::cerr << "ERROR: " << e.what() << '\n';
    return -1;
}
/*
* This implementation is extracted from PyTorch:
* Repo: github.com/pytorch/pytorch
* File: torch/lib/TH/THHalf.c
* Commit ID: 92481b59d31199df57420d4b14912348cc780d1d
* Functions are made "static inline" for performance
*/
/* Copyright 1993-2014 NVIDIA Corporation. All rights reserved. */
// Host functions for converting between FP32 and FP16 formats
/*
 * Expand an IEEE 754 binary16 bit pattern into a binary32 value.
 *
 * src: points to the 16 half-precision bits to convert.
 * res: receives the converted float.
 *
 * Infinities stay infinities, every NaN becomes the positive NaN pattern
 * 0x7fffffff, and subnormal halves are renormalized into normal floats.
 * Assumes `unsigned` is 32 bits wide, like the rest of this file.
 */
static inline void TH_halfbits2float(unsigned short* src, float* res)
{
    /* Type-pun through a union: storing via a casted (unsigned*) pointer
       would break the strict-aliasing rule. */
    union { unsigned u; float f; } out;
    unsigned h = *src;
    unsigned sign = ((h >> 15) & 1);
    unsigned exponent = ((h >> 10) & 0x1f);
    unsigned mantissa = ((h & 0x3ff) << 13);

    if (exponent == 0x1f) { /* NaN or Inf */
        /* A NaN drops its sign and gets an all-ones mantissa. */
        mantissa = (mantissa ? (sign = 0, 0x7fffff) : 0);
        exponent = 0xff;
    }
    else if (!exponent) { /* Denorm or Zero */
        if (mantissa) {
            unsigned int msb;
            exponent = 0x71;
            /* Shift until the leading 1 has moved past bit 22, lowering the
               exponent once per shift. */
            do {
                msb = (mantissa & 0x400000);
                mantissa <<= 1; /* normalize */
                --exponent;
            } while (!msb);
            mantissa &= 0x7fffff; /* 1.mantissa is implicit */
        }
    }
    else {
        /* Normal number: rebias the exponent from 15 to 127. */
        exponent += 0x70;
    }

    out.u = ((sign << 31) | (exponent << 23) | mantissa);
    *res = out.f;
}
/*
 * Convert a binary32 value to IEEE 754 binary16 bits, rounding to nearest
 * even.
 *
 * src:  points to the float to convert.
 * dest: receives the 16 half-precision bits.
 *
 * Any NaN yields 0x7fff. Magnitudes above the bit pattern 0x477fefff yield a
 * signed infinity, and magnitudes below 0x33000001 yield a signed zero.
 * Assumes `unsigned` is 32 bits wide, like the rest of this file.
 */
static inline void TH_float2halfbits(float* src, unsigned short* dest)
{
    /* Type-pun through a union: reading via a casted (unsigned*) pointer
       would break the strict-aliasing rule. */
    union { float f; unsigned u; } bits;
    unsigned x, u, remainder, shift, lsb, lsb_s1, lsb_m1;
    unsigned sign, exponent, mantissa;

    bits.f = *src;
    x = bits.u;
    u = (x & 0x7fffffff);

    // Get rid of +NaN/-NaN case first.
    if (u > 0x7f800000) {
        *dest = 0x7fffU;
        return;
    }

    sign = ((x >> 16) & 0x8000);

    // Get rid of +Inf/-Inf (and anything that overflows to them), +0/-0
    // (and anything that underflows to them).
    if (u > 0x477fefff) {
        *dest = sign | 0x7c00U;
        return;
    }
    if (u < 0x33000001) {
        *dest = (sign | 0x0000);
        return;
    }

    exponent = ((u >> 23) & 0xff);
    mantissa = (u & 0x7fffff);

    if (exponent > 0x70) {
        /* Normal half: drop 13 mantissa bits and rebias the exponent. */
        shift = 13;
        exponent -= 0x70;
    }
    else {
        /* Subnormal half: make the implicit 1 explicit and shift it into
           the mantissa field; shift is at most 24 here. */
        shift = 0x7e - exponent;
        exponent = 0;
        mantissa |= 0x800000;
    }

    lsb = (1u << shift);
    lsb_s1 = (lsb >> 1);
    lsb_m1 = (lsb - 1);

    // Round to nearest even.
    remainder = (mantissa & lsb_m1);
    mantissa >>= shift;
    if (remainder > lsb_s1 || (remainder == lsb_s1 && (mantissa & 0x1))) {
        ++mantissa;
        if (!(mantissa & 0x3ff)) {
            /* The mantissa overflowed: carry into the exponent. */
            ++exponent;
            mantissa = 0;
        }
    }

    *dest = (sign | (exponent << 10) | mantissa);
}
/*
* This implementation is extracted from Eigen:
* Repo: bitbucket.org/eigen/eigen
* File: Eigen/src/Core/arch/CUDA/Half.h
* Commit ID: 96e0f73a35de54f675d825bef5339b2f08e77eb4
*
* Removed a lot of redundant and cuda-specific code.
*/
#define EIGEN_STRONG_INLINE static inline
#define EIGEN_DEVICE_FUNC
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
//
// The conversion routines are Copyright (c) Fabian Giesen, 2016.
// The original license follows:
//
// Copyright (c) Fabian Giesen, 2016
// All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// AS IS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// Standard 16-bit float type, mostly useful for GPUs. Defines a new
// type Eigen::half (inheriting from CUDA's __half struct) with
// operator overloads such that it behaves basically as an arithmetic
// type. It will be quite slow on CPUs (so it is recommended to stay
// in fp32 for CPUs, except for simple parameter conversions, I/O
// to disk and the likes), but fast on GPUs.
#ifndef EIGEN_HALF_CUDA_H
#define EIGEN_HALF_CUDA_H
namespace Eigen {
namespace half_impl {
// Our own __half definition, modelled on CUDA's: a thin wrapper around the
// 16 raw bits of a half-precision value.
struct __half {
  // Raw bit pattern.
  unsigned short x;

  // Default construction yields all-zero bits.
  EIGEN_DEVICE_FUNC __half() : x(0) {}

  // Wrap an existing bit pattern; explicit, so integers never convert
  // to __half implicitly.
  explicit EIGEN_DEVICE_FUNC __half(unsigned short raw) : x(raw) {}
};
// Forward declarations of the half <-> float conversion helpers; their
// definitions follow later in this namespace.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half raw_uint16_to_half(unsigned short x);
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half float_to_half_rtne(float ff);
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float half_to_float(__half h);
// Conversion routines, including fallbacks for the host or older CUDA.
// Note that newer Intel CPUs (Haswell or newer) have vectorized versions of
// these in hardware. If we need more performance on older/other CPUs, they are
// also possible to vectorize directly.
// Wrap a raw 16-bit pattern in a __half without any numeric conversion.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half raw_uint16_to_half(unsigned short x) {
  return __half(x);
}
// Overlay of one 32-bit word as raw unsigned bits (u) and as a float (f).
// `u` has to remain the first member: the conversion routines in this
// namespace brace-initialize FP32 constants (e.g. `{ 113 << 23 }`), and a
// brace initializer sets the first member, so those values are bit patterns.
union FP32 {
unsigned int u;
float f;
};
// Convert a float to a __half ("rtne" = round to nearest even).
// Three compile-time paths:
//   - EIGEN_HAS_CUDA_FP16 on a device with __CUDA_ARCH__ >= 300: the
//     __float2half intrinsic;
//   - EIGEN_HAS_FP16_C: the _cvtss_sh intrinsic, called with 0 as its second
//     argument;
//   - otherwise: the portable bit manipulation below.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half float_to_half_rtne(float ff) {
#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
return __float2half(ff);
#elif defined(EIGEN_HAS_FP16_C)
__half h;
h.x = _cvtss_sh(ff, 0);
return h;
#else
FP32 f; f.f = ff;
// Float bit patterns (see FP32): +Inf (exponent field 255), 2^16 (biased
// exponent 127 + 16) and 0.5 (biased exponent 126).
const FP32 f32infty = { 255 << 23 };
const FP32 f16max = { (127 + 16) << 23 };
const FP32 denorm_magic = { ((127 - 15) + (23 - 10) + 1) << 23 };
unsigned int sign_mask = 0x80000000u;
__half o;
o.x = static_cast<unsigned short>(0x0u);
// Split off the sign so everything below works on the magnitude only.
unsigned int sign = f.u & sign_mask;
f.u ^= sign;
// NOTE all the integer compares in this function can be safely
// compiled into signed compares since all operands are below
// 0x80000000. Important if you want fast straight SSE2 code
// (since there's no unsigned PCMPGTD).
if (f.u >= f16max.u) { // result is Inf or NaN (all exponent bits set)
o.x = (f.u > f32infty.u) ? 0x7e00 : 0x7c00; // NaN->qNaN and Inf->Inf
}
else { // (De)normalized number or zero
if (f.u < (113 << 23)) { // resulting FP16 is subnormal or zero
// use a magic value to align our 10 mantissa bits at the bottom of
// the float. as long as FP addition is round-to-nearest-even this
// just works.
f.f += denorm_magic.f;
// and one integer subtract of the bias later, we have our final float!
o.x = static_cast<unsigned short>(f.u - denorm_magic.u);
}
else {
unsigned int mant_odd = (f.u >> 13) & 1; // resulting mantissa is odd
// update exponent, rounding bias part 1
// (the unsigned addition wraps, which lowers the exponent field by 127 - 15)
f.u += ((unsigned int)(15 - 127) << 23) + 0xfff;
// rounding bias part 2
f.u += mant_odd;
// take the bits!
o.x = static_cast<unsigned short>(f.u >> 13);
}
}
// Put the sign back at bit 15 of the half.
o.x |= static_cast<unsigned short>(sign >> 16);
return o;
#endif
}
// Convert a __half to a float.
// Three compile-time paths:
//   - EIGEN_HAS_CUDA_FP16 on a device with __CUDA_ARCH__ >= 300: the
//     __half2float intrinsic;
//   - EIGEN_HAS_FP16_C: the _cvtsh_ss intrinsic;
//   - otherwise: the portable bit manipulation below.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float half_to_float(__half h) {
#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
  return __half2float(h);
#elif defined(EIGEN_HAS_FP16_C)
  return _cvtsh_ss(h.x);
#else
  const FP32 magic = { 113 << 23 };
  const unsigned int shifted_exp = 0x7c00 << 13; // exponent mask after shift
  // Widen to unsigned first: h.x would otherwise promote to signed int, and
  // shifting 0x8000 left by 16 overflows a 32-bit int.
  const unsigned int bits = h.x;
  FP32 o;

  o.u = (bits & 0x7fffu) << 13;            // exponent/mantissa bits
  unsigned int exp = shifted_exp & o.u;    // just the exponent
  o.u += (127 - 15) << 23;                 // exponent adjust

  // handle exponent special cases
  if (exp == shifted_exp) {                // Inf/NaN?
    o.u += (128 - 16) << 23;               // extra exp adjust
  }
  else if (exp == 0) {                     // Zero/Denormal?
    o.u += 1 << 23;                        // extra exp adjust
    o.f -= magic.f;                        // renormalize
  }

  o.u |= (bits & 0x8000u) << 16;           // sign bit
  return o.f;
#endif
}
} // end namespace half_impl
} // end namespace Eigen
#endif // EIGEN_HALF_CUDA_H
#pragma once
#include <stdint.h>
/*
* This code snippet posted by user Phernost on
* https://stackoverflow.com/questions/1659440/32-bit-to-16-bit-floating-point-conversion
*
* compress and decompress methods are made "inline" for performance
*/
/*
 * Mostly branch-free float <-> half bit conversion.
 * compress() truncates a float to 16 half-precision bits; decompress()
 * expands 16 half-precision bits back to a float.
 */
class Float16Compressor
{
    // Overlay of one 32-bit word as float, signed and unsigned integer.
    union Bits
    {
        float f;
        int32_t si;
        uint32_t ui;
    };

    static int const shift = 13;     // mantissa width difference (23 - 10)
    static int const shiftSign = 16; // distance between the two sign bits

    static int32_t const infN = 0x7F800000; // flt32 infinity
    static int32_t const maxN = 0x477FE000; // max flt16 normal as a flt32
    static int32_t const minN = 0x38800000; // min flt16 normal as a flt32
    static int32_t const signN = 0x80000000; // flt32 sign bit

    static int32_t const infC = infN >> shift;
    static int32_t const nanN = (infC + 1) << shift; // minimum flt16 nan as a flt32
    static int32_t const maxC = maxN >> shift;
    static int32_t const minC = minN >> shift;
    static int32_t const signC = signN >> shiftSign; // flt16 sign bit

    static int32_t const mulN = 0x52000000; // (1 << 23) / minN
    static int32_t const mulC = 0x33800000; // minN / (1 << (23 - shift))

    static int32_t const subC = 0x003FF; // max flt32 subnormal down shifted
    static int32_t const norC = 0x00400; // min flt32 normal down shifted

    static int32_t const maxD = infC - maxC - 1;
    static int32_t const minD = minC - subC - 1;

public:
    // Convert a float to half-precision bits. Mantissa bits that do not fit
    // are truncated; magnitudes above the largest half become infinity.
    inline static uint16_t compress(float value)
    {
        Bits v;
        v.f = value;
        uint32_t sign = v.si & signN;
        v.si ^= sign;       // v now holds the magnitude
        sign >>= shiftSign; // logical shift

        if (minN > v.si)
        {
            // Below the smallest normal half: rescale so the integer part of
            // the product is the subnormal mantissa. The product is only
            // formed here, where it is < 2^23 and always fits in int32_t;
            // converting it for larger inputs (or NaN) would be an
            // out-of-range float -> int conversion.
            Bits s;
            s.si = mulN;
            v.si = static_cast<int32_t>(s.f * v.f); // correct subnormals
        }

        // Clamp finite overflow to infinity and keep small NaN payloads NaN.
        v.si ^= (infN ^ v.si) & -((infN > v.si) & (v.si > maxN));
        v.si ^= (nanN ^ v.si) & -((nanN > v.si) & (v.si > infN));
        v.ui >>= shift; // logical shift
        // Rebias the exponent for Inf/NaN and for normal numbers.
        v.si ^= ((v.si - maxD) ^ v.si) & -(v.si > maxC);
        v.si ^= ((v.si - minD) ^ v.si) & -(v.si > subC);
        return v.ui | sign;
    }

    // Convert half-precision bits back to a float.
    inline static float decompress(uint16_t value)
    {
        Bits v;
        v.ui = value;
        // Keep the sign in an unsigned word so moving it from bit 15 to
        // bit 31 never shifts a signed value into its sign bit.
        uint32_t sign = static_cast<uint32_t>(v.si & signC);
        v.ui ^= sign;
        sign <<= shiftSign;

        // Rebias the exponent for normal numbers and for Inf/NaN.
        v.si ^= ((v.si + minD) ^ v.si) & -(v.si > subC);
        v.si ^= ((v.si + maxD) ^ v.si) & -(v.si > maxC);

        // Subnormal halves are rebuilt by scaling the integer mantissa.
        Bits s;
        s.si = mulC;
        s.f *= v.si;
        int32_t mask = -(norC > v.si);
        v.si <<= shift;
        v.si ^= (s.si ^ v.si) & mask;
        v.ui |= sign;
        return v.f;
    }
};
\ No newline at end of file
/*
* This implementation is extracted from numpy:
* Repo: github.com/numpy/numpy
* File: numpy/core/src/npymath/halffloat.c
* Commit ID: 25c23f1d956104a072a95355ffaa7a38b53710b7
* Functions are made "static inline" for performance, and
* non-conversion functions are removed, and generation of
* exceptions is disabled.
*/
#include <cstdint>
typedef uint16_t npy_uint16;
typedef uint32_t npy_uint32;
typedef uint64_t npy_uint64;
/*
* This chooses between 'ties to even' and 'ties away from zero'.
*/
#define NPY_HALF_ROUND_TIES_TO_EVEN 1
/*
* If these are 1, the conversions try to trigger underflow,
* overflow, and invalid exceptions in the FP system when needed.
*/
#define NPY_HALF_GENERATE_OVERFLOW 0
#define NPY_HALF_GENERATE_UNDERFLOW 0
#define NPY_HALF_GENERATE_INVALID 0
/*
********************************************************************
* BIT-LEVEL CONVERSIONS *
********************************************************************
*/
/*
 * Convert the bit pattern of an IEEE 754 binary32 value to binary16 bits.
 *
 * f: the 32 float bits.
 * Returns the 16 half bits. Overflow gives a signed infinity, NaNs stay NaN
 * (keeping the top significand bits), and small values become subnormal
 * halves or signed zero. Rounding follows NPY_HALF_ROUND_TIES_TO_EVEN.
 */
static inline npy_uint16 npy_floatbits_to_halfbits(npy_uint32 f)
{
    npy_uint32 f_exp, f_sig;
    npy_uint16 h_sgn, h_exp, h_sig;

    h_sgn = (npy_uint16)((f & 0x80000000u) >> 16);
    f_exp = (f & 0x7f800000u);

    /* Exponent overflow/NaN converts to signed inf/NaN */
    if (f_exp >= 0x47800000u) {
        if (f_exp == 0x7f800000u) {
            /* Inf or NaN */
            f_sig = (f & 0x007fffffu);
            if (f_sig != 0) {
                /* NaN - propagate the flag in the significand... */
                npy_uint16 ret = (npy_uint16)(0x7c00u + (f_sig >> 13));
                /* ...but make sure it stays a NaN */
                if (ret == 0x7c00u) {
                    ret++;
                }
                return h_sgn + ret;
            }
            else {
                /* signed inf */
                return (npy_uint16)(h_sgn + 0x7c00u);
            }
        }
        else {
            /* overflow to signed inf */
#if NPY_HALF_GENERATE_OVERFLOW
            npy_set_floatstatus_overflow();
#endif
            return (npy_uint16)(h_sgn + 0x7c00u);
        }
    }

    /* Exponent underflow converts to a subnormal half or signed zero */
    if (f_exp <= 0x38000000u) {
        /*
         * Signed zeros, subnormal floats, and floats with small
         * exponents all convert to signed zero halfs.
         */
        if (f_exp < 0x33000000u) {
#if NPY_HALF_GENERATE_UNDERFLOW
            /* If f != 0, it underflowed to 0 */
            if ((f & 0x7fffffff) != 0) {
                npy_set_floatstatus_underflow();
            }
#endif
            return h_sgn;
        }
        /* Make the subnormal significand */
        f_exp >>= 23;
        f_sig = (0x00800000u + (f & 0x007fffffu));
#if NPY_HALF_GENERATE_UNDERFLOW
        /* If it's not exactly represented, it underflowed */
        if ((f_sig&(((npy_uint32)1 << (126 - f_exp)) - 1)) != 0) {
            npy_set_floatstatus_underflow();
        }
#endif
        /*
         * On top of the usual 13-bit shift, a subnormal result needs an
         * extra shift of 1 (f_exp == 112) up to 11 (f_exp == 102) bits.
         */
        f_sig >>= (113 - f_exp);
        /* Handle rounding by adding 1 to the bit beyond half precision */
#if NPY_HALF_ROUND_TIES_TO_EVEN
        /*
         * If the last bit in the half significand is 0 (already even), and
         * the remaining bit pattern is 1000...0, then we do not add one
         * to the bit after the half significand. The shift above may have
         * dropped up to 11 low bits, so those are checked in the original
         * value: if any was set this is not a tie and we round up.
         * In all other cases, we add one.
         */
        if (((f_sig & 0x00003fffu) != 0x00001000u) || (f & 0x000007ffu)) {
            f_sig += 0x00001000u;
        }
#else
        f_sig += 0x00001000u;
#endif
        h_sig = (npy_uint16)(f_sig >> 13);
        /*
         * If the rounding causes a bit to spill into h_exp, it will
         * increment h_exp from zero to one and h_sig will be zero.
         * This is the correct result.
         */
        return (npy_uint16)(h_sgn + h_sig);
    }

    /* Regular case with no overflow or underflow */
    h_exp = (npy_uint16)((f_exp - 0x38000000u) >> 13);
    /* Handle rounding by adding 1 to the bit beyond half precision */
    f_sig = (f & 0x007fffffu);
#if NPY_HALF_ROUND_TIES_TO_EVEN
    /*
     * If the last bit in the half significand is 0 (already even), and
     * the remaining bit pattern is 1000...0, then we do not add one
     * to the bit after the half significand. In all other cases, we do.
     */
    if ((f_sig & 0x00003fffu) != 0x00001000u) {
        f_sig += 0x00001000u;
    }
#else
    f_sig += 0x00001000u;
#endif
    h_sig = (npy_uint16)(f_sig >> 13);
    /*
     * If the rounding causes a bit to spill into h_exp, it will
     * increment h_exp by one and h_sig will be zero. This is the
     * correct result. h_exp may increment to 15, at greatest, in
     * which case the result overflows to a signed inf.
     */
#if NPY_HALF_GENERATE_OVERFLOW
    h_sig += h_exp;
    if (h_sig == 0x7c00u) {
        npy_set_floatstatus_overflow();
    }
    return h_sgn + h_sig;
#else
    return h_sgn + h_exp + h_sig;
#endif
}
/*
 * Convert the bit pattern of an IEEE 754 binary64 value to binary16 bits.
 *
 * d: the 64 double bits.
 * Returns the 16 half bits. Overflow gives a signed infinity, NaNs stay NaN
 * (keeping the top significand bits), and small values become subnormal
 * halves or signed zero. Rounding follows NPY_HALF_ROUND_TIES_TO_EVEN.
 */
static inline npy_uint16 npy_doublebits_to_halfbits(npy_uint64 d)
{
    npy_uint64 d_exp, d_sig;
    npy_uint16 h_sgn, h_exp, h_sig;

    h_sgn = (d & 0x8000000000000000ULL) >> 48;
    d_exp = (d & 0x7ff0000000000000ULL);

    /* Exponent overflow/NaN converts to signed inf/NaN */
    if (d_exp >= 0x40f0000000000000ULL) {
        if (d_exp == 0x7ff0000000000000ULL) {
            /* Inf or NaN */
            d_sig = (d & 0x000fffffffffffffULL);
            if (d_sig != 0) {
                /* NaN - propagate the flag in the significand... */
                npy_uint16 ret = (npy_uint16)(0x7c00u + (d_sig >> 42));
                /* ...but make sure it stays a NaN */
                if (ret == 0x7c00u) {
                    ret++;
                }
                return h_sgn + ret;
            }
            else {
                /* signed inf */
                return h_sgn + 0x7c00u;
            }
        }
        else {
            /* overflow to signed inf */
#if NPY_HALF_GENERATE_OVERFLOW
            npy_set_floatstatus_overflow();
#endif
            return h_sgn + 0x7c00u;
        }
    }

    /* Exponent underflow converts to subnormal half or signed zero */
    if (d_exp <= 0x3f00000000000000ULL) {
        /*
         * Signed zeros, subnormal floats, and floats with small
         * exponents all convert to signed zero halfs.
         */
        if (d_exp < 0x3e60000000000000ULL) {
#if NPY_HALF_GENERATE_UNDERFLOW
            /* If d != 0, it underflowed to 0 */
            if ((d & 0x7fffffffffffffffULL) != 0) {
                npy_set_floatstatus_underflow();
            }
#endif
            return h_sgn;
        }
        /* Make the subnormal significand */
        d_exp >>= 52;
        d_sig = (0x0010000000000000ULL + (d & 0x000fffffffffffffULL));
#if NPY_HALF_GENERATE_UNDERFLOW
        /* If it's not exactly represented, it underflowed */
        if ((d_sig&(((npy_uint64)1 << (1051 - d_exp)) - 1)) != 0) {
            npy_set_floatstatus_underflow();
        }
#endif
        /*
         * The 53-bit significand leaves room to align by shifting LEFT, so
         * no low bit is lost before the rounding decision. d_exp is in
         * [998, 1008] here, so the shift is 0..10 bits and the value stays
         * below 2^63. The rounding bit and the final right shift move up by
         * 11 positions relative to the normal branch to compensate.
         */
        d_sig <<= (d_exp - 998);
        /* Handle rounding by adding 1 to the bit beyond half precision */
#if NPY_HALF_ROUND_TIES_TO_EVEN
        /*
         * If the last bit in the half significand is 0 (already even), and
         * the remaining bit pattern is 1000...0, then we do not add one
         * to the bit after the half significand. In all other cases, we do.
         */
        if ((d_sig & 0x003fffffffffffffULL) != 0x0010000000000000ULL) {
            d_sig += 0x0010000000000000ULL;
        }
#else
        d_sig += 0x0010000000000000ULL;
#endif
        h_sig = (npy_uint16)(d_sig >> 53);
        /*
         * If the rounding causes a bit to spill into h_exp, it will
         * increment h_exp from zero to one and h_sig will be zero.
         * This is the correct result.
         */
        return h_sgn + h_sig;
    }

    /* Regular case with no overflow or underflow */
    h_exp = (npy_uint16)((d_exp - 0x3f00000000000000ULL) >> 42);
    /* Handle rounding by adding 1 to the bit beyond half precision */
    d_sig = (d & 0x000fffffffffffffULL);
#if NPY_HALF_ROUND_TIES_TO_EVEN
    /*
     * If the last bit in the half significand is 0 (already even), and
     * the remaining bit pattern is 1000...0, then we do not add one
     * to the bit after the half significand. In all other cases, we do.
     */
    if ((d_sig & 0x000007ffffffffffULL) != 0x0000020000000000ULL) {
        d_sig += 0x0000020000000000ULL;
    }
#else
    d_sig += 0x0000020000000000ULL;
#endif
    h_sig = (npy_uint16)(d_sig >> 42);
    /*
     * If the rounding causes a bit to spill into h_exp, it will
     * increment h_exp by one and h_sig will be zero. This is the
     * correct result. h_exp may increment to 15, at greatest, in
     * which case the result overflows to a signed inf.
     */
#if NPY_HALF_GENERATE_OVERFLOW
    h_sig += h_exp;
    if (h_sig == 0x7c00u) {
        npy_set_floatstatus_overflow();
    }
    return h_sgn + h_sig;
#else
    return h_sgn + h_exp + h_sig;
#endif
}
/*
 * Expand 16 half-precision bits into the bit pattern of the equal
 * binary32 value.
 *
 * h: the half bits.
 * Returns the float bits; infinities and NaNs keep their significand bits,
 * and subnormal halves come back as normalized floats.
 */
static inline npy_uint32 npy_halfbits_to_floatbits(npy_uint16 h)
{
    const npy_uint32 sign_bits = ((npy_uint32)h & 0x8000u) << 16;
    const npy_uint32 exp_field = (npy_uint32)(h & 0x7c00u);
    const npy_uint32 fraction = (npy_uint32)(h & 0x03ffu);

    if (exp_field == 0x7c00u) {
        /* inf or NaN: all-ones exponent plus a copy of the significand */
        return sign_bits + 0x7f800000u + (fraction << 13);
    }

    if (exp_field != 0x0000u) {
        /* normalized: rebias the exponent and move everything into place */
        return sign_bits + (((npy_uint32)(h & 0x7fffu) + 0x1c000u) << 13);
    }

    if (fraction == 0) {
        /* signed zero */
        return sign_bits;
    }

    /*
     * Subnormal: shift the significand left until its leading 1 reaches
     * bit 10 (the implicit-bit position), counting every shift after the
     * first so the exponent can be lowered by the same amount.
     */
    npy_uint32 sig = fraction << 1;
    npy_uint32 extra_shifts = 0;
    while ((sig & 0x0400u) == 0) {
        sig <<= 1;
        ++extra_shifts;
    }

    return sign_bits
         + ((npy_uint32)(127 - 15 - extra_shifts) << 23)
         + ((sig & 0x03ffu) << 13);
}
/*
 * Expand 16 half-precision bits into the bit pattern of the equal
 * binary64 value.
 *
 * h: the half bits.
 * Returns the double bits; infinities and NaNs keep their significand bits,
 * and subnormal halves come back as normalized doubles.
 */
static inline npy_uint64 npy_halfbits_to_doublebits(npy_uint16 h)
{
    const npy_uint64 sign_bits = ((npy_uint64)h & 0x8000u) << 48;
    const npy_uint64 exp_field = (npy_uint64)(h & 0x7c00u);
    const npy_uint64 fraction = (npy_uint64)(h & 0x03ffu);

    if (exp_field == 0x7c00u) {
        /* inf or NaN: all-ones exponent plus a copy of the significand */
        return sign_bits + 0x7ff0000000000000ULL + (fraction << 42);
    }

    if (exp_field != 0x0000u) {
        /* normalized: rebias the exponent and move everything into place */
        return sign_bits + (((npy_uint64)(h & 0x7fffu) + 0xfc000u) << 42);
    }

    if (fraction == 0) {
        /* signed zero */
        return sign_bits;
    }

    /*
     * Subnormal: shift the significand left until its leading 1 reaches
     * bit 10 (the implicit-bit position), counting every shift after the
     * first so the exponent can be lowered by the same amount.
     */
    npy_uint64 sig = fraction << 1;
    npy_uint64 extra_shifts = 0;
    while ((sig & 0x0400u) == 0) {
        sig <<= 1;
        ++extra_shifts;
    }

    return sign_bits
         + ((npy_uint64)(1023 - 15 - extra_shifts) << 52)
         + ((sig & 0x03ffu) << 42);
}
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论