Commit 41dbf0a9 by ltb

add cpu half test library , dir is source/tensor/halfLib

parent 1da50ae2
#include <iostream>
#include <assert.h>
#include <direct.h>
#include "../core/utilities/FlushToMem.h"
#include "../core/getandset/ConvertDataType.h"
#include "../XTensor.h"
#include "umHalf.h"
using namespace nts;
//#define VALIDATE(x) if (!(x)){std::cout << "Failed: " << #x << std::endl;assert((x));}
int main(int argc, char* argv[])
{
char *path;
path = getcwd(NULL, 0);
strcat(path, "\\source\\tensor\\HalfFloat\\dump");
XTensor a;
XTensor halfa;
int dim = 4;
int devId = 0;
InitTensor2DV2(&a,dim,dim,X_FLOAT,devId);
a.SetDataRand(-2.0,2.0);
halfa = ConvertDataType(a, X_FLOAT16);
halfa.Dump(&halfa, stderr, "halfa:");
GPUToCPUFlush(&halfa);
FILE * file = fopen(path, "wb");
halfa.Dump(file, "halfa:");
fclose(file);
XTensor halfb;
InitTensor2DV2(&halfb, dim, dim, X_FLOAT16, devId);
FILE *read = fopen(path, "rb");
halfb.Read(read, "halfa:");
fclose(read);
halfb.Dump(&halfb, stderr, "halfb:");
//half h = 1.f, h2 = 2.f;
//--h2;
//++h2;
//--h;
//++h;
//h2 -= 1.f;
//float f = h2, f2 = h;
//VALIDATE(1.f == f && f == f2);
//half dddd = 15.5;
//float hhhh = 15.5;
//printf("%x\n", dddd);
//printf("%x\n", hhhh);
//h = h2;
//h2 = 15.5f;
//f = h2, f2 = h;
//VALIDATE(15.5f == f && 1.f == f2);
//h2 *= h;
//f = h2, f2 = h;
//VALIDATE(15.5f == f && 1.f == f2);
//h2 /= h;
//f = h2, f2 = h;
//VALIDATE(15.5f == f && 1.f == f2);
//h2 += h;
//f = h2, f2 = h;
//VALIDATE(16.5f == f && 1.f == f2);
//h++; h++; h++;
//h2 = -h2;
//h2 += 17.5f;
//h2 *= h;
//f = h2, f2 = h;
//VALIDATE(4.f == f && 4.f == f2);
//VALIDATE(h == h2);
//VALIDATE(h <= h2);
//--h;
//VALIDATE(h <= h2);
//h -= 250.f;
//VALIDATE(h < h2);
//h += 500.f;
//VALIDATE(h > h2);
//VALIDATE(h >= h2);
//f = h2, f2 = h;
//VALIDATE(h * h2 == (half)(f * f2));
//// addition
//// ****************************************************************************
//// identical exponents
//for (float f = 0.f; f < 1000.f; ++f)
//{
// half one = f;
// half two = f;
// half three = one + two;
// f2 = three;
// VALIDATE(f*2.f == f2);
//}
//// different exponents
//for (float f = 0.f, fp = 1000.f; f < 500.f; ++f, --fp)
//{
// half one = f;
// half two = fp;
// half three = one + two;
// f2 = three;
// VALIDATE(f + fp == f2);
//}
//// very small numbers - this is already beyond the accuracy of 16 bit floats.
//for (float f = 0.003f; f < 1000.f; f += 0.0005f)
//{
// half one = f;
// half two = f;
// half three = one + two;
// f2 = three;
// float m = f * 2.f;
// VALIDATE(f2 > (m - 0.05*m) && f2 < (m + 0.05*m));
//}
//// subtraction
//// ****************************************************************************
//// identical exponents
//for (float f = 0.f; f < 1000.f; ++f)
//{
// half one = f;
// half two = f;
// half three = one - two;
// f2 = three;
// VALIDATE(0.f == f2);
//}
//// different exponents
//for (float f = 0.f, fp = 1000.f; f < 500.f; ++f, --fp)
//{
// half one = f;
// half two = fp;
// half three = one - two;
// f2 = three;
// VALIDATE(f - fp == f2);
//}
return 0;
}
https://github.com/acgessler/half_float
C++ implementation of a 16 bit floating-point type mimicking most of the IEEE 754 behaviour. Compatible with the half data type used as texture format by OpenGl/Direct3D.
\ No newline at end of file
halfa: order=2 dimsize=4,4 dtype=X_FLOAT16 dense=1.000000
be2c 3ffd bf2c 3c52 a8f6 3a6a afcf 3eca 3e47 3852 bf6e 3bc8 bff5 bc12 b266 31a4
#include <iostream>
#include <assert.h>
#include <direct.h>
#include "../../core/utilities/FlushToMem.h"
#include "../../core/getandset/ConvertDataType.h"
#include "../../XTensor.h"
#include "../../XGlobal.h"
#include "umHalf.h"
using namespace nts;
int main(int argc, char* argv[])
{
char *path;
path = getcwd(NULL, 0);
strcat(path, "\\source\\tensor\\halfLib\\HalfFloat\\dump");
XTensor a;
XTensor halfa;
int dim = 4;
int devId = 0;
InitTensor2DV2(&a, dim, dim, X_FLOAT, devId);
a.SetDataRand(-2.0, 2.0);
halfa = ConvertDataType(a, X_FLOAT16);
printf("============save model================\n");
halfa.Dump(&halfa, stderr, "halfa:");
GPUToCPUFlush(&halfa);
FILE * file = fopen(path, "wb");
halfa.Dump(file, "halfa:");
//a.Dump(file, "a");
fclose(file);
XTensor halfb;
InitTensor2DV2(&halfb, dim, dim, X_FLOAT16, devId);
XTensor b;
InitTensor2DV2(&b, dim, dim, X_FLOAT, devId);
printf("==============read model=============\n");
FILE *read = fopen(path, "rb");
halfb.Read(read, "halfa:");
//b.Read(read, "a");
fclose(read);
halfb.Dump(&halfb, stderr, "halfb:");
return 0;
}
\ No newline at end of file
// ISO C9x compliant stdint.h for Microsoft Visual Studio
// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124
//
// Copyright (c) 2006 Alexander Chemeris
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. The name of the author may be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
///////////////////////////////////////////////////////////////////////////////
#ifndef _MSC_VER // [
#error "Use this header only with Microsoft Visual C++ compilers!"
#endif // _MSC_VER ]
#ifndef _MSC_STDINT_H_ // [
#define _MSC_STDINT_H_
#if _MSC_VER > 1000
#pragma once
#endif
#include <limits.h>
// For Visual Studio 6 in C++ mode wrap <wchar.h> include with 'extern "C++" {}'
// or compiler give many errors like this:
// error C2733: second C linkage of overloaded function 'wmemchr' not allowed
#if (_MSC_VER < 1300) && defined(__cplusplus)
extern "C++" {
#endif
# include <wchar.h>
#if (_MSC_VER < 1300) && defined(__cplusplus)
}
#endif
// 7.18.1 Integer types
// 7.18.1.1 Exact-width integer types
typedef __int8 int8_t;
typedef __int16 int16_t;
typedef __int32 int32_t;
typedef __int64 int64_t;
typedef unsigned __int8 uint8_t;
typedef unsigned __int16 uint16_t;
typedef unsigned __int32 uint32_t;
typedef unsigned __int64 uint64_t;
// 7.18.1.2 Minimum-width integer types
typedef int8_t int_least8_t;
typedef int16_t int_least16_t;
typedef int32_t int_least32_t;
typedef int64_t int_least64_t;
typedef uint8_t uint_least8_t;
typedef uint16_t uint_least16_t;
typedef uint32_t uint_least32_t;
typedef uint64_t uint_least64_t;
// 7.18.1.3 Fastest minimum-width integer types
typedef int8_t int_fast8_t;
typedef int16_t int_fast16_t;
typedef int32_t int_fast32_t;
typedef int64_t int_fast64_t;
typedef uint8_t uint_fast8_t;
typedef uint16_t uint_fast16_t;
typedef uint32_t uint_fast32_t;
typedef uint64_t uint_fast64_t;
// 7.18.1.4 Integer types capable of holding object pointers
#ifdef _WIN64 // [
typedef __int64 intptr_t;
typedef unsigned __int64 uintptr_t;
#else // _WIN64 ][
typedef int intptr_t;
typedef unsigned int uintptr_t;
#endif // _WIN64 ]
// 7.18.1.5 Greatest-width integer types
typedef int64_t intmax_t;
typedef uint64_t uintmax_t;
// 7.18.2 Limits of specified-width integer types
#if !defined(__cplusplus) || defined(__STDC_LIMIT_MACROS) // [ See footnote 220 at page 257 and footnote 221 at page 259
// 7.18.2.1 Limits of exact-width integer types
#define INT8_MIN ((int8_t)_I8_MIN)
#define INT8_MAX _I8_MAX
#define INT16_MIN ((int16_t)_I16_MIN)
#define INT16_MAX _I16_MAX
#define INT32_MIN ((int32_t)_I32_MIN)
#define INT32_MAX _I32_MAX
#define INT64_MIN ((int64_t)_I64_MIN)
#define INT64_MAX _I64_MAX
#define UINT8_MAX _UI8_MAX
#define UINT16_MAX _UI16_MAX
#define UINT32_MAX _UI32_MAX
#define UINT64_MAX _UI64_MAX
// 7.18.2.2 Limits of minimum-width integer types
#define INT_LEAST8_MIN INT8_MIN
#define INT_LEAST8_MAX INT8_MAX
#define INT_LEAST16_MIN INT16_MIN
#define INT_LEAST16_MAX INT16_MAX
#define INT_LEAST32_MIN INT32_MIN
#define INT_LEAST32_MAX INT32_MAX
#define INT_LEAST64_MIN INT64_MIN
#define INT_LEAST64_MAX INT64_MAX
#define UINT_LEAST8_MAX UINT8_MAX
#define UINT_LEAST16_MAX UINT16_MAX
#define UINT_LEAST32_MAX UINT32_MAX
#define UINT_LEAST64_MAX UINT64_MAX
// 7.18.2.3 Limits of fastest minimum-width integer types
#define INT_FAST8_MIN INT8_MIN
#define INT_FAST8_MAX INT8_MAX
#define INT_FAST16_MIN INT16_MIN
#define INT_FAST16_MAX INT16_MAX
#define INT_FAST32_MIN INT32_MIN
#define INT_FAST32_MAX INT32_MAX
#define INT_FAST64_MIN INT64_MIN
#define INT_FAST64_MAX INT64_MAX
#define UINT_FAST8_MAX UINT8_MAX
#define UINT_FAST16_MAX UINT16_MAX
#define UINT_FAST32_MAX UINT32_MAX
#define UINT_FAST64_MAX UINT64_MAX
// 7.18.2.4 Limits of integer types capable of holding object pointers
#ifdef _WIN64 // [
# define INTPTR_MIN INT64_MIN
# define INTPTR_MAX INT64_MAX
# define UINTPTR_MAX UINT64_MAX
#else // _WIN64 ][
# define INTPTR_MIN INT32_MIN
# define INTPTR_MAX INT32_MAX
# define UINTPTR_MAX UINT32_MAX
#endif // _WIN64 ]
// 7.18.2.5 Limits of greatest-width integer types
#define INTMAX_MIN INT64_MIN
#define INTMAX_MAX INT64_MAX
#define UINTMAX_MAX UINT64_MAX
// 7.18.3 Limits of other integer types
#ifdef _WIN64 // [
# define PTRDIFF_MIN _I64_MIN
# define PTRDIFF_MAX _I64_MAX
#else // _WIN64 ][
# define PTRDIFF_MIN _I32_MIN
# define PTRDIFF_MAX _I32_MAX
#endif // _WIN64 ]
#define SIG_ATOMIC_MIN INT_MIN
#define SIG_ATOMIC_MAX INT_MAX
#ifndef SIZE_MAX // [
# ifdef _WIN64 // [
# define SIZE_MAX _UI64_MAX
# else // _WIN64 ][
# define SIZE_MAX _UI32_MAX
# endif // _WIN64 ]
#endif // SIZE_MAX ]
// WCHAR_MIN and WCHAR_MAX are also defined in <wchar.h>
#ifndef WCHAR_MIN // [
# define WCHAR_MIN 0
#endif // WCHAR_MIN ]
#ifndef WCHAR_MAX // [
# define WCHAR_MAX _UI16_MAX
#endif // WCHAR_MAX ]
#define WINT_MIN 0
#define WINT_MAX _UI16_MAX
#endif // __STDC_LIMIT_MACROS ]
// 7.18.4 Limits of other integer types
#if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) // [ See footnote 224 at page 260
// 7.18.4.1 Macros for minimum-width integer constants
#define INT8_C(val) val##i8
#define INT16_C(val) val##i16
#define INT32_C(val) val##i32
#define INT64_C(val) val##i64
#define UINT8_C(val) val##ui8
#define UINT16_C(val) val##ui16
#define UINT32_C(val) val##ui32
#define UINT64_C(val) val##ui64
// 7.18.4.2 Macros for greatest-width integer constants
#define INTMAX_C INT64_C
#define UINTMAX_C UINT64_C
#endif // __STDC_CONSTANT_MACROS ]
#endif // _MSC_STDINT_H_ ]
\ No newline at end of file
///////////////////////////////////////////////////////////////////////////////////
/*
Copyright (c) 2006-2008,
Chris "Krishty" Maiwald, Alexander "Aramis" Gessler
All rights reserved.
Redistribution and use of this software in source and binary forms,
with or without modification, are permitted provided that the following
conditions are met:
* Redistributions of source code must retain the above
copyright notice, this list of conditions and the
following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the
following disclaimer in the documentation and/or other
materials provided with the distribution.
* Neither the name of the class, nor the names of its
contributors may be used to endorse or promote products
derived from this software without specific prior
written permission of the Development Team.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
///////////////////////////////////////////////////////////////////////////////////
#ifndef UM_HALF_H_INCLUDED
#define UM_HALF_H_INCLUDED
#include <limits>
#include <algorithm>
//#ifdef _MSC_VER
//#include "stdint.h"
//#else
//#include <stdint.h>
//#endif
#include<stdint.h>
#undef min
#undef max
///////////////////////////////////////////////////////////////////////////////////
/** 1. Represents a half-precision floating point value (16 bits) that behaves
* nearly conformant to the IEE 754 standard for floating-point computations.
*
* Not all operators have special implementations, most perform time-consuming
* conversions from half to float and back again.
* Differences to IEEE 754:
* - no difference between qnan and snan
* - no traps
* - no well-defined rounding mode
*/
///////////////////////////////////////////////////////////////////////////////////
class HalfFloat
{
friend HalfFloat operator+ (HalfFloat, HalfFloat);
friend HalfFloat operator- (HalfFloat, HalfFloat);
friend HalfFloat operator* (HalfFloat, HalfFloat);
friend HalfFloat operator/ (HalfFloat, HalfFloat);
public:
enum { BITS_MANTISSA = 10 };
enum { BITS_EXPONENT = 5 };
enum { MAX_EXPONENT_VALUE = 31 };
enum { BIAS = MAX_EXPONENT_VALUE / 2 };
enum { MAX_EXPONENT = BIAS };
enum { MIN_EXPONENT = -BIAS };
enum { MAX_EXPONENT10 = 9 };
enum { MIN_EXPONENT10 = -9 };
public:
/** Default constructor. Unitialized by default.
*/
inline HalfFloat() {}
/** Construction from an existing half
*/
inline HalfFloat(const HalfFloat& other)
: bits(other.GetBits())
{}
/** Construction from existing values for mantissa, sign
* and exponent. No validation is performed.
* @note The exponent is unsigned and biased by #BIAS
*/
inline HalfFloat(uint16_t _m, uint16_t _e, uint16_t _s);
/** Construction from a single-precision float
*/
inline HalfFloat(float other);
/** Construction from a double-precision float
*/
inline HalfFloat(const double);
/** Conversion operator to convert from half to float
*/
inline operator float() const;
/** Conversion operator to convert from half to double
*/
inline operator double() const;
/** Assignment operator to assign another half to
* *this* object.
*/
inline HalfFloat& operator= (HalfFloat other);
inline HalfFloat& operator= (float other);
inline HalfFloat& operator= (const double other);
/** Comparison operators
*/
inline bool operator== (HalfFloat other) const;
inline bool operator!= (HalfFloat other) const;
/** Relational comparison operators
*/
inline bool operator< (HalfFloat other) const;
inline bool operator> (HalfFloat other) const;
inline bool operator<= (HalfFloat other) const;
inline bool operator>= (HalfFloat other) const;
inline bool operator< (float other) const;
inline bool operator> (float other) const;
inline bool operator<= (float other) const;
inline bool operator>= (float other) const;
/** Combined assignment operators
*/
inline HalfFloat& operator += (HalfFloat other);
inline HalfFloat& operator -= (HalfFloat other);
inline HalfFloat& operator *= (HalfFloat other);
inline HalfFloat& operator /= (HalfFloat other);
inline HalfFloat& operator += (float other);
inline HalfFloat& operator -= (float other);
inline HalfFloat& operator *= (float other);
inline HalfFloat& operator /= (float other);
/** Post and prefix increment operators
*/
inline HalfFloat& operator++();
inline HalfFloat operator++(int);
/** Post and prefix decrement operators
*/
inline HalfFloat& operator--();
inline HalfFloat operator--(int);
/** Unary minus operator
*/
inline HalfFloat operator-() const;
/** Provides direct access to the bits of a half float
*/
inline uint16_t GetBits() const;
inline uint16_t& GetBits();
/** Classification of floating-point types
*/
inline bool IsNaN() const;
inline bool IsInfinity() const;
inline bool IsDenorm() const;
/** Returns the sign of the floating-point value -
* true stands for positive.
*/
inline bool GetSign() const;
public:
union
{
uint16_t bits; // All bits
struct
{
uint16_t Frac : 10; // mantissa
uint16_t Exp : 5; // exponent
uint16_t Sign : 1; // sign
} IEEE;
};
union IEEESingle
{
float Float;
struct
{
uint32_t Frac : 23;
uint32_t Exp : 8;
uint32_t Sign : 1;
} IEEE;
};
union IEEEDouble
{
double Double;
struct {
uint64_t Frac : 52;
uint64_t Exp : 11;
uint64_t Sign : 1;
} IEEE;
};
// Enums can not store 64 bit values, so we have to use static constants.
static const uint64_t IEEEDouble_MaxExpontent = 0x7FF;
static const uint64_t IEEEDouble_ExponentBias = IEEEDouble_MaxExpontent / 2;
};
/** 2. Binary operations
*/
inline HalfFloat operator+ (HalfFloat one, HalfFloat two);
inline HalfFloat operator- (HalfFloat one, HalfFloat two);
inline HalfFloat operator* (HalfFloat one, HalfFloat two);
inline HalfFloat operator/ (HalfFloat one, HalfFloat two);
inline float operator+ (HalfFloat one, float two);
inline float operator- (HalfFloat one, float two);
inline float operator* (HalfFloat one, float two);
inline float operator/ (HalfFloat one, float two);
inline float operator+ (float one, HalfFloat two);
inline float operator- (float one, HalfFloat two);
inline float operator* (float one, HalfFloat two);
inline float operator/ (float one, HalfFloat two);
///////////////////////////////////////////////////////////////////////////////////
/** 3. Specialization of std::numeric_limits for type half.
*/
///////////////////////////////////////////////////////////////////////////////////
namespace std {
template <>
class numeric_limits<HalfFloat> {
public:
// General -- meaningful for all specializations.
static const bool is_specialized = true;
static HalfFloat min()
{
return HalfFloat(0, 1, 0);
}
static HalfFloat max()
{
return HalfFloat(~0, HalfFloat::MAX_EXPONENT_VALUE - 1, 0);
}
static const int radix = 2;
static const int digits = 10; // conservative assumption
static const int digits10 = 2; // conservative assumption
static const bool is_signed = true;
static const bool is_integer = true;
static const bool is_exact = false;
static const bool traps = false;
static const bool is_modulo = false;
static const bool is_bounded = true;
// Floating point specific.
static HalfFloat epsilon()
{
return HalfFloat(0.00097656f);
} // from OpenEXR, needs to be confirmed
static HalfFloat round_error()
{
return HalfFloat(0.00097656f / 2);
}
static const int min_exponent10 = HalfFloat::MIN_EXPONENT10;
static const int max_exponent10 = HalfFloat::MAX_EXPONENT10;
static const int min_exponent = HalfFloat::MIN_EXPONENT;
static const int max_exponent = HalfFloat::MAX_EXPONENT;
static const bool has_infinity = true;
static const bool has_quiet_NaN = true;
static const bool has_signaling_NaN = true;
static const bool is_iec559 = false;
static const bool has_denorm = denorm_present;
static const bool tinyness_before = false;
static const float_round_style round_style = round_to_nearest;
static HalfFloat denorm_min()
{
return HalfFloat(1, 0, 1);
}
static HalfFloat infinity()
{
return HalfFloat(0, HalfFloat::MAX_EXPONENT_VALUE, 0);
}
static HalfFloat quiet_NaN()
{
return HalfFloat(1, HalfFloat::MAX_EXPONENT_VALUE, 0);
}
static HalfFloat signaling_NaN()
{
return HalfFloat(1, HalfFloat::MAX_EXPONENT_VALUE, 0);
}
};
} // end namespace std
#include "./umHalf.inl"
#ifndef UM_HALF_NO_TYPEDEFS
typedef HalfFloat float16;
typedef HalfFloat halfCPU;
#endif
#endif // !! UM_HALF_H_INCLUDED
halfa: order=2 dimsize=4,4 dtype=X_FLOAT16 dense=1.000000
bc68 342d ae59 bcd7 b46a 3c1c 2c25 beb9 bcaf 3d72 3fc2 38d0 bd6b bce4 3854 ad13
This source diff could not be displayed because it is too large. You can view the blob instead.
#include <stdio.h>
#include <direct.h>
#include "../../core/CHeader.h"
#include "../../core/utilities/FlushToMem.h"
#include "../../core/getandset/ConvertDataType.h"
#include "../../XTensor.h"
#include "../../XGlobal.h"
using namespace nts;
int main(int argc, const char ** argv) {
char *path;
path = getcwd(NULL, 0);
strcat(path, "\\source\\tensor\\halfLib\\half\\dump");
int dim = 4;
int devId = 0;
XTensor a;
XTensor b;
XTensor c;
XTensor halfa;
XTensor halfb;
XTensor halfc;
InitTensor2DV2(&a, dim, dim, X_FLOAT, devId);
InitTensor2DV2(&c, dim, dim, X_FLOAT, devId);
InitTensor2DV2(&halfb, dim, dim, X_FLOAT16, devId);
a.SetDataRand(-2.0, 2.0);
c.SetDataRand(-2.0, 2.0);
halfa = ConvertDataType(a, X_FLOAT16);
halfc = ConvertDataType(c, X_FLOAT16);
printf("============save model================\n");
halfa.Dump(&halfa, stderr, "halfa:");
GPUToCPUFlush(&halfa);
FILE * file = fopen(path, "wb");
halfa.Dump(file, "halfa:");
//a.Dump(file, "a");
fclose(file);
printf("==============read model=============\n");
FILE *read = fopen(path, "rb");
halfb.Read(read, "halfa:");
//b.Read(read, "a");
fclose(read);
halfb.Dump(&halfb, stderr, "halfb:");
printf("==============BMMUL=============\n");
b = BMMul(a, X_NOTRANS, c, X_NOTRANS);
b.Dump(stderr,"b:");
printf("==============BMMUL-float=============\n");
halfa= BMMul(halfb, X_NOTRANS, halfc, X_NOTRANS);
halfa.Dump(&halfa, stderr, "halfla:");
return 0;
}
\ No newline at end of file
#include <stdlib.h>
#include <stdio.h>
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
#include <time.h>
#include <cuda_fp16.h>
//#ifndef HALF_ARITHMETIC_TYPE
//#define HALF_ARITHMETIC_TYPE
//#endif // !HALF_ARITHMETIC_TYPE
#include "half.hpp"
using half_float::halfFloat;
typedef half_float::halfFloat halfC;
__global__ void matrixMulKernel(__half *C, __half *A, __half *B) {
int i = blockIdx.x * blockDim.x + threadIdx.x;
C[i] = A[i] * B[i];
}
void constantInit(halfC *data, int size, halfC val) {
for (int i = 0; i < size; ++i) {
data[i] = val;
}
}
void matrixMul() {
unsigned int N = 128;
unsigned int size = N * sizeof(halfC);
halfC *h_A = (halfC*)malloc(size);
halfC *h_B = (halfC*)malloc(size);
halfC *h_C = (halfC*)malloc(size);
halfC *h_D = (halfC*)malloc(size);
// Initialize host memory
const halfC valB = (halfC)0.01f;
constantInit(h_A, N, (halfC)1.0f);
constantInit(h_B, N, valB);
__half *d_A, *d_B, *d_C;
cudaMalloc((void**)&d_A, size);
cudaMalloc((void**)&d_B, size);
cudaMalloc((void**)&d_C, size);
//copy host memory to device
cudaMemcpy(d_A, h_A, size, cudaMemcpyHostToDevice);
cudaMemcpy(d_B, h_B, size, cudaMemcpyHostToDevice);
//config dims
dim3 block(16, 16);
dim3 grid(N / block.x, N / block.y);
// Excute the kernel
matrixMulKernel << <grid, block >> > (d_C, d_A, d_B);
// Copy the memory from device to host
cudaMemcpy(h_C, d_C, size, cudaMemcpyDeviceToHost);
//printf("Checking computed result for correctness: ");
//bool correct = true;
//// test relative error by the formula
//// |<x, y>_cpu - <x,y>_gpu|/<|x|, |y|> < eps
//double eps = 1.e-6; // machine zero
for (int k = 0; k < N; k++) {
h_D[k] = h_A[k] * h_B[k];
}
for (int i = 0; i < N; i++) {
printf("%hx--%hx ", h_C[i], h_D[i]);
if ((i + 1) % 8 == 0)
printf("\n");
}
//for (int i = 0; i < width*height; i++) {
// double abs_err = fabs(h_C[i] - (width * valB));
// double dot_length = width;
// double abs_val = fabs(h_C[i]);
// double rel_err = abs_err / abs_val / dot_length;
// if (rel_err > eps)
// {
// printf("Error! Matrix[%05d]=%.8f, ref=%.8f error term is > %E\n", i, h_C[i], (float)(width*height), eps);
// correct = false;
// }
//}
//printf("%s\n", correct ? "Result = PASS" : "Result = FAIL");
// Free
free(h_A);
free(h_B);
free(h_C);
cudaFree(d_A);
cudaFree(d_B);
cudaFree(d_C);
}
int main() {
matrixMul();
}
//
//#define THREAD_NUM 256
//#define MATRIX_SIZE 4
//const halfC blocks_num = MATRIX_SIZE * (MATRIX_SIZE + THREAD_NUM - 1) / THREAD_NUM;
//
//__global__ static void matMultCUDA(const __half* a, const __half* b, __half* c, halfC n, clock_t* time)
//{
//
// //表示目前的 thread 是第几个 thread(由 0 开始计算)
// const halfC tid = threadIdx.x;
//
// //表示目前的 thread 属于第几个 block(由 0 开始计算)
// const halfC bid = blockIdx.x;
//
// //从 bid 和 tid 计算出这个 thread 应该计算的 row 和 column
// const halfC idx = bid * THREAD_NUM + tid;
// const halfC row = idx / n;
// const halfC column = idx % n;
//
// halfC i;
//
// //记录运算开始的时间
// clock_t start;
//
// //只在 thread 0(即 threadIdx.x = 0 的时候)进行记录,每个 block 都会记录开始时间及结束时间
// if (tid == 0)
// time[bid] = clock();
//
// //计算矩阵乘法
// if (row < n && column < n)
// {
// __half t = __half(0.0);
// for (i = 0; i < n; i++)
// {
// t += a[row * n + i] * b[i * n + column];
// }
// c[row * n + column] = t;
// }
//
// //计算时间,记录结果,只在 thread 0(即 threadIdx.x = 0 的时候)进行,每个 block 都会记录开始时间及结束时间
// if (tid == 0)
// {
// time[bid + blocks_num] = clock();
// }
//}
//
//bool InitCuda() {
// halfC count;
// halfC device;
// cudaGetDeviceCount(&count);
// if (count == 0) {
// fprhalfCf(stderr, "There is no device !\n");
// }
// else
// device = 1;
// cudaSetDevice(device);
// return true;
//}
//template <class T >
//void matgen(T *a, halfC n) {
// halfC i, j;
// for (i = 0; i < n; i++) {
// for (j = 0; j < n; j++) {
// a[i * n + j] = (T)rand() / (0x7FFF) + (halfC)rand() / (0x7FFF * 0x7FFF);
// }
// }
//}
//
//
//halfC main(halfC argc, char **argv) {
//
// //CUDA 初始化
// if (!InitCuda())
// return 0;
//
// //定义矩阵
// halfC *a, *b, *c, *d;
//
// halfC n = MATRIX_SIZE;
//
// //分配内存
// a = (halfC*)malloc(sizeof(halfC)* n * n);
// b = (halfC*)malloc(sizeof(halfC)* n * n);
// c = (halfC*)malloc(sizeof(halfC)* n * n);
// d = (halfC*)malloc(sizeof(halfC)* n * n);
//
// //设置随机数种子
// srand(0);
//
// //随机生成矩阵
// matgen(a, n);
// matgen(b, n);
//
// for (halfC i = 0; i < n; i++)
// {
// for (halfC j = 0; j < n; j++)
// {
// prhalfCf("%x ", a[i * n + j]);
// }
// prhalfCf("\n");
// }
//
// ///*把数据复制到显卡内存中*/
// __half *cuda_a, *cuda_b, *cuda_c;
//
// clock_t* time;
//
// //cudaMalloc 取得一块显卡内存
// cudaMalloc((void**)&cuda_a, sizeof(__half)* n * n);
// cudaMalloc((void**)&cuda_b, sizeof(__half)* n * n);
// cudaMalloc((void**)&cuda_c, sizeof(__half)* n * n);
//
// cudaMalloc((void**)&time, sizeof(clock_t)* blocks_num * 2);
//
// //cudaMemcpy 将产生的矩阵复制到显卡内存中
// //cudaMemcpyHostToDevice - 从内存复制到显卡内存
// //cudaMemcpyDeviceToHost - 从显卡内存复制到内存
// cudaMemcpy(cuda_a, a, sizeof(__half)* n * n, cudaMemcpyHostToDevice);
// cudaMemcpy(cuda_b, b, sizeof(__half)* n * n, cudaMemcpyHostToDevice);
//
// // 在CUDA 中执行函数 语法:函数名称<<<block 数目, thread 数目, shared memory 大小>>>(参数...);
// matMultCUDA << < blocks_num, THREAD_NUM, 0 >> > (cuda_a, cuda_b, cuda_c, n, time);
//
// /*把结果从显示芯片复制回主内存*/
//
// clock_t time_use[blocks_num * 2];
//
// //cudaMemcpy 将结果从显存中复制回内存
// cudaMemcpy(c, cuda_c, sizeof(halfC)* n * n, cudaMemcpyDeviceToHost);
// cudaMemcpy(&time_use, time, sizeof(clock_t)* blocks_num * 2, cudaMemcpyDeviceToHost);
//
// for (halfC i = 0; i < n; i++)
// {
// for (halfC j = 0; j < n; j++)
// {
// prhalfCf("%x ", c[i * n + j]);
// }
// prhalfCf("\n");
// }
//
// //Free cuda
// cudaFree(cuda_a);
// cudaFree(cuda_b);
// cudaFree(cuda_c);
// cudaFree(time);
////把每个 block 最早的开始时间,和最晚的结束时间相减,取得总运行时间
//clock_t min_start, max_end;
//min_start = time_use[0];
//max_end = time_use[blocks_num];
//for (halfC i = 1; i < blocks_num; i++)
//{
// if (min_start > time_use[i]) min_start = time_use[i];
// if (max_end < time_use[i + blocks_num]) max_end = time_use[i + blocks_num];
//}
////核函数运行时间
//clock_t final_time = max_end - min_start;
////CPU矩阵乘法,存入矩阵d
//for (halfC i = 0; i < n; i++)
//{
// for (halfC j = 0; j < n; j++)
// {
// double t = 0;
// for (halfC k = 0; k < n; k++){
// t += a[i * n + k] * b[k * n + j];
// }
// d[i * n + j] = t;
// }
//}
////验证正确性与精确性
//halfC max_err = (halfC)0.0;
//halfC average_err = (halfC)0;
//for (halfC i = 0; i < n; i++)
//{
// for (halfC j = 0; j < n; j++)
// {
// if (d[i * n + j] != 0)
// {
// //fabs求浮点数x的绝对值
// halfC err = fabs((c[i * n + j] - d[i * n + j]) / d[i * n + j]);
// if (max_err < err) max_err = err;
// average_err += err;
// }
// }
//}
//prhalfCf("Max error: %g Average error: %g\n", max_err, average_err / (n * n));
//prhalfCf("gputime: %d\n", final_time);
//
// return 0;
//}
\ No newline at end of file
/*
* This implementation is extracted from PyTorch:
* Repo: github.com/pytorch/pytorch
* File: torch/lib/TH/THHalf.c
* Commit ID: 92481b59d31199df57420d4b14912348cc780d1d
* Functions are made "static inline" for performance
*/
/* Copyright 1993-2014 NVIDIA Corporation. All rights reserved. */
// Host functions for converting between FP32 and FP16 formats
static inline void TH_halfbits2float(unsigned short* src, float* res)
{
unsigned h = *src;
unsigned sign = ((h >> 15) & 1);
unsigned exponent = ((h >> 10) & 0x1f);
unsigned mantissa = ((h & 0x3ff) << 13);
if (exponent == 0x1f) { /* NaN or Inf */
mantissa = (mantissa ? (sign = 0, 0x7fffff) : 0);
exponent = 0xff;
}
else if (!exponent) { /* Denorm or Zero */
if (mantissa) {
unsigned int msb;
exponent = 0x71;
do {
msb = (mantissa & 0x400000);
mantissa <<= 1; /* normalize */
--exponent;
} while (!msb);
mantissa &= 0x7fffff; /* 1.mantissa is implicit */
}
}
else {
exponent += 0x70;
}
*(unsigned*)res = ((sign << 31) | (exponent << 23) | mantissa);
}
static inline void TH_float2halfbits(float* src, unsigned short* dest)
{
unsigned x = *(unsigned*)src;
unsigned u = (x & 0x7fffffff), remainder, shift, lsb, lsb_s1, lsb_m1;
unsigned sign, exponent, mantissa;
// Get rid of +NaN/-NaN case first.
if (u > 0x7f800000) {
*dest = 0x7fffU;
return;
}
sign = ((x >> 16) & 0x8000);
// Get rid of +Inf/-Inf, +0/-0.
if (u > 0x477fefff) {
*dest = sign | 0x7c00U;
return;
}
if (u < 0x33000001) {
*dest = (sign | 0x0000);
return;
}
exponent = ((u >> 23) & 0xff);
mantissa = (u & 0x7fffff);
if (exponent > 0x70) {
shift = 13;
exponent -= 0x70;
}
else {
shift = 0x7e - exponent;
exponent = 0;
mantissa |= 0x800000;
}
lsb = (1 << shift);
lsb_s1 = (lsb >> 1);
lsb_m1 = (lsb - 1);
// Round to nearest even.
remainder = (mantissa & lsb_m1);
mantissa >>= shift;
if (remainder > lsb_s1 || (remainder == lsb_s1 && (mantissa & 0x1))) {
++mantissa;
if (!(mantissa & 0x3ff)) {
++exponent;
mantissa = 0;
}
}
*dest = (sign | (exponent << 10) | mantissa);
}
/*
* This implementation is extracted from Eigen:
* Repo: bitbucket.org/eigen/eigen
* File: Eigen/src/Core/arch/CUDA/Half.h
* Commit ID: 96e0f73a35de54f675d825bef5339b2f08e77eb4
*
* Removed a lot of redundant and cuda-specific code.
*/
#define EIGEN_STRONG_INLINE static inline
#define EIGEN_DEVICE_FUNC
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
//
// The conversion routines are Copyright (c) Fabian Giesen, 2016.
// The original license follows:
//
// Copyright (c) Fabian Giesen, 2016
// All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// AS IS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// Standard 16-bit float type, mostly useful for GPUs. Defines a new
// type Eigen::half (inheriting from CUDA's __half struct) with
// operator overloads such that it behaves basically as an arithmetic
// type. It will be quite slow on CPUs (so it is recommended to stay
// in fp32 for CPUs, except for simple parameter conversions, I/O
// to disk and the likes), but fast on GPUs.
#ifndef EIGEN_HALF_CUDA_H
#define EIGEN_HALF_CUDA_H
namespace Eigen {
namespace half_impl {
// Make our own __half definition that is similar to CUDA's.
struct __half {
EIGEN_DEVICE_FUNC __half() : x(0) {}
explicit EIGEN_DEVICE_FUNC __half(unsigned short raw) : x(raw) {}
unsigned short x;
};
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half raw_uint16_to_half(unsigned short x);
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half float_to_half_rtne(float ff);
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float half_to_float(__half h);
// Conversion routines, including fallbacks for the host or older CUDA.
// Note that newer Intel CPUs (Haswell or newer) have vectorized versions of
// these in hardware. If we need more performance on older/other CPUs, they are
// also possible to vectorize directly.
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half raw_uint16_to_half(unsigned short x) {
__half h;
h.x = x;
return h;
}
union FP32 {
unsigned int u;
float f;
};
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half float_to_half_rtne(float ff) {
#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
return __float2half(ff);
#elif defined(EIGEN_HAS_FP16_C)
__half h;
h.x = _cvtss_sh(ff, 0);
return h;
#else
FP32 f; f.f = ff;
const FP32 f32infty = { 255 << 23 };
const FP32 f16max = { (127 + 16) << 23 };
const FP32 denorm_magic = { ((127 - 15) + (23 - 10) + 1) << 23 };
unsigned int sign_mask = 0x80000000u;
__half o;
o.x = static_cast<unsigned short>(0x0u);
unsigned int sign = f.u & sign_mask;
f.u ^= sign;
// NOTE all the integer compares in this function can be safely
// compiled into signed compares since all operands are below
// 0x80000000. Important if you want fast straight SSE2 code
// (since there's no unsigned PCMPGTD).
if (f.u >= f16max.u) { // result is Inf or NaN (all exponent bits set)
o.x = (f.u > f32infty.u) ? 0x7e00 : 0x7c00; // NaN->qNaN and Inf->Inf
}
else { // (De)normalized number or zero
if (f.u < (113 << 23)) { // resulting FP16 is subnormal or zero
// use a magic value to align our 10 mantissa bits at the bottom of
// the float. as long as FP addition is round-to-nearest-even this
// just works.
f.f += denorm_magic.f;
// and one integer subtract of the bias later, we have our final float!
o.x = static_cast<unsigned short>(f.u - denorm_magic.u);
}
else {
unsigned int mant_odd = (f.u >> 13) & 1; // resulting mantissa is odd
// update exponent, rounding bias part 1
f.u += ((unsigned int)(15 - 127) << 23) + 0xfff;
// rounding bias part 2
f.u += mant_odd;
// take the bits!
o.x = static_cast<unsigned short>(f.u >> 13);
}
}
o.x |= static_cast<unsigned short>(sign >> 16);
return o;
#endif
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float half_to_float(__half h) {
#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
return __half2float(h);
#elif defined(EIGEN_HAS_FP16_C)
return _cvtsh_ss(h.x);
#else
const FP32 magic = { 113 << 23 };
const unsigned int shifted_exp = 0x7c00 << 13; // exponent mask after shift
FP32 o;
o.u = (h.x & 0x7fff) << 13; // exponent/mantissa bits
unsigned int exp = shifted_exp & o.u; // just the exponent
o.u += (127 - 15) << 23; // exponent adjust
// handle exponent special cases
if (exp == shifted_exp) { // Inf/NaN?
o.u += (128 - 16) << 23; // extra exp adjust
}
else if (exp == 0) { // Zero/Denormal?
o.u += 1 << 23; // extra exp adjust
o.f -= magic.f; // renormalize
}
o.u |= (h.x & 0x8000) << 16; // sign bit
return o.f;
#endif
}
} // end namespace half_impl
} // end namespace Eigen
#endif // EIGEN_HALF_CUDA_H
#pragma once
#include <stdint.h>
/*
* This code snippet posted by user Phernost on
* https://stackoverflow.com/questions/1659440/32-bit-to-16-bit-floating-point-conversion
*
* compress and decompress methods are made "inline" for performance
*/
class Float16Compressor
{
union Bits
{
float f;
int32_t si;
uint32_t ui;
};
static int const shift = 13;
static int const shiftSign = 16;
static int32_t const infN = 0x7F800000; // flt32 infinity
static int32_t const maxN = 0x477FE000; // max flt16 normal as a flt32
static int32_t const minN = 0x38800000; // min flt16 normal as a flt32
static int32_t const signN = 0x80000000; // flt32 sign bit
static int32_t const infC = infN >> shift;
static int32_t const nanN = (infC + 1) << shift; // minimum flt16 nan as a flt32
static int32_t const maxC = maxN >> shift;
static int32_t const minC = minN >> shift;
static int32_t const signC = signN >> shiftSign; // flt16 sign bit
static int32_t const mulN = 0x52000000; // (1 << 23) / minN
static int32_t const mulC = 0x33800000; // minN / (1 << (23 - shift))
static int32_t const subC = 0x003FF; // max flt32 subnormal down shifted
static int32_t const norC = 0x00400; // min flt32 normal down shifted
static int32_t const maxD = infC - maxC - 1;
static int32_t const minD = minC - subC - 1;
public:
inline static uint16_t compress(float value)
{
Bits v, s;
v.f = value;
uint32_t sign = v.si & signN;
v.si ^= sign;
sign >>= shiftSign; // logical shift
s.si = mulN;
s.si = s.f * v.f; // correct subnormals
v.si ^= (s.si ^ v.si) & -(minN > v.si);
v.si ^= (infN ^ v.si) & -((infN > v.si) & (v.si > maxN));
v.si ^= (nanN ^ v.si) & -((nanN > v.si) & (v.si > infN));
v.ui >>= shift; // logical shift
v.si ^= ((v.si - maxD) ^ v.si) & -(v.si > maxC);
v.si ^= ((v.si - minD) ^ v.si) & -(v.si > subC);
return v.ui | sign;
}
inline static float decompress(uint16_t value)
{
Bits v;
v.ui = value;
int32_t sign = v.si & signC;
v.si ^= sign;
sign <<= shiftSign;
v.si ^= ((v.si + minD) ^ v.si) & -(v.si > subC);
v.si ^= ((v.si + maxD) ^ v.si) & -(v.si > maxC);
Bits s;
s.si = mulC;
s.f *= v.si;
int32_t mask = -(norC > v.si);
v.si <<= shift;
v.si ^= (s.si ^ v.si) & mask;
v.si |= sign;
return v.f;
}
};
\ No newline at end of file
/*
* This implementation is extracted from numpy:
* Repo: github.com/numpy/numpy
* File: numpy/core/src/npymath/halffloat.c
* Commit ID: 25c23f1d956104a072a95355ffaa7a38b53710b7
* Functions are made "static inline" for performance, and
* non-conversion functions are removed, and generation of
* exceptions is disabled.
*/
#include <cstdint>
typedef uint16_t npy_uint16;
typedef uint32_t npy_uint32;
typedef uint64_t npy_uint64;
/*
* This chooses between 'ties to even' and 'ties away from zero'.
*/
#define NPY_HALF_ROUND_TIES_TO_EVEN 1
/*
* If these are 1, the conversions try to trigger underflow,
* overflow, and invalid exceptions in the FP system when needed.
*/
#define NPY_HALF_GENERATE_OVERFLOW 0
#define NPY_HALF_GENERATE_UNDERFLOW 0
#define NPY_HALF_GENERATE_INVALID 0
/*
********************************************************************
* BIT-LEVEL CONVERSIONS *
********************************************************************
*/
static inline npy_uint16 npy_floatbits_to_halfbits(npy_uint32 f)
{
npy_uint32 f_exp, f_sig;
npy_uint16 h_sgn, h_exp, h_sig;
h_sgn = (npy_uint16)((f & 0x80000000u) >> 16);
f_exp = (f & 0x7f800000u);
/* Exponent overflow/NaN converts to signed inf/NaN */
if (f_exp >= 0x47800000u) {
if (f_exp == 0x7f800000u) {
/* Inf or NaN */
f_sig = (f & 0x007fffffu);
if (f_sig != 0) {
/* NaN - propagate the flag in the significand... */
npy_uint16 ret = (npy_uint16)(0x7c00u + (f_sig >> 13));
/* ...but make sure it stays a NaN */
if (ret == 0x7c00u) {
ret++;
}
return h_sgn + ret;
}
else {
/* signed inf */
return (npy_uint16)(h_sgn + 0x7c00u);
}
}
else {
/* overflow to signed inf */
#if NPY_HALF_GENERATE_OVERFLOW
npy_set_floatstatus_overflow();
#endif
return (npy_uint16)(h_sgn + 0x7c00u);
}
}
/* Exponent underflow converts to a subnormal half or signed zero */
if (f_exp <= 0x38000000u) {
/*
* Signed zeros, subnormal floats, and floats with small
* exponents all convert to signed zero halfs.
*/
if (f_exp < 0x33000000u) {
#if NPY_HALF_GENERATE_UNDERFLOW
/* If f != 0, it underflowed to 0 */
if ((f & 0x7fffffff) != 0) {
npy_set_floatstatus_underflow();
}
#endif
return h_sgn;
}
/* Make the subnormal significand */
f_exp >>= 23;
f_sig = (0x00800000u + (f & 0x007fffffu));
#if NPY_HALF_GENERATE_UNDERFLOW
/* If it's not exactly represented, it underflowed */
if ((f_sig&(((npy_uint32)1 << (126 - f_exp)) - 1)) != 0) {
npy_set_floatstatus_underflow();
}
#endif
f_sig >>= (113 - f_exp);
/* Handle rounding by adding 1 to the bit beyond half precision */
#if NPY_HALF_ROUND_TIES_TO_EVEN
/*
* If the last bit in the half significand is 0 (already even), and
* the remaining bit pattern is 1000...0, then we do not add one
* to the bit after the half significand. In all other cases, we do.
*/
if ((f_sig & 0x00003fffu) != 0x00001000u) {
f_sig += 0x00001000u;
}
#else
f_sig += 0x00001000u;
#endif
h_sig = (npy_uint16)(f_sig >> 13);
/*
* If the rounding causes a bit to spill into h_exp, it will
* increment h_exp from zero to one and h_sig will be zero.
* This is the correct result.
*/
return (npy_uint16)(h_sgn + h_sig);
}
/* Regular case with no overflow or underflow */
h_exp = (npy_uint16)((f_exp - 0x38000000u) >> 13);
/* Handle rounding by adding 1 to the bit beyond half precision */
f_sig = (f & 0x007fffffu);
#if NPY_HALF_ROUND_TIES_TO_EVEN
/*
* If the last bit in the half significand is 0 (already even), and
* the remaining bit pattern is 1000...0, then we do not add one
* to the bit after the half significand. In all other cases, we do.
*/
if ((f_sig & 0x00003fffu) != 0x00001000u) {
f_sig += 0x00001000u;
}
#else
f_sig += 0x00001000u;
#endif
h_sig = (npy_uint16)(f_sig >> 13);
/*
* If the rounding causes a bit to spill into h_exp, it will
* increment h_exp by one and h_sig will be zero. This is the
* correct result. h_exp may increment to 15, at greatest, in
* which case the result overflows to a signed inf.
*/
#if NPY_HALF_GENERATE_OVERFLOW
h_sig += h_exp;
if (h_sig == 0x7c00u) {
npy_set_floatstatus_overflow();
}
return h_sgn + h_sig;
#else
return h_sgn + h_exp + h_sig;
#endif
}
static inline npy_uint16 npy_doublebits_to_halfbits(npy_uint64 d)
{
npy_uint64 d_exp, d_sig;
npy_uint16 h_sgn, h_exp, h_sig;
h_sgn = (d & 0x8000000000000000ULL) >> 48;
d_exp = (d & 0x7ff0000000000000ULL);
/* Exponent overflow/NaN converts to signed inf/NaN */
if (d_exp >= 0x40f0000000000000ULL) {
if (d_exp == 0x7ff0000000000000ULL) {
/* Inf or NaN */
d_sig = (d & 0x000fffffffffffffULL);
if (d_sig != 0) {
/* NaN - propagate the flag in the significand... */
npy_uint16 ret = (npy_uint16)(0x7c00u + (d_sig >> 42));
/* ...but make sure it stays a NaN */
if (ret == 0x7c00u) {
ret++;
}
return h_sgn + ret;
}
else {
/* signed inf */
return h_sgn + 0x7c00u;
}
}
else {
/* overflow to signed inf */
#if NPY_HALF_GENERATE_OVERFLOW
npy_set_floatstatus_overflow();
#endif
return h_sgn + 0x7c00u;
}
}
/* Exponent underflow converts to subnormal half or signed zero */
if (d_exp <= 0x3f00000000000000ULL) {
/*
* Signed zeros, subnormal floats, and floats with small
* exponents all convert to signed zero halfs.
*/
if (d_exp < 0x3e60000000000000ULL) {
#if NPY_HALF_GENERATE_UNDERFLOW
/* If d != 0, it underflowed to 0 */
if ((d & 0x7fffffffffffffffULL) != 0) {
npy_set_floatstatus_underflow();
}
#endif
return h_sgn;
}
/* Make the subnormal significand */
d_exp >>= 52;
d_sig = (0x0010000000000000ULL + (d & 0x000fffffffffffffULL));
#if NPY_HALF_GENERATE_UNDERFLOW
/* If it's not exactly represented, it underflowed */
if ((d_sig&(((npy_uint64)1 << (1051 - d_exp)) - 1)) != 0) {
npy_set_floatstatus_underflow();
}
#endif
d_sig >>= (1009 - d_exp);
/* Handle rounding by adding 1 to the bit beyond half precision */
#if NPY_HALF_ROUND_TIES_TO_EVEN
/*
* If the last bit in the half significand is 0 (already even), and
* the remaining bit pattern is 1000...0, then we do not add one
* to the bit after the half significand. In all other cases, we do.
*/
if ((d_sig & 0x000007ffffffffffULL) != 0x0000020000000000ULL) {
d_sig += 0x0000020000000000ULL;
}
#else
d_sig += 0x0000020000000000ULL;
#endif
h_sig = (npy_uint16)(d_sig >> 42);
/*
* If the rounding causes a bit to spill into h_exp, it will
* increment h_exp from zero to one and h_sig will be zero.
* This is the correct result.
*/
return h_sgn + h_sig;
}
/* Regular case with no overflow or underflow */
h_exp = (npy_uint16)((d_exp - 0x3f00000000000000ULL) >> 42);
/* Handle rounding by adding 1 to the bit beyond half precision */
d_sig = (d & 0x000fffffffffffffULL);
#if NPY_HALF_ROUND_TIES_TO_EVEN
/*
* If the last bit in the half significand is 0 (already even), and
* the remaining bit pattern is 1000...0, then we do not add one
* to the bit after the half significand. In all other cases, we do.
*/
if ((d_sig & 0x000007ffffffffffULL) != 0x0000020000000000ULL) {
d_sig += 0x0000020000000000ULL;
}
#else
d_sig += 0x0000020000000000ULL;
#endif
h_sig = (npy_uint16)(d_sig >> 42);
/*
* If the rounding causes a bit to spill into h_exp, it will
* increment h_exp by one and h_sig will be zero. This is the
* correct result. h_exp may increment to 15, at greatest, in
* which case the result overflows to a signed inf.
*/
#if NPY_HALF_GENERATE_OVERFLOW
h_sig += h_exp;
if (h_sig == 0x7c00u) {
npy_set_floatstatus_overflow();
}
return h_sgn + h_sig;
#else
return h_sgn + h_exp + h_sig;
#endif
}
static inline npy_uint32 npy_halfbits_to_floatbits(npy_uint16 h)
{
npy_uint16 h_exp, h_sig;
npy_uint32 f_sgn, f_exp, f_sig;
h_exp = (h & 0x7c00u);
f_sgn = ((npy_uint32)h & 0x8000u) << 16;
switch (h_exp) {
case 0x0000u: /* 0 or subnormal */
h_sig = (h & 0x03ffu);
/* Signed zero */
if (h_sig == 0) {
return f_sgn;
}
/* Subnormal */
h_sig <<= 1;
while ((h_sig & 0x0400u) == 0) {
h_sig <<= 1;
h_exp++;
}
f_exp = ((npy_uint32)(127 - 15 - h_exp)) << 23;
f_sig = ((npy_uint32)(h_sig & 0x03ffu)) << 13;
return f_sgn + f_exp + f_sig;
case 0x7c00u: /* inf or NaN */
/* All-ones exponent and a copy of the significand */
return f_sgn + 0x7f800000u + (((npy_uint32)(h & 0x03ffu)) << 13);
default: /* normalized */
/* Just need to adjust the exponent and shift */
return f_sgn + (((npy_uint32)(h & 0x7fffu) + 0x1c000u) << 13);
}
}
static inline npy_uint64 npy_halfbits_to_doublebits(npy_uint16 h)
{
npy_uint16 h_exp, h_sig;
npy_uint64 d_sgn, d_exp, d_sig;
h_exp = (h & 0x7c00u);
d_sgn = ((npy_uint64)h & 0x8000u) << 48;
switch (h_exp) {
case 0x0000u: /* 0 or subnormal */
h_sig = (h & 0x03ffu);
/* Signed zero */
if (h_sig == 0) {
return d_sgn;
}
/* Subnormal */
h_sig <<= 1;
while ((h_sig & 0x0400u) == 0) {
h_sig <<= 1;
h_exp++;
}
d_exp = ((npy_uint64)(1023 - 15 - h_exp)) << 52;
d_sig = ((npy_uint64)(h_sig & 0x03ffu)) << 42;
return d_sgn + d_exp + d_sig;
case 0x7c00u: /* inf or NaN */
/* All-ones exponent and a copy of the significand */
return d_sgn + 0x7ff0000000000000ULL +
(((npy_uint64)(h & 0x03ffu)) << 42);
default: /* normalized */
/* Just need to adjust the exponent and shift */
return d_sgn + (((npy_uint64)(h & 0x7fffu) + 0xfc000u) << 42);
}
}
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论