在 MSVC 的 win32 / win64 环境中,把扩展精度浮点数(80 位值,在某些编译器中也称为 “long double”)转换为双精度(64 位)浮点数,可移植性最好、也最“正确”的方法是什么?
MSVC目前(截至2010年)假设“long double”是“double”的同义词。
我可以用内联汇编写一对 fld / fstp 指令,但 MSVC 的 win64 代码不支持内联汇编。我是否需要把这段汇编代码移到单独的 .asm 文件中?难道真的没有更好的解决方案吗?
答案 0 :(得分:4)
如果您的编译器/平台没有80位浮点值的本机支持,则必须自己解码该值。
假设80位浮点数存储在位于特定偏移量的字节缓冲区中,您可以这样做:
float64 C_IOHandler::readFloat80(IColl<uint8> buffer, uint32 *ref_offset)
{
    // Decodes one 80-bit extended-precision value stored big-endian in
    // `buffer`, starting at *ref_offset, and advances *ref_offset by the
    // 10 bytes consumed.
    //
    // Layout (IEEE-754 extended / Standard Apple Numeric Environment):
    //   1 sign bit, 15 exponent bits, 1 explicit integer ("normalized") bit,
    //   63 fraction bits.
    uint32 &pos = *ref_offset;

    // Top bit of the first byte is the sign.
    const float64 signFactor = ((buffer[pos] & 0x80) != 0x00) ? -1 : 1;

    // Remaining 7 bits of byte 0 plus all of byte 1 form the biased exponent.
    const uint32 biasedExponent = (((uint32)buffer[pos] & 0x7F) << 8) | (uint32)buffer[pos + 1];

    // The following 8 bytes are the significand, integer bit on top.
    uint64 fraction = readUInt64BE(buffer, pos + 2);

    // The integer bit contributes 1.0 when set (normalized number), else 0.0.
    const float64 integerPart = ((fraction & 0x8000000000000000) != 0x00) ? 1 : 0;
    fraction &= 0x7FFFFFFFFFFFFFFF;

    pos += 10;

    // value = (-1)^s * (integerPart + fraction / 2^63) * 2^(e - 16383)
    return (signFactor * (integerPart + (float64)fraction / ((uint64)1 << 63)) * g_Math->toPower(2, (int32)biasedExponent - 16383));
}
我就是这样做的,这段代码用 g++ 4.5.0 可以顺利编译。它当然不是一个很快的解决方案,但至少是一个可用的解决方案。这段代码应该也可以移植到其他平台上,不过我没有试过。
答案 1 :(得分:4)
我刚用 x86 汇编代码实现了这一转换……
; 32-bit x86 MASM helpers converting between an 80-bit x87 extended-precision
; value in memory and a 64-bit double.
;
; Both procedures read their arguments from the stack relative to ESP and
; return with "ret 0", i.e. the caller removes the arguments.
; NOTE(review): presumably meant to be declared on the C side as
;   extern "C" double cvt80to64(const void* src);
;   extern "C" void   cvt64to80(double value, void* dst);
; confirm against the actual callers.
;
; The original listing ended with an ENDIF that had no matching IF/IFDEF;
; it has been removed so that the conditional nesting is balanced.
    .686P
    .XMM

_TEXT   SEGMENT

; Referenced so the linker pulls in floating-point support.
EXTRN   __fltused:DWORD

PUBLIC  _cvt80to64
PUBLIC  _cvt64to80

; _cvt80to64: [esp+4] = pointer to a 10-byte extended-precision value.
; Loads that value into ST(0) and returns with it left on the x87 stack.
; NOTE(review): relies on the convention that a double result is returned
; in ST(0); the narrowing to 64 bits then happens when the caller stores it.
_cvt80to64 PROC
    mov     eax, dword ptr [esp+4]      ; eax = source pointer
    fld     TBYTE PTR [eax]             ; ST(0) = 80-bit value at [eax]
    ret     0
_cvt80to64 ENDP

; _cvt64to80: [esp+4] = 8-byte double, [esp+12] = pointer to a 10-byte
; destination. Widens the double and stores it as an 80-bit value.
_cvt64to80 PROC
    mov     eax, DWORD PTR [esp+12]     ; eax = destination pointer
    fld     QWORD PTR [esp+4]           ; ST(0) = the double argument
    fstp    TBYTE PTR [eax]             ; store as 80 bits and pop the x87 stack
    ret     0
_cvt64to80 ENDP

_TEXT   ENDS
END
答案 2 :(得分:2)
我刚刚写了下面这段代码。它通过位操作,由 IEEE 扩展精度数构造出 IEEE 双精度数。输入是小端格式的 10 字节扩展精度数:
typedef unsigned long long uint64;

/*
 * Shifts `value` right by `shift` bits (1..64) and rounds the result to the
 * nearest integer, resolving exact ties towards the even result.
 */
static uint64 shiftRightRoundNearestEven(uint64 value, int shift)
{
    const uint64 quotient  = (shift < 64) ? (value >> shift) : 0;
    const uint64 remainder = (shift < 64) ? (value & (((uint64)1 << shift) - 1)) : value;
    const uint64 half      = (uint64)1 << (shift - 1);

    if (remainder > half || (remainder == half && (quotient & 1) != 0))
        return quotient + 1;
    return quotient;
}

/*
 * Converts a 10-byte little-endian x87 extended-precision number into a double
 * using bit manipulation only (no FPU support for the 80-bit format needed).
 *
 *   x[0..7]  64-bit significand, explicit integer bit in bit 63
 *   x[8..9]  15-bit biased exponent (bias 0x3FFF); top bit of x[9] is the sign
 *
 * Behaviour:
 *   - Infinity maps to infinity. NaN maps to NaN; the top 52 payload bits are
 *     kept, and if the payload would vanish the quiet bit is set so the result
 *     does not turn into infinity.
 *   - Extended denormals/zeros (exponent field 0) are far below the double
 *     range and become signed zero.
 *   - Finite values are rounded to nearest, ties to even; results may be a
 *     normal double, a denormal double, signed zero or signed infinity.
 *   - For a non-zero exponent field the integer bit is taken as set, so
 *     "unnormal" encodings are read as 1.fraction.
 *
 * Assumes `double` is IEEE-754 binary64 and shares the byte order of 64-bit
 * integers on the host. The function has no side effects: the debug printf of
 * the original version (which also read the result through an incompatible
 * pointer type) is gone.
 */
double makeDoubleFromExtended(const unsigned char x[10])
{
    const uint64 signBit     = (uint64)(x[9] & 0x80) << 56;
    const int    extExponent = ((x[9] << 8) | x[8]) & 0x7FFF;
    uint64 significand = 0;
    uint64 bits;
    double result;
    int i;

    for (i = 7; i >= 0; --i)
        significand = (significand << 8) | x[i];

    if (extExponent == 0x7FFF)
    {
        /* Infinity or NaN: the integer bit is not part of the payload. */
        const uint64 fraction = significand & 0x7FFFFFFFFFFFFFFFULL;

        bits = 0x7FF0000000000000ULL;
        if (fraction != 0)
        {
            uint64 payload = fraction >> 11;
            if (payload == 0)
                payload = 0x0008000000000000ULL; /* keep it a NaN */
            bits |= payload;
        }
    }
    else if (extExponent == 0)
    {
        /* Extended denormal or zero: cannot be represented, signed zero. */
        bits = 0;
    }
    else
    {
        /* Re-bias from the 15-bit to the 11-bit exponent. */
        const int exponent = extExponent - 0x3FFF + 0x03FF;

        significand |= 0x8000000000000000ULL;

        if (exponent >= 0x7FF)
        {
            /* Too large to represent: infinity. */
            bits = 0x7FF0000000000000ULL;
        }
        else if (exponent >= 1)
        {
            /*
             * Normal double. The rounded 53-bit significand still carries the
             * integer bit in bit 52, so adding it on top of (exponent - 1)
             * yields the right exponent field, including the case where
             * rounding carries into the next binade (or up to infinity).
             */
            bits = ((uint64)(exponent - 1) << 52)
                 + shiftRightRoundNearestEven(significand, 11);
        }
        else if (exponent >= -52)
        {
            /*
             * Denormal double: exponent field 0, integer bit shifted into the
             * fraction. A carry into bit 52 gives the smallest normal number.
             */
            bits = shiftRightRoundNearestEven(significand, 12 - exponent);
        }
        else
        {
            /* Below half of the smallest denormal: signed zero. */
            bits = 0;
        }
    }

    bits |= signBit;
    memcpy(&result, &bits, sizeof result);
    return result;
}
答案 3 :(得分:0)
我参考了上面给出的答案,最终得到了下面这段代码。
#include <cmath>
#include <limits>
#include <cassert>
// Only compiled when _M_X64 is not defined, i.e. for non-x64 builds; the
// body below is inline assembly using 32-bit registers.
#ifndef _M_X64
// Loads the 80-bit extended-precision value that the pointer argument refers
// to onto the x87 register stack and returns.
// The function is declared naked, so everything it executes is the assembly
// written here, including the explicit return.
// NOTE(review): presumably relies on the x86 convention that a double result
// is returned in ST(0) - confirm for the calling convention in use.
__inline __declspec(naked) double _cvt80to64(void* ) {
__asm {
// eax = the pointer argument, read from the stack slot at [esp+4]
mov eax, dword ptr [esp+4]
// ST(0) = the 10-byte value stored at [eax]
fld TBYTE PTR [eax]
// explicit return; "ret 0" pops no argument bytes
ret 0
// end of the hand-written procedure body
}
}
#endif
// Raw little-endian memory image of an x87 80-bit extended-precision value:
//   bytes 0..7  64-bit significand (explicit integer bit in bit 63)
//   bytes 8..9  15-bit biased exponent (bias 16383); top bit of byte 9 = sign
// The former #pragma pack around this typedef was dropped: packing only
// influences struct/union member layout and does nothing for an array type.
typedef unsigned char tDouble80[10];

// Field view of the format, kept for source compatibility only.
// convertDouble80 no longer reads values through this struct: its bit-field
// layout and size are implementation-defined, so overlaying it on a 10-byte
// buffer is not a reliable way to decode the value.
typedef struct {
    unsigned long long mantissa:64;
    unsigned int exponent:15;
    unsigned int sign:1;
} tDouble80Struct;

// Converts an 80-bit extended-precision value to double.
//
// @param val  10-byte little-endian image as described at tDouble80.
// @return     The value as a double:
//             - exponent all ones, fraction zero      -> +/-infinity
//             - exponent all ones, fraction non-zero  -> quiet or signaling
//               NaN (bit 62 of the significand is the quiet bit); the payload
//               and the sign of the NaN are not preserved
//             - everything else -> significand * 2^(exponent - 16383 - 63),
//               which overflows to infinity or underflows to zero as needed.
//
// The bytes are decoded explicitly, so the result does not depend on the
// host byte order or on compiler-specific struct layout.
inline double convertDouble80(const tDouble80& val)
{
    const unsigned int exponentAllOnes = (1u << 15) - 1;
    const unsigned long long integerBit = static_cast<unsigned long long>(1) << 63;
    const unsigned long long quietBit = static_cast<unsigned long long>(1) << 62;

    unsigned long long significand = 0;
    for (int i = 7; i >= 0; --i) {
        significand = (significand << 8) | val[i];
    }
    const unsigned int exponent =
        ((static_cast<unsigned int>(val[9]) << 8) | val[8]) & exponentAllOnes;
    const bool negative = (val[9] & 0x80) != 0;

    if (exponentAllOnes == exponent) {
        // The integer bit (bit 63) is set for infinities and NaNs alike, so
        // only the 63 fraction bits tell them apart.
        if (0 == (significand & (integerBit - 1))) {
            const double inf = std::numeric_limits<double>::infinity();
            return negative ? -inf : inf;
        }
        return (0 != (significand & quietBit))
            ? std::numeric_limits<double>::quiet_NaN()
            : std::numeric_limits<double>::signaling_NaN();
    }

    // An exponent field of 0 (denormal/zero) uses the same scale as field 1.
    // The extra -63 accounts for treating the significand as an integer.
    const int scale = static_cast<int>(0 == exponent ? 1 : exponent) - 16383 - 63;

    // One integer-to-double conversion followed by an exact power-of-two
    // scaling avoids rounding the fraction twice.
    const double magnitude = std::ldexp(static_cast<double>(significand), scale);
    return negative ? -magnitude : magnitude;
}