为了好玩,也为了更深入地了解浮点数的工作原理,我尝试编写一个函数,它接受两个单精度浮点数,并将它们相加。
到目前为止,我写的代码对符号相同的数字工作得很好,但当两个数字符号相反时就会出错。我浏览了许多问题和网站(UAF,How do you add 8-bit floating point with different signs,ICL,Adding 32 bit floating point numbers.,How to add and subtract 16 bit floating point half precision numbers?,How to subtract IEEE 754 numbers?,this,this),但其中涉及减法的大多只是把它描述成“基本相同,只是改为相减”,这对我没有什么特别的帮助。UAF 上是这样说的:
负尾数的处理方法是先转换成2的补码,然后执行加法。执行加法后,结果将转换回符号大小形式。
但我好像不知道该怎么做。我找到了一些资料,解释了什么是符号-数值(sign-magnitude)表示,以及如何在它和 2 的补码之间进行转换,所以我试着这样转换:
// Attempt 1: set bit 24 of many, invert its low 23 bits (bits 0..22), add 1,
// and add the result to manx.
// NOTE(review): the mask 0x007FFFFF leaves bit 23 untouched, so this is not a
// negation of the full 24-bit significand - confirm intent.
manz = manx + ( ( (many | 0x01000000) ^ 0x007FFFFF) + 1);
像这样:
// Attempt 2, step 1: same expression as the first attempt (set bit 24 of many,
// invert its low 23 bits, add 1, add to manx).
manz = manx + ( ( (many | 0x01000000) ^ 0x007FFFFF) + 1);
// Attempt 2, step 2: try to convert the sum back by subtracting 1, inverting
// the low 23 bits again, and clearing bit 24.
manz = ( ((manz - 1) ^ 0x007FFFFF) & 0xFEFFFFFF);
但这两个都没用。
试着用其他资料描述的减法,我试着用不同的方法否定负数的尾数,比如:
// Alternative ways that were tried to negate the mantissa of the negative
// operand. Each line is an independent attempt, not a sequence of steps.
manz = manx - many; // plain subtraction of the two mantissas
manz = manx + (many - (1<<23)); // many reduced by 2^23
manz = manx + (many - (1<<24)); // many reduced by 2^24
manz = manx + ( (many - (1<<23)) & 0x007FFFFF ); // as above (2^23), masked to the low 23 bits
manz = manx + ( (many - (1<<23)) + 1); // many reduced by 2^23, plus 1
manz = manx + ( (~many & 0x007FFFFF) + 1); // all bits inverted, masked to 23 bits, plus 1
manz = manx + (~many + 1); // all bits of many inverted, plus 1
manz = manx + ( (many ^ 0x007FFFFF) + 1); // low 23 bits inverted, plus 1
manz = manx + ( (many ^ 0x00FFFFFF) + 1); // low 24 bits inverted, plus 1
manz = manx + ( (many ^ 0x003FFFFF) + 1); // low 22 bits inverted, plus 1
这是根据符号处理加法的语句,它在尾数对齐之后:
// Sign handling performed after the mantissas have been aligned (first version
// of the fragment). Produces signz / expz / manz from the signs and mantissas
// of x and y.
// NOTE(review): presumably expx == expy at this point because of the preceding
// alignment step, which is not shown here - confirm.
expz = expy;
if(signx != signy) { // opposite signs
if(manx < many) {
// y has the larger mantissa, so the result takes y's sign.
signz = signy;
// Adds many to (manx with its low 23 bits inverted, plus 1).
manz = many + ((manx ^ 0x007FFFFF) + 1);
} else if(manx > many) {
// x has the larger mantissa, so the result takes x's sign.
signz = signx;
// Subtracts (many with its low 23 bits inverted, plus 1) from manx; note
// that this branch subtracts the transformed value while the branch above
// adds it.
manz = manx - ((many ^ 0x007FFFFF) + 1);
} else { // equal mantissas: all three result fields are cleared
signz = 0x00000000;
expz = 0x00000000;
manz = 0x00000000;
}
} else {
// Same sign: keep the sign and add the mantissas.
signz = signx;
manz = manx + many;
}
这是紧跟在它后面的代码,它在溢出时规范化数字,当它们有相同的符号时,它工作,但我不确定它的工作方式在减去时是否有意义:
// Normalize after the mantissa operation: if bit 24 is set, the sum carried out
// of the positions below it, so bump the exponent and shift right by one.
if(manz & 0x01000000) {
expz++;
// The bit that is shifted out is added back in, which rounds the result up
// whenever that bit was 1.
manz = (manz >> 1) + (manz & 0x1);
}
// Keep only the low 23 bits of the mantissa.
manz &= 0x007FFFFF;
使用测试值 -3.34632F 和 34.8532413F 时,我得到的答案是 0x427E0716(63.506920),而正确答案应该是 0x41FC0E2D(31.506922);使用测试值 3.34632F 和 -34.8532413F 时,得到的答案同样是错的(只是符号相反)。
后来我通过改变相减时规范化浮点数的方式解决了这个问题:
// Revised sign handling: opposite signs subtract the smaller mantissa from the
// larger one and renormalize by shifting left; equal signs add the mantissas
// and renormalize by shifting right on carry.
// NOTE(review): presumably expx == expy here because the mantissas were aligned
// beforehand (alignment code not shown) - confirm.
expz = expy;
if(signx != signy) { // opposite signs
if(manx < many) {
// y has the larger mantissa: result takes y's sign.
signz = signy;
manz = many - manx;
} else if(manx > many) {
// x has the larger mantissa: result takes x's sign.
signz = signx;
manz = manx - many;
} else { // equal mantissas: all three result fields are cleared
signz = 0x00000000;
expz = 0x00000000;
manz = 0x00000000;
}
// Normalize subtraction: shift left until bit 23 is set, decrementing the
// exponent for each shift. The "&& manz" guard stops the loop when the
// difference is zero.
while((manz & 0x00800000) == 0 && manz) {
manz <<= 1;
expz--;
}
} else {
signz = signx;
manz = manx + many;
// Normalize addition: bit 24 set means the sum carried out, so shift right by
// one and bump the exponent.
if(manz & 0x01000000) {
expz++;
// Adds 1 after the shift only when both bit 1 and bit 0 of x are set.
// NOTE(review): x does not appear anywhere else in this fragment; the
// rounding term presumably should be derived from manz (the value being
// shifted) - confirm.
manz = (manz >> 1) + ( (x & 0x2) ? (x & 0x1) : 0 ); // round even
}
}
// Keep only the low 23 bits of the mantissa.
manz &= 0x007FFFFF;
最佳答案
如何将两个符号相反的浮点数相加?
基本上,你不会直接这样做。
对于那些不能依赖“二进制补码溢出回绕”的数值类型(例如浮点数、大数库等),最终总是会写成下面这样:
// Pseudo-code: signed addition expressed entirely through the magnitude-only
// helpers add_unsigned() and subtract_unsigned(). Dispatches on the signs of
// v1 and v2; negative operands are negated before being passed on, so the
// helpers only ever receive non-negative values.
add_signed(v1, v2) {
if( v1 < 0) {
if( v2 < 0) {
// Both negative
return -add_unsigned(-v1, -v2);
} else {
// Different sign, v1 is negative
return subtract_unsigned(v2, -v1);
}
} else {
if( v2 < 0) {
// Different sign, v2 is negative
return subtract_unsigned(v1, -v2);
} else {
// Both positive
return add_unsigned(v1, v2);
}
}
}
// Pseudo-code: signed subtraction is signed addition of the negated second
// operand.
subtract_signed(v1, v2) {
return add_signed(v1, -v2);
}
// Pseudo-code: adds two non-negative values. The actual addition is left
// elided ("...") in this sketch.
add_unsigned(v1, v2) {
// Here we know that v1 and v2 will never be negative, and
// we know that the result will never be negative
...
}
// Pseudo-code: subtracts two non-negative values. If v1 is the smaller one the
// operands are swapped and the result is negated, so the elided ("...") core
// subtraction only ever computes larger-minus-smaller.
subtract_unsigned(v1, v2) {
if(v1 < v2) {
return -subtract_unsigned(v2, v1);
}
// Here we know that v1 and v2 will never be negative, and
// we know that the result will never be negative
...
}
换言之,所有实际的加法和所有实际的减法都是用无符号(“永不为负”)数字来实现的。
下面是一个更完整的示例,仅针对 32 位浮点数的软件模拟(C 语言,未经测试且可能有错误,可能适用也可能不适用于非规范化数,不支持 NaN 或无穷大,不支持上溢或下溢,不支持“舍入前先左移尾数以减少精度损失”,也不支持除“向零舍入”之外的其他舍入模式):
/*
 * Software emulation of 32-bit floating-point addition and subtraction on raw
 * bit patterns (1 sign bit, 8 exponent bits, 23 fraction bits).
 *
 * Limitations: zero operands are handled, but denormals, NaN, infinities,
 * exponent overflow and exponent underflow are not. Bits shifted out while
 * aligning or renormalizing are simply discarded (no guard bits, no rounding
 * modes).
 */
#define SIGN_FLAG     0x80000000U /* bit 31: sign */
#define EXPONENT_MASK 0x7F800000U /* bits 30..23: biased exponent */
#define MANTISSA_MASK 0x007FFFFFU /* bits 22..0: stored fraction */
#define IMPLIED_BIT   0x00800000U /* hidden leading 1 of a normalized significand */
#define OVERFLOW_BIT  0x01000000U /* carry out of the 24-bit significand */
#define EXPONENT_ONE  0x00800000U /* one unit in the exponent field */
#define MANTISSA_BITS 23          /* width of the stored fraction */

/* Prototypes: add_signed() calls helpers that are defined after it. */
uint32_t add_signed(uint32_t v1, uint32_t v2);
uint32_t subtract_signed(uint32_t v1, uint32_t v2);
uint32_t add_unsigned(uint32_t v1, uint32_t v2);
uint32_t subtract_unsigned(uint32_t v1, uint32_t v2);

/*
 * Returns the 24-bit significand of v (fraction plus implied bit) shifted
 * right so that it is expressed at exponent target_exp.
 *
 * target_exp is an exponent field (already masked with EXPONENT_MASK) and must
 * not be smaller than the exponent field of v.
 */
static uint32_t align_significand(uint32_t v, uint32_t target_exp)
{
    uint32_t significand = (v & MANTISSA_MASK) | IMPLIED_BIT;
    uint32_t shift = (target_exp - (v & EXPONENT_MASK)) >> MANTISSA_BITS;

    /* A 24-bit value shifted by 24 or more is zero; testing first also keeps
     * the shift count below the width of the type. */
    return (shift <= MANTISSA_BITS) ? (significand >> shift) : 0;
}

/*
 * Adds two floats given as raw bit patterns and returns the sum's bit pattern.
 *
 * The signs are stripped and the work is delegated to the magnitude-only
 * helpers: equal signs add magnitudes and keep the sign, opposite signs
 * subtract the negative operand's magnitude from the positive one's.
 */
uint32_t add_signed(uint32_t v1, uint32_t v2)
{
    uint32_t sign1 = v1 & SIGN_FLAG;
    uint32_t sign2 = v2 & SIGN_FLAG;
    uint32_t mag1 = v1 & ~SIGN_FLAG;
    uint32_t mag2 = v2 & ~SIGN_FLAG;

    if (sign1 == sign2) {
        return sign1 | add_unsigned(mag1, mag2);
    }
    if (sign1 != 0) {
        /* v1 is the negative operand: result = |v2| - |v1| */
        return subtract_unsigned(mag2, mag1);
    }
    /* v2 is the negative operand: result = |v1| - |v2| */
    return subtract_unsigned(mag1, mag2);
}

/* Returns v1 - v2, computed as v1 + (-v2) by flipping v2's sign bit. */
uint32_t subtract_signed(uint32_t v1, uint32_t v2)
{
    return add_signed(v1, v2 ^ SIGN_FLAG);
}

/*
 * Adds two non-negative floats (sign bit clear in both) and returns the
 * non-negative sum.
 */
uint32_t add_unsigned(uint32_t v1, uint32_t v2)
{
    /* Order the operands so v1 >= v2. Comparing the raw patterns compares the
     * exponents first and the fractions second. */
    if (v1 < v2) {
        uint32_t tmp = v1;
        v1 = v2;
        v2 = tmp;
    }

    /* x + 0 == x. Without this, zero would be given an implied leading 1. */
    if (v2 == 0) {
        return v1;
    }

    uint32_t expr = v1 & EXPONENT_MASK;
    uint32_t m1 = (v1 & MANTISSA_MASK) | IMPLIED_BIT;
    uint32_t m2 = align_significand(v2, expr);
    uint32_t mr = m1 + m2;

    /* A carry out of bit 23 means the sum is in [2, 4): renormalize by halving
     * the significand and incrementing the exponent. */
    if ((mr & OVERFLOW_BIT) != 0) {
        mr >>= 1;
        expr += EXPONENT_ONE;
    }

    return expr | (mr & MANTISSA_MASK);
}

/*
 * Subtracts two non-negative floats (sign bit clear in both) and returns
 * v1 - v2; the result has the sign bit set when v1 < v2.
 */
uint32_t subtract_unsigned(uint32_t v1, uint32_t v2)
{
    if (v1 == v2) {
        return 0;
    }
    if (v1 < v2) {
        /* Compute the positive difference and negate it. */
        return SIGN_FLAG ^ subtract_unsigned(v2, v1);
    }

    /* x - 0 == x. Without this, zero would be given an implied leading 1. */
    if (v2 == 0) {
        return v1;
    }

    /* From here on v1 > v2 > 0, so the aligned significand of v2 is strictly
     * smaller than that of v1 and the difference is positive and non-zero. */
    uint32_t expr = v1 & EXPONENT_MASK;
    uint32_t m1 = (v1 & MANTISSA_MASK) | IMPLIED_BIT;
    uint32_t m2 = align_significand(v2, expr);
    uint32_t mr = m1 - m2;

    /* Cancellation may have cleared leading bits: shift left until the implied
     * bit is back in place, lowering the exponent once per shift.
     * Exponent underflow is not detected here. */
    while ((mr & IMPLIED_BIT) == 0) {
        mr <<= 1;
        expr -= EXPONENT_ONE;
    }

    return expr | (mr & MANTISSA_MASK);
}
关于c - 如何将两个具有相反符号的浮点数相加?,我们在Stack Overflow上找到一个类似的问题:https://stackoverflow.com/questions/58384951/