我正在将Michael Herf's radix sort翻译成直接的C代码。对于正值,它可以正常工作,但负值会被破坏,如下所示:
Input:
-0.100000 0.100000 0.000000 0.700000 0.900000 -0.400000 -1.000000 0.200000 -0.400000 -0.700000
Expected output:
-1.000000 -0.700000 -0.400000 -0.400000 -0.100000 0.000000 0.100000 0.200000 0.700000 0.900000
Actual output:
-44.799999 -11.200000 -11.200000 -6.400000 -4.000000 0.000000 0.100000 0.200000 0.700000 0.900000
如何解决这个问题?
代码(在Eclipse CDT上,Ubuntu 16.10,使用GCC 6.2编译):
#include <inttypes.h>
#include <stdlib.h>
#include <string.h>
#include <xmmintrin.h> // for prefetch
#define PFVAL 64
#define PFVAL2 128
#define PF(x, i) _mm_prefetch((char *)((x)+(i)+PFVAL), 0)
#define PF2(x, i) _mm_prefetch((char *)((x)+(i)+PFVAL2), 0)
#define HISTOGRAM 2048
#define HISTOGRAMS 6144 // 3*histogram
#define FLIP_FLOAT_A(x) ((x)^(((x) >> 31) | 0x80000000))
#define FLIP_FLOAT_B(x) ((x)^((((x) >> 31)-1) | 0x80000000))
// utils for accessing 11-bit quantities
#define _0(x) (x & 0x7FF)
#define _1(x) (x >> 11 & 0x7FF)
#define _2(x) (x >> 22 )
void radixsort_float(float *a, size_t num) {
uint32_t *base = (uint32_t *)a;
size_t arraysize = num*sizeof(uint32_t);
uint32_t *aux = malloc(arraysize);
uint32_t byte0[HISTOGRAMS] = { 0 };
uint32_t *byte1 = byte0+HISTOGRAM;
uint32_t *byte2 = byte1+HISTOGRAM;
// 1. parallel histogramming pass
for(size_t i = 0; i < num; ++i) {
PF(base, i);
uint32_t bi = FLIP_FLOAT_A(base[i]);
++byte0[_0(bi)];
++byte1[_1(bi)];
++byte2[_2(bi)];
}
// 2. sum the histograms: each histogram entry
// records the number of values preceding itself
uint32_t sum0 = 0, sum1 = 0, sum2 = 0;
uint32_t total;
for(size_t i = 0; i < HISTOGRAM; i++) {
total = byte0[i]+sum0;
byte0[i] = sum0-1;
sum0 = total;
total = byte1[i]+sum1;
byte1[i] = sum1-1;
sum1 = total;
total = byte2[i]+sum2;
byte2[i] = sum2-1;
sum2 = total;
}
// byte 0: floatflip entire value, read/write histogram, write out flipped
for(size_t i = 0; i < num; ++i) {
uint32_t bi = FLIP_FLOAT_A(base[i]);
uint32_t pos = _0(bi);
PF2(base, i);
aux[++byte0[pos]] = bi;
}
// byte 1: read/write histogram, copy
// aux -> array
for(size_t i = 0; i < num; ++i) {
uint32_t ai = aux[i];
uint32_t pos = _1(ai);
PF2(aux, i);
base[++byte1[pos]] = ai;
}
// byte 2: read/write histogram, copy & flip out
// base -> aux
for(size_t i = 0; i < num; ++i) {
uint32_t bi = base[i];
uint32_t pos = _2(bi);
PF2(base, i);
aux[++byte2[pos]] = FLIP_FLOAT_B(bi);
}
memcpy(base, aux, arraysize);
free(aux);
}
答案 0 :(得分:1)
问题是你的FLIP_FLOAT_A宏 - 这是错的。你需要它来翻转负数的所有位(不仅仅是底部位),而且宏需要相反(对FLIP_FLOAT_B(FLIP_FLOAT_A(X)) == X
的所有值都是X
)。尝试:
#define FLIP_FLOAT_A(x) ((x)^(((~(x) >> 31)-1) | 0x80000000))
#define FLIP_FLOAT_B(x) ((x)^((((x) >> 31)-1) | 0x80000000))