Question

我花了 10 多个小时，尝试用 LSD 基数排序对下面这些（十六进制）数值进行排序，但一直没有成功。网上关于这个主题的资料非常少。

0 4c7f cd80 41fc 782c 8b74 7eb1 9a03 aa01 73f1

我知道必须使用掩码和按位运算来逐个处理每个十六进制数字（4 位），但不知道具体该怎么做、该在哪里做。

下面是我目前的代码（我能理解的版本）：

/*
 * LSD radix sort of a[0..n-1] on base-10 digits.
 *
 * Runs one ccsort() pass per decimal digit of the largest element, starting
 * with the least significant digit. As before, no pass is made when the
 * largest element is <= 0.
 *
 * a: array to sort in place
 * n: number of elements in a
 */
void rsort(int a[], int n) {
    if (n < 2) {
        return;     /* nothing to reorder; also avoids getMax() reading a[0] of an empty array */
    }
    int max = getMax(a, n);
    if (max <= 0) {
        return;     /* no positive value, so there is no digit to sort on */
    }
    for (int exp = 1; ; exp *= 10) {
        ccsort(a, n, exp);
        /* Stop before multiplying again: exp * 10 is only formed while it is
         * still <= max, so exp can never overflow int (the old condition
         * "max / exp > 0" required computing a power of ten above max). */
        if (max / exp < 10) {
            break;
        }
    }
}

/*
 * Returns the largest value found in a[0..n-1].
 * a[0] is read unconditionally as the initial candidate.
 */
int getMax(int a[], int n) {
    int largest = a[0];
    for (int k = 0; k < n; ++k) {
        largest = (a[k] > largest) ? a[k] : largest;
    }
    return largest;
}

/*
 * One counting-sort pass over a[0..n-1], keyed on the decimal digit
 * (a[i] / exp) % 10. The result is written to a scratch array and then
 * copied back into a.
 *
 * a:   array to reorder in place
 * n:   number of elements in a
 * exp: power of ten selecting the digit (1 = units, 10 = tens, ...)
 */
void ccsort(int a[], int n, int exp) {

    /* NOTE(review): count has n slots but is indexed below by a digit in
     * 0..9, so any n < 10 can index past the end of the array. */
    int count[n];
    int output[n];
    int i = 0;

    /* Clear both scratch arrays. */
    for (i = 0; i < n; i++) {
        count[i] = 0;
        output[i] = 0;
    }
    /* Histogram: how many elements carry each digit value. */
    for (i = 0; i < n; i++) {
        ++count[(a[i] / exp) % 10];
    }
    /* Running totals: count[d] becomes the number of elements with digit <= d.
     * NOTE(review): the bound i <= n reads and writes count[n], one element
     * past the end of the array. */
    for (i = 1; i <= n; i++) {
        count[i] += count[i - 1];
    }
    /* Walk the input backwards so elements with equal digits keep their
     * relative order; each element goes to the last free slot of its digit. */
    for (i = n - 1; i >= 0; i--) {
        output[count[(a[i] / exp) % 10] - 1] = a[i];
        --count[(a[i] / exp) % 10];
    }
    /* Copy the reordered elements back into the caller's array. */
    for (i = 0; i < n; i++) {
        a[i] = output[i];
    }
}

我已经检查了所有关于这个问题的StackOverFlow，但是它们都没有涵盖细节。

Answer 1

有一种更简单的方法来实现基数排序。求出最大值之后，找到大于等于该最大值的 16 的最小次幂。这可以在循环中用 max >>= 4 来完成，同时递增 x；当 max 变为零时，16 的 x 次幂就 >= 原始最大值。例如，最大值为 0xffff 时需要 4 趟基数排序，而最大值为 0xffffffff 时需要 8 趟基数排序。

如果值的范围最有可能采用整数可用的全范围，则无需确定最大值，只需将基数排序基于整数大小。

您的示例代码所展示的基数排序，由于其把计数转换为索引的方式，必须从后向前扫描数组。换一种把计数转换为索引的方法就可以避免这一点。下面是一个针对 32 位无符号整数、以 256 为基数的基数排序示例。它使用一个计数/索引矩阵，这样只需对数组读取一趟即可生成全部 4 行计数，随后再进行 4 趟基数排序（因此排序后的数据最终回到原始数组中）。std::swap 是用于交换指针的 C++ 函数；在 C 程序中，可以用内联的指针交换来代替：t = a; a = b; b = t;，其中 t 的类型为 uint32_t *（指向无符号 32 位整数的指针）。对于以 16 为基数的基数排序，矩阵大小为 [8][16]。

//  LSD radix sort of `count` 32-bit unsigned values, one byte (base 256) per pass.
//  a is the input array, b is a working array of the same length.
//  Data ping-pongs between the two buffers; after the four passes the sorted
//  values are in the buffer the returned pointer refers to.
uint32_t * RadixSort(uint32_t * a, uint32_t *b, size_t count)
{
    size_t offsets[4][256] = {0};       // one row of counts / start indices per byte

    // A single read of the input fills the histograms of all four bytes.
    for (size_t k = 0; k < count; ++k) {
        const uint32_t value = a[k];
        offsets[0][value & 0xff]++;
        offsets[1][(value >> 8) & 0xff]++;
        offsets[2][(value >> 16) & 0xff]++;
        offsets[3][(value >> 24) & 0xff]++;
    }

    // Turn each row of counts into starting positions (exclusive prefix sum).
    for (size_t pass = 0; pass < 4; ++pass) {
        size_t running = 0;
        for (size_t bucket = 0; bucket < 256; ++bucket) {
            const size_t occurrences = offsets[pass][bucket];
            offsets[pass][bucket] = running;
            running += occurrences;
        }
    }

    // Scatter by byte 0, then 1, 2, 3; a forward scan keeps equal keys in order.
    for (size_t pass = 0; pass < 4; ++pass) {
        size_t *next = offsets[pass];
        const unsigned shift = (unsigned)(pass * 8);
        for (size_t k = 0; k < count; ++k) {
            const uint32_t value = a[k];
            b[next[(value >> shift) & 0xff]++] = value;
        }
        std::swap(a, b);                // output of this pass is input of the next
    }
    return a;
}

Answer 2

您对基数排序的实现略有不正确：

它无法处理负数
函数 ccsort() 中的数组 count[] 的大小应为 10，而不是 n。如果 n 小于 10，该函数将无法正常工作。
累积计数的循环多走了一步：for (i = 1; i <= n; i++)。这里的 <= 运算符同样会导致错误。
你说你按十六进制数字排序，但代码使用十进制数字。

这是一个（稍微）改进的版本，带有解释：

/*
 * One stable counting-sort pass over a[0..n-1], keyed on the decimal digit
 * (a[i] / exp) % 10.
 *
 * a:   array to reorder in place
 * n:   number of elements in a
 * exp: power of ten selecting the digit (1 = units, 10 = tens, ...)
 */
void ccsort(int a[], int n, int exp) {
    int start[10] = { 0 };
    int sorted[n];
    int k;

    /* Histogram: number of elements carrying each digit value. */
    for (k = 0; k < n; k++) {
        start[(a[k] / exp) % 10] += 1;
    }

    /* Replace each count by the index where that digit's run begins. */
    int running = 0;
    for (int digit = 0; digit < 10; digit++) {
        int occurrences = start[digit];
        start[digit] = running;
        running += occurrences;
    }

    /* Forward scan: drop each element into the next free slot of its digit,
     * which keeps elements with equal digits in their original order. */
    for (k = 0; k < n; k++) {
        int digit = (a[k] / exp) % 10;
        sorted[start[digit]] = a[k];
        start[digit] += 1;
    }

    /* Hand the reordered elements back to the caller. */
    k = 0;
    while (k < n) {
        a[k] = sorted[k];
        k++;
    }
}

/*
 * Returns the largest value in a[0..n-1].
 * a[0] seeds the search, so it is read regardless of n.
 */
int getMax(int a[], int n) {
    int best = a[0];
    int k = 1;
    while (k < n) {
        if (best < a[k]) {
            best = a[k];
        }
        ++k;
    }
    return best;
}

/*
 * LSD radix sort of a[0..n-1] on base-10 digits.
 *
 * Calls ccsort() once per decimal digit needed to express the largest
 * element, least significant digit first. When the largest element is <= 0
 * no pass is made, exactly as before.
 *
 * a: array to sort in place
 * n: number of elements in a
 */
void rsort(int a[], int n) {
    if (n < 2) {
        return;     /* empty or single-element input is already in order */
    }
    int max = getMax(a, n);
    if (max <= 0) {
        return;     /* no digit of a positive maximum to sort on */
    }
    /* for all digits required to express the maximum value */
    for (int exp = 1; ; exp *= 10) {
        /* sort the array on one digit at a time */
        ccsort(a, n, exp);
        /* Leave before exp would exceed max: the previous loop condition
         * "max / exp > 0" needed a power of ten larger than max, which
         * overflows int once max >= 1000000000. */
        if (max / exp < 10) {
            break;
        }
    }
}

由于大量的除法和取模运算，上述版本的效率非常低。对于十六进制数字，可以改用移位和掩码来处理：

/*
 * One stable counting-sort pass over a[0..n-1], keyed on the hexadecimal
 * digit (a[i] >> shift) & 15.
 *
 * a:     array to reorder in place
 * n:     number of elements in a
 * shift: bit offset of the 4-bit digit (0, 4, 8, ...)
 */
void ccsort16(int a[], int n, int shift) {
    if (n <= 0) {
        return;     /* nothing to sort; also avoids a zero-length output[] */
    }

    int count[16] = { 0 };
    int output[n];
    int i, last;

    /* histogram of the selected hex digit */
    for (i = 0; i < n; i++) {
        ++count[(a[i] >> shift) & 15];
    }
    /* turn each count into the index where that digit's run starts */
    for (i = last = 0; i < 16; i++) {
        last += count[i];
        count[i] = last - count[i];
    }
    /* forward scan keeps elements with equal digits in their original order */
    for (i = 0; i < n; i++) {
        output[count[(a[i] >> shift) & 15]++] = a[i];
    }
    /* copy the reordered elements back */
    for (i = 0; i < n; i++) {
        a[i] = output[i];
    }
}

/*
 * LSD radix sort of a[0..n-1] on 4-bit (hexadecimal) digits.
 *
 * Runs one ccsort16() pass per hex digit of the largest element, least
 * significant digit first. As before, no pass is made when the largest
 * element is <= 0.
 *
 * a: array to sort in place
 * n: number of elements in a
 */
void rsort16(int a[], int n) {
    if (n < 2) {
        return;     /* empty or single-element input is already in order */
    }
    int max = a[0];
    for (int i = 1; i < n; i++) {
        if (a[i] > max) {
            max = a[i];
        }
    }
    /* Consume max four bits at a time instead of testing (max >> shift):
     * the old test ended up shifting by the full width of int for large
     * maxima, which is undefined and could keep the loop running forever. */
    int shift = 0;
    for (int remaining = max; remaining > 0; remaining >>= 4) {
        ccsort16(a, n, shift);
        shift += 4;
    }
}

使用含 256 个条目的 count 数组、一次按一个字节排序，速度大约会快一倍。像 rcgldr 的回答所示那样，在一趟扫描中计算出所有数位的计数，也会更快。

请注意，此实现仍无法处理负数。

Answer 3

void int_radix_sort(void) {
    int group; //because extracting 8 bits
    int buckets = 1 << 8; //using size 256
    int map[buckets];   
    int mask = buckets - 1;
    int i;
    int cnt[buckets];
    int flag = NULL;
    int partition;
    int *src, *dst;

    for (group = 0; group < 32; group += 8) {
        // group = 8, number of bits we want per round, we want 4 rounds
        // cnt  
        for (int i = 0; i < buckets; i++) {
            cnt[i] = 0;
        }
        for (int j = 0; j < n; j++) {
            i = (lst[j] >> group) & mask;
            cnt[i]++; 
            tmp[j] = lst[j];
        }

        //map
        map[0] = 0;
        for (int i = 1; i < buckets; i++) {
            map[i] = map[i - 1] + cnt[i - 1];
        }

        //move
        for (int j = 0; j < n; j++) {   
            i = (tmp[j] >> group) & mask;
            lst[map[i]] = tmp[j];
            map[i]++;
        }
    }
}

经过几个小时的研究，我得到了答案。我仍然不明白这段代码/答案中发生了什么。我无法理解这个概念。希望有人可以解释。

Answer 4

我明白你的意思。我认为在使用循环、标志和交换等方式对列表排序之后，负数很容易处理。那无符号浮点数呢？ - itproxti 2016年11月1日16:02

至于处理浮点数，也许有一种办法：例如对于 345.768 这个数，需要先把它转换为整数，也就是乘以 1000，使其变为 345768。就像用偏移量把负数移到正数域一样，乘以 1000、10000 等可以把浮点数转换为小数部分全为零的数，然后就可以将其强制转换为 int 或 long。但是，如果数值很大，转换后的数可能超出 int 或 long 的表示范围。

所乘的数必须是常数（和偏移量一样），这样才能保持各数值之间的大小关系。最好使用 2 的幂，例如 8 或 16，因为这样可以使用移位运算符。不过，就像计算偏移量需要一些时间一样，计算这个乘数也需要一些时间：必须遍历整个数组，才能求出使所有数相乘后小数部分都为零的最小乘数。

这可能无法快速计算，但如果需要，仍可以完成工作。

Radix Sort Base 16（十六进制）

4 个答案: