Question

这几天我一直在发布一些代码，因为我正在做一道练习。我原以为已经把它做完了，却发现程序不能正常工作。练习要求的输入为： - N：一个整数，表示要读取的字符串数量 - K：一个整数 - N 个字符串（字符串可以重复）。输出要求打印出现频率最高的 K 个字符串，并按频率降序排列。

示例测试集：

输入：

6
2
mickey
mouse
mickey
hello
mouse
mickey

输出：

mickey // Has freq 3
mouse // Has freq 2

希望我已经把这道练习解释清楚了。下面是我的尝试。

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* One distinct input string paired with the number of times it was read. */
typedef struct _stringa {
    char *string;   /* NUL-terminated text of the entry */
    int   freq;     /* occurrence count for that text */
} stringa;


/*
 * qsort-style comparator that orders two stringa records by ascending freq.
 *
 * elem1, elem2: each is treated as the address of a stringa record (not a
 *               pointer to a pointer).
 * Returns -1, 0 or 1 when the first record's freq is lower than, equal to,
 * or higher than the second's.
 */
int compare(const void *elem1, const void *elem2) {
    const stringa *lhs = elem1;
    const stringa *rhs = elem2;

    /* Subtracting two 0/1 comparison results yields exactly -1, 0 or 1. */
    return (lhs->freq > rhs->freq) - (lhs->freq < rhs->freq);
}

/*
 * Looks up `string` in array[left..right] (both bounds inclusive).
 *
 * Precondition: the entries in that range are sorted in ascending strcmp()
 * order of their `string` field. On unsorted data the result is meaningless.
 *
 * Returns the lowest index in the range whose string equals `string`, or -1
 * when there is no match, the range is empty (left > right), left is
 * negative, or array is NULL.
 */
int BinarySearch(stringa** array, char* string, int left, int right) {
    /* An empty or invalid range has nothing to inspect; without this guard
     * array[middle] would be read outside the requested range. */
    if (array == NULL || left < 0 || left > right) {
        return -1;
    }

    /* Lower-bound search: shrink [left, right] until one candidate remains. */
    while (left < right) {
        /* Written this way so the midpoint cannot overflow int. */
        int middle = left + (right - left) / 2;

        if (strcmp(string, array[middle]->string) <= 0) {
            right = middle;       /* a match, if any, is at middle or before */
        } else {
            left = middle + 1;    /* a match, if any, is after middle */
        }
    }

    return (strcmp(string, array[left]->string) == 0) ? left : -1;
}


int main (void)
{
    char value[101];
    int n = 0;
    int stop;
    scanf("%d", &n); // Number of strings
    scanf("%d", &stop); // number of the most frequent strings to print

    stringa **array = NULL;
    array = malloc ( n * sizeof (struct _stringa *) );

    int i = 0;

    for (i=0; i<n; i++) {

        array[i] = malloc (sizeof (struct _stringa));
        array[i]->string = malloc (sizeof (value)); 

        scanf("%s", value);

        int already;
        already = BinarySearch(array, value, 0, i); // With a binary search, I see if the string is present in the previous positions of the array I am occupying. If it is not present, I copy the string into the array, otherwise, I use the value of binary search (which is the position of the element in the array) and I update the frequency field


        if (already==-1) {
            strcpy(array[i]->string,value); 
            array[i]->freq = 1;
        } else {
            array[already]->freq += 1;
        }

    }


    stringa **newarray = NULL; // New struct array of strings
    newarray = malloc ( n * sizeof (struct _stringa *) );

    int k = 0;
    for (i=0; i<n; i++) { // I use this loop to copy the element that don't have a frequency == 0
        if (array[i]->freq != 0) {
            newarray[k] = malloc(sizeof(struct _stringa));
            newarray[k] = malloc(sizeof(value));
            newarray[k]->string = array[i]->string;
            newarray[k]->freq = array[i]->freq;
            k++;
        }
    }
        qsort(newarray, n, sizeof(stringa*), compare);

        i=0;
        while ((newarray[i]!= NULL) && (i<k)) {
            printf("%s ", newarray[i]->string);
            printf("%d\n", newarray[i]->freq);
            i++;
        }


// Freeing operations        

    while (--n >= 0) {
        if (array[n]->string) free (array[n]->string);
        if (array[n]) free (array[n]);
    }

    if (array) free (array);
    if (newarray) free (newarray);

    return 0;
}

提前感谢任何有时间和耐心阅读此代码的人。

编辑：

我忘了说明它具体哪里不能正常工作。如果为了调试而暂时不使用 qsort，并以下面的输入为例：5 2 // 随机数，“打印 K 个字符串”的部分我还没有实现 你好 你好 你好 你好 你好

它打印：你好 3（频率） 你好 2（频率）

所以它不能正常工作。正如您在评论中指出的那样，这里的二分查找有缺陷，因为它只适用于有序列表。我能做的是每次都对数组重新排序，但我认为这会适得其反。有什么办法可以解决“判断某个字符串是否已存在于数组中、只添加尚不存在的字符串”这个问题吗？

Answer 1

如果您想要一种不需要排序的高效方法，请使用哈希表。否则，只需把每个唯一的字符串放进一个数组，然后线性扫描即可，简单可靠。

在现代硬件上，由于缓存友好且间接访问最少，这种扫描实际上很快。对于少量元素，插入排序实际上比 qsort 更高效。以 "Timsort" 为例：该算法是稳定的，并且在近乎有序的数据上避免了 qsort 的糟糕性能；它混合了归并排序和插入排序，可达到 O(n log n)，且在真实数据上不会出现极端的最坏情况。

C - 打印最频繁的字符串

1 个答案: