Question

我是C语言编程的新手，找到了下面这个程序。它读取一段文本并统计每个单词出现的频率。我遇到的问题是：当两个或多个单词出现的次数相同时，这些单词需要按字母顺序排序，但我不知道该如何实现。

以下是代码：

#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <stdlib.h>

#define MAXWORDS    10000
#define MAXSTRING   100

/* structure holding word frequency information: one entry per distinct word */

typedef struct _word {
    char    s[MAXSTRING];   /* the word, stored as a NUL-terminated string */
    int count;      /* number of times the word has occurred so far */
} word;

/*
 * Record one occurrence of the word s in the table words[0..*n-1].
 *
 * words: table of entries; it must have room for MAXWORDS entries
 * n:     in/out count of entries currently in use
 * s:     NUL-terminated word to record
 *
 * If the word is already present its count is incremented; otherwise a
 * new entry with count 1 is appended and *n grows by one.  Only the first
 * MAXSTRING - 1 characters of s are significant: longer words are
 * truncated when stored and matched on that prefix, so they cannot
 * overflow the s field.  When the table is already full a new word is
 * dropped instead of being written past the end of the array.
 */
void insert_word(word *words, int *n, char *s) {
    int i;

    for (i = 0; i < *n; i++) {
        /* stored words hold at most MAXSTRING - 1 characters, so comparing
         * that many is enough to match an entry that was truncated
         */
        if (strncmp(s, words[i].s, MAXSTRING - 1) == 0) {
            /* found it?  increment and return. */
            words[i].count++;
            return;
        }
    }

    /* table full: ignore the new word rather than overflow the array */
    if (*n >= MAXWORDS)
        return;

    /* bounded copy; snprintf always NUL-terminates the destination */
    snprintf(words[*n].s, sizeof words[*n].s, "%s", s);

    /* this word has occurred once up to now, so count = 1 */
    words[*n].count = 1;

    /* one more word */
    (*n)++;
}

/* qsort comparison callback: orders words by descending count, so the
 * most frequent word comes first.  Entries with equal counts compare
 * as equal (the function returns 0 for them).
 */
int wordcmp(word *a, word *b) {
    if (a->count == b->count)
        return 0;
    return (a->count < b->count) ? +1 : -1;
}

/* report whether c is a letter in the range a..z or A..Z:
 * returns 1 if it is, 0 otherwise
 */
int is_alpha(char c) {
    int lower = (c >= 'a' && c <= 'z');
    int upper = (c >= 'A' && c <= 'Z');

    return lower || upper;
}

/* delete the character at index i of the string s, in place;
 * i must not be past the terminating NUL
 */
void remove_char (char *s, int i) {
    char *dst = s + i;

    /* slide every later character, terminator included, one slot left */
    while (*dst != '\0') {
        dst[0] = dst[1];
        dst++;
    }
}

/*
 * remove non-alphabetic characters from the string s, in place
 *
 * Every character for which is_alpha() returns 0 is dropped and the
 * remaining ones are packed to the left in their original order.  A
 * separate write position is used so that each character is examined
 * exactly once: advancing the index after deleting in place would skip
 * the character that slid into the vacated slot, letting the second of
 * two adjacent non-letters survive ("a12b" would become "a2b").
 */
void remove_non_alpha(char *s) {
    int in, out = 0;

    for (in = 0; s[in]; in++) {
        if (is_alpha (s[in]))
            s[out++] = s[in];
    }

    /* terminate the compacted string */
    s[out] = 0;
}

/* make all the letters in s lowercase, in place
 *
 * Each character is converted with tolower().  The argument is cast to
 * unsigned char first because tolower() is only defined for values
 * representable as unsigned char (or EOF); passing a negative plain char
 * is undefined behavior.
 */
void make_lowercase(char *s) {
    int i;

    for (i = 0; s[i]; i++)
        s[i] = (char)tolower((unsigned char)s[i]);
}

/* main program */
int main() {
    word    words[MAXWORDS];
    char    s[1000];
    int i, n, m;

    n = 0;
    int a;
    scanf("%d",&a);

    /* read all the words in the file... */

    while (!feof(stdin)) {
        scanf("%s", s);

        if (is_alpha(s[0])) {
            remove_non_alpha(s);
            make_lowercase(s);
            insert_word(words, &n, s);
        }
    }

    qsort((void *)words, n, sizeof(word),
          (int (*)(const void *, const void *))wordcmp);

    /* if fewer than 20 words in total, just print up the the
     * first n words
     */
    if (n < a)
        m = n;
    else
        m = a;

    /* print the words with their frequencies */
    for (i = 0; i < m; i++)
        printf("%s %d\n", words[i].s, words[i].count);
}

Answer 1

您应该优化比较函数：如果出现次数相等，则返回字符串本身的比较：

/* qsort comparison callback.  The primary key is the count, in
 * descending order, so the most frequent word comes first.  Words with
 * the same count fall back to strcmp() on the word itself, which puts
 * them in ascending lexicographical order.
 */
int wordcmp (word *a, word *b) {
    if (a->count != b->count)
        return (a->count < b->count) ? +1 : -1;
    return strcmp(a->s, b->s);
}

另请注意，您的解析循环不正确：while (!feof(stdin)) 不会在文件末尾正确停止，因为 feof() 只有在一次读取失败之后才返回真，所以最后一个单词会被处理两次。您应该把循环改为：

while (scanf("%999s", s) == 1) {
    ...
}

格式"%999s"可防止超长的单词导致缓冲区溢出。这样，超长的单词会被悄悄拆分成多段，只会让统计结果略有偏差，而不会触发未定义行为（可能导致崩溃）。

按字母顺序在C中排序字符串

1 个答案: