我是 C 语言编程的新手,找到了下面这个程序。它读取一段文本并统计每个单词出现的频率。我遇到的问题是:当两个或多个单词出现的次数相同时,这些单词需要按字母顺序排序,但我不知道该如何实现。
以下是代码:
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <stdlib.h>
#define MAXWORDS 10000
#define MAXSTRING 100
/* structure holding word frequency information: one entry per distinct
 * word seen in the input
 */
typedef struct _word {
char s[MAXSTRING]; /* the word, as a NUL-terminated string (at most MAXSTRING - 1 characters fit) */
int count; /* number of times word occurs */
} word;
/* Record one occurrence of the word s in the table words[0 .. *n - 1].
 *
 * words: table with room for MAXWORDS entries
 * n:     in/out number of entries in use; incremented when s is new
 * s:     NUL-terminated word to record (not modified)
 *
 * If s is already in the table its count is incremented; otherwise a new
 * entry with count 1 is appended. Words longer than MAXSTRING - 1
 * characters are truncated to that length before lookup and storage, and
 * new words are silently dropped once the table holds MAXWORDS entries.
 */
void insert_word(word *words, int *n, char *s) {
    char key[MAXSTRING];
    int i;

    /* Clamp the word to what an entry can hold. Comparing the clamped key
     * (rather than s) keeps over-long words that share a prefix in a
     * single entry instead of creating duplicates.
     */
    snprintf(key, sizeof key, "%s", s);

    for (i = 0; i < *n; i++) {
        if (strcmp(key, words[i].s) == 0) {
            /* found it: increment and return. */
            words[i].count++;
            return;
        }
    }

    /* Table full: ignore the new word rather than write past the end. */
    if (*n >= MAXWORDS) {
        return;
    }

    /* key is at most MAXSTRING - 1 characters, so this copy always fits. */
    strcpy(words[*n].s, key);
    /* this word has occurred once up to now, so count = 1 */
    words[*n].count = 1;
    /* one more word */
    (*n)++;
}
/* Ordering used when sorting the word table: entries with a larger count
 * come first (most frequent to least frequent).
 *
 * Returns +1 when a occurs less often than b, -1 when a occurs more often
 * than b, and 0 when both counts are equal.
 */
int wordcmp(word *a, word *b) {
    const int a_is_rarer = (a->count < b->count);
    const int a_is_more_common = (a->count > b->count);

    return a_is_rarer - a_is_more_common;
}
/* Tell whether c is one of the letters a..z or A..Z.
 * Returns 1 for a letter and 0 for any other character.
 */
int is_alpha(char c) {
    const int is_lower = ('a' <= c && c <= 'z');
    const int is_upper = ('A' <= c && c <= 'Z');

    return (is_lower || is_upper) ? 1 : 0;
}
/* Delete the character at index i of the string s by shifting everything
 * after it (including the terminating NUL) one position to the left.
 * When s[i] is already the terminator the string is left unchanged.
 */
void remove_char (char *s, int i) {
    char *cursor = s + i;

    while (*cursor != '\0') {
        /* pull the next character (possibly the NUL) into this slot */
        cursor[0] = cursor[1];
        cursor++;
    }
}
/* Remove every non-alphabetic character from the string s, in place.
 * Only characters accepted by is_alpha() are kept, in their original order.
 *
 * The string is compacted in one pass with separate read and write
 * positions. Deleting a character and then advancing the scan index would
 * skip whatever shifted into the freed slot, so runs of consecutive
 * non-letters (e.g. "a--b") would not be fully removed.
 */
void remove_non_alpha(char *s) {
    int out = 0;

    for (int in = 0; s[in]; in++) {
        if (is_alpha(s[in])) {
            s[out++] = s[in];
        }
    }
    s[out] = '\0';
}
/* Convert every letter in the string s to lowercase, in place.
 * Characters that tolower() does not map are left unchanged.
 */
void make_lowercase(char *s) {
    for (char *p = s; *p != '\0'; p++) {
        /* <ctype.h> functions require a value representable as unsigned
         * char (or EOF); passing a negative plain char is undefined.
         */
        *p = (char)tolower((unsigned char)*p);
    }
}
/* main program */
int main() {
word words[MAXWORDS];
char s[1000];
int i, n, m;
n = 0;
int a;
scanf("%d",&a);
/* read all the words in the file... */
while (!feof(stdin)) {
scanf("%s", s);
if (is_alpha(s[0])) {
remove_non_alpha(s);
make_lowercase(s);
insert_word(words, &n, s);
}
}
qsort((void *)words, n, sizeof(word),
(int (*)(const void *, const void *))wordcmp);
/* if fewer than 20 words in total, just print up the the
* first n words
*/
if (n < a)
m = n;
else
m = a;
/* print the words with their frequencies */
for (i = 0; i < m; i++)
printf("%s %d\n", words[i].s, words[i].count);
}
答案 0 :(得分:2)
您应该完善比较函数:如果两个单词的出现次数相等,则返回对字符串本身进行比较的结果:
/* Ordering used when sorting the word table.
 *
 * Primary key: count, descending (most frequent first).
 * Secondary key: the word itself, ascending as ordered by strcmp(), so
 * words with equal frequency come out in lexicographical order.
 *
 * Returns a negative, zero or positive value when a sorts before, equal
 * to, or after b.
 */
int wordcmp (word *a, word *b) {
    if (a->count != b->count) {
        /* the larger count sorts first */
        return (a->count < b->count) ? +1 : -1;
    }
    return strcmp(a->s, b->s);
}
另请注意,您的读取循环不正确:while (!feof(stdin))
不会在文件末尾正确停止,最后一个单词会被处理两次。您应该将逻辑更改为:
while (scanf("%999s", s) == 1) {
...
}
格式"%999s"
可防止超长单词导致缓冲区溢出。这样的超长单词会被悄悄地拆分成多段,因此只会使统计结果略有偏差,而不会引发未定义行为(可能导致崩溃)。