程序通过命令行获取输入文件,并输出文本文件中每个字母的出现位置。不知道我哪里出错了。
int main(int argc, char *argv[]) {
char word[1000];
int a = 0;
int b = 0;
int d = 0;
int c = 0;
int e = 0;
int f = 0;
int g = 0;
int h = 0;
int i = 0;
int j = 0;
int k = 0;
int l = 0;
int m = 0;
int n = 0;
int o = 0;
int p = 0;
int q = 0;
int r = 0;
int s = 0;
int t = 0;
int u = 0;
int v = 0;
int w = 0;
int x = 0;
int y = 0;
int z = 0;
int other = 0;
int counter, lenght;
FILE *fp = fopen(argv[1], "r");
fgets(word, 999, fp);
lenght = 1000;
for(counter = 0; counter < lenght; counter++) {
word[counter] = tolower(word[counter]);
if (word[counter] == 'a') {
a++;
}
else if (word[counter] == 'b') {
b++;
}
else if (word[counter] == 'c') {
c++;
}
else if (word[counter] == 'd') {
d++;
}
else if (word[counter] == 'e') {
e++;
}
else if (word[counter] == 'f') {
f++;
}
else if (word[counter] == 'g') {
g++;
}
else if (word[counter] == 'h') {
h++;
}
else if (word[counter] == 'i') {
i++;
}
else if (word[counter] == 'j') {
j++;
}
else if (word[counter] == 'k') {
k++;
}
else if (word[counter] == 'l') {
l++;
}
else if (word[counter] == 'm') {
m++;
}
else if (word[counter] == 'n') {
n++;
}
else if (word[counter] == 'o') {
o++;
}
else if (word[counter] == 'p') {
p++;
}
else if (word[counter] == 'q') {
q++;
}
else if (word[counter] == 'r') {
r++;
}
else if (word[counter] == 's') {
s++;
}
else if (word[counter] == 't') {
t++;
}
else if (word[counter] == 'u') {
u++;
}
else if (word[counter] == 'v') {
v++;
}
else if (word[counter] == 'w') {
w++;
}
else if (word[counter] == 'x') {
x++;
}
else if (word[counter] == 'y') {
y++;
}
else if (word[counter] == 'z') {
z++;
}
else {
other++;
}
}
printf("\nCharacter frequency in %s", argv[1]);
printf("\nCharacter Count");
printf("\na \t\t %d", a);
printf("\nb \t\t %d", b);
printf("\nc \t\t %d", c);
printf("\nd \t\t %d", d);
printf("\ne \t\t %d", e);
printf("\nf \t\t %d", f);
printf("\ng \t\t %d", g);
printf("\nh \t\t %d", h);
printf("\ni \t\t %d", i);
printf("\nj \t\t %d", j);
printf("\nk \t\t %d", k);
printf("\nl \t\t %d", l);
printf("\nm \t\t %d", m);
printf("\nn \t\t %d", n);
printf("\no \t\t %d", o);
printf("\np \t\t %d", p);
printf("\nq \t\t %d", q);
printf("\nr \t\t %d", r);
printf("\ns \t\t %d", s);
printf("\nt \t\t %d", t);
printf("\nu \t\t %d", u);
printf("\nv \t\t %d", v);
printf("\nw \t\t %d", w);
printf("\nx \t\t %d", x);
printf("\ny \t\t %d", y);
printf("\nz \t\t %d", z);
fclose(fp);
return 0;
}
应该在两列中输出一个是字母,下一个是字母出现的次数
答案 0 :(得分:5)
您的代码存在问题:
您不包含<stdio.h>
或<ctype.h>
您只读了一行,甚至没有检查是否成功。你应该写一个像while (fgets(word, sizeof word, fp)) {
检查word
数组中的所有字符:您应该停在该行的末尾:lenght = strlen(word);
tolower()
不应该被赋予char
参数,因为在char
被签名的平台上,负值会调用未定义的行为。您可以将参数强制转换为(unsigned char)
以避免这种情况:word[counter] = tolower((unsigned char)word[counter]);
有更多改进空间:
lenght
拼写错误,应为length
。
您应该使用一组计数器来避免所有这些测试以及所有这些明确的printf
语句。
检查参数计数和fopen()
成功
无需逐行读取,一次使用getc()
处理一个字节。但是,一次读取一个大块可能会更快,因为它使用较少的测试和锁定。
printf
语句应该在结尾而不是在开头输出换行符。
以下是更正后的简化版本:
#include <ctype.h>
#include <limits.h>
#include <stdio.h>
int main(int argc, char *argv[]) {
int count[UCHAR_MAX + 1] = { 0 };
int other, total;
int c;
const char *s;
FILE *fp;
if (argc <= 1) {
fprintf(stderr, "missing input file\n");
return 1;
}
fp = fopen(argv[1], "r");
if (fp == NULL) {
fprintf(stderr, "cannot open input file %s\n", argv[1]);
return 1;
}
total = 0;
while ((c = getc(fp)) != EOF) {
count[tolower(c)] += 1;
total++;
}
printf("Character frequency in %s\n", argv[1]);
printf("Character Count\n");
other = total;
for (s = "abcdefghijklmnopqrstuvwxyz"; *s; s++) {
printf("%c:\t%9d\n", *s, count[(unsigned char)*s]);
other -= count[(unsigned char)*s];
}
printf("other:\t%9d\n", other);
fclose(fp);
return 0;
}
使用chunks而不是一个字节一次读取文件可以显着提高最近C库的速度,因为对多线程的支持使getc()
宏效率低下。使用64K缓冲区,对于400MB文件,下面的代码快50倍(50倍):
#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#define BUFFER_SIZE 65536
int main(int argc, char *argv[]) {
unsigned char buffer[BUFFER_SIZE];
long count[UCHAR_MAX + 1] = { 0 };
long other;
size_t i, n;
const char *s;
FILE *fp;
if (argc <= 1) {
fprintf(stderr, "missing input file\n");
return 1;
}
fp = fopen(argv[1], "r");
if (fp == NULL) {
fprintf(stderr, "cannot open input file %s\n", argv[1]);
return 1;
}
while ((n = fread(buffer, 1, sizeof buffer, fp)) != 0) {
for (i = 0; i < n; i++) {
count[buffer[i]] += 1;
}
}
other = 0;
for (i = 0; i <= UCHAR_MAX; i++) {
if (isupper(i)) {
count[tolower(i)] += count[i];
} else {
if (!islower(i))
other += count[i];
}
}
printf("Character frequency in %s\n", argv[1]);
printf("Character Count\n");
for (s = "abcdefghijklmnopqrstuvwxyz"; *s; s++) {
printf("%c:\t%9ld\n", *s, count[(unsigned char)*s]);
}
printf("other:\t%9ld\n", other);
fclose(fp);
return 0;
}
答案 1 :(得分:0)
这是我写的快速实现。它没有使用fgets,但这绝对是一种选择。
程序的流程应该很简单,但如下所示:
c
中。关闭文件。
#include <stdio.h>
int main(int argc, char **argv){
if (argc < 2){
printf("Not enough arguments!\n");
return -1;
}
int A[27] = {0}, c;
FILE *inFile = fopen(argv[1], "r");
if (inFile == NULL){
printf("The file \"%s\" could not be opened.\n", argv[1]);
return -2;
}
while((c = fgetc(inFile)) != EOF){
if ( c >= 'a' && c <= 'z' ){
/* C is a lowercase character */
c-='a';
A[c]++;
}
else if ( c >= 'A' && c <= 'Z' ){
/* C is an uppercase character */
c-='A';
A[c]++;
}
else if (c == '\n'){
/* we're not counting newlines */
continue;
}
else {
A[26]++;
}
}
/* Print out all the values except the "Other" count. */
for (c = 0; c < sizeof A / sizeof A[0] - 1; c++){
printf("%c: %d\n", c+'a', A[c]);
} printf("Other: %d\n", A[26]); //Print out "Other" count
/* Close our file */
fclose(inFile);
return 0;
}