Question

程序通过命令行获取输入文件，并输出文本文件中每个字母的出现位置。不知道我哪里出错了。

int main(int argc, char *argv[]) {
    char word[1000];
    int a = 0;
    int b = 0;
    int d = 0;
    int c = 0;
    int e = 0;
    int f = 0;
    int g = 0;
    int h = 0;
    int i = 0;
    int j = 0;
    int k = 0;
    int l = 0;
    int m = 0;
    int n = 0;
    int o = 0;
    int p = 0;
    int q = 0;
    int r = 0;
    int s = 0;
    int t = 0;
    int u = 0;
    int v = 0;
    int w = 0;
    int x = 0;
    int y = 0;
    int z = 0;

    int other = 0;

    int counter, lenght;

    FILE *fp = fopen(argv[1], "r");

    fgets(word, 999, fp);

    lenght = 1000;
    for(counter = 0; counter < lenght; counter++) {
        word[counter] = tolower(word[counter]);
        if (word[counter] == 'a') {
            a++;
        }
        else if (word[counter] == 'b') {
            b++;
        }
        else if (word[counter] == 'c') {
            c++;
        }
        else if (word[counter] == 'd') {
            d++;
        }
        else if (word[counter] == 'e') {
            e++;
        }
        else if (word[counter] == 'f') {
            f++;
        }
        else if (word[counter] == 'g') {
            g++;
        }
        else if (word[counter] == 'h') {
            h++;
        }
        else if (word[counter] == 'i') {
            i++;
        }
        else if (word[counter] == 'j') {
            j++;
        }
        else if (word[counter] == 'k') {
            k++;
        }
        else if (word[counter] == 'l') {
            l++;
        }
        else if (word[counter] == 'm') {
            m++;
        }
        else if (word[counter] == 'n') {
            n++;
        }
        else if (word[counter] == 'o') {
            o++;
        }
        else if (word[counter] == 'p') {
            p++;
        }
        else if (word[counter] == 'q') {
            q++;
        }
        else if (word[counter] == 'r') {
            r++;
        }
        else if (word[counter] == 's') {
            s++;
        }
        else if (word[counter] == 't') {
            t++;
        }
        else if (word[counter] == 'u') {
            u++;
        }
        else if (word[counter] == 'v') {
            v++;
        }
        else if (word[counter] == 'w') {
            w++;
        }
        else if (word[counter] == 'x') {
            x++;
        }
        else if (word[counter] == 'y') {
            y++;
        }
        else if (word[counter] == 'z') {
            z++;
        }
        else {
            other++;
        }
    }
    printf("\nCharacter frequency in %s", argv[1]);
    printf("\nCharacter   Count");
    printf("\na \t\t %d", a);
    printf("\nb \t\t %d", b);
    printf("\nc \t\t %d", c);
    printf("\nd \t\t %d", d);
    printf("\ne \t\t %d", e);
    printf("\nf \t\t %d", f);
    printf("\ng \t\t %d", g);
    printf("\nh \t\t %d", h);
    printf("\ni \t\t %d", i);
    printf("\nj \t\t %d", j);
    printf("\nk \t\t %d", k);
    printf("\nl \t\t %d", l);
    printf("\nm \t\t %d", m);
    printf("\nn \t\t %d", n);
    printf("\no \t\t %d", o);
    printf("\np \t\t %d", p);
    printf("\nq \t\t %d", q);
    printf("\nr \t\t %d", r);
    printf("\ns \t\t %d", s);
    printf("\nt \t\t %d", t);
    printf("\nu \t\t %d", u);
    printf("\nv \t\t %d", v);
    printf("\nw \t\t %d", w);
    printf("\nx \t\t %d", x);
    printf("\ny \t\t %d", y);
    printf("\nz \t\t %d", z);
    fclose(fp);
    return 0;
}

应该在两列中输出一个是字母，下一个是字母出现的次数

Answer 1

您的代码存在问题：

您不包含<stdio.h>或<ctype.h>
您只读了一行，甚至没有检查是否成功。你应该写一个像while (fgets(word, sizeof word, fp)) {
检查word数组中的所有字符：您应该停在该行的末尾：lenght = strlen(word);
tolower()不应该被赋予char参数，因为在char被签名的平台上，负值会调用未定义的行为。您可以将参数强制转换为(unsigned char)以避免这种情况：word[counter] = tolower((unsigned char)word[counter]);

有更多改进空间：

lenght拼写错误，应为length。
您应该使用一组计数器来避免所有这些测试以及所有这些明确的printf语句。
检查参数计数和fopen()成功
无需逐行读取，一次使用getc()处理一个字节。但是，一次读取一个大块可能会更快，因为它使用较少的测试和锁定。
printf语句应该在结尾而不是在开头输出换行符。

以下是更正后的简化版本：

#include <ctype.h>
#include <limits.h>
#include <stdio.h>

int main(int argc, char *argv[]) {
    int count[UCHAR_MAX + 1] = { 0 };
    int other, total;
    int c;
    const char *s;
    FILE *fp;

    if (argc <= 1) {
        fprintf(stderr, "missing input file\n");
        return 1;
    }
    fp = fopen(argv[1], "r");
    if (fp == NULL) {
        fprintf(stderr, "cannot open input file %s\n", argv[1]);
        return 1;
    }

    total = 0;
    while ((c = getc(fp)) != EOF) {
        count[tolower(c)] += 1;
        total++;
    }

    printf("Character frequency in %s\n", argv[1]);
    printf("Character   Count\n");
    other = total;
    for (s = "abcdefghijklmnopqrstuvwxyz"; *s; s++) {
        printf("%c:\t%9d\n", *s, count[(unsigned char)*s]);
        other -= count[(unsigned char)*s];
    }
    printf("other:\t%9d\n", other);
    fclose(fp);
    return 0;
}

使用chunks而不是一个字节一次读取文件可以显着提高最近C库的速度，因为对多线程的支持使getc()宏效率低下。使用64K缓冲区，对于400MB文件，下面的代码快50倍（50倍）：

#include <ctype.h>
#include <limits.h>
#include <stdio.h>

#define BUFFER_SIZE 65536
int main(int argc, char *argv[]) {
    unsigned char buffer[BUFFER_SIZE];
    long count[UCHAR_MAX + 1] = { 0 };
    long other;
    size_t i, n;
    const char *s;
    FILE *fp;

    if (argc <= 1) {
        fprintf(stderr, "missing input file\n");
        return 1;
    }
    fp = fopen(argv[1], "r");
    if (fp == NULL) {
        fprintf(stderr, "cannot open input file %s\n", argv[1]);
        return 1;
    }

    while ((n = fread(buffer, 1, sizeof buffer, fp)) != 0) {
        for (i = 0; i < n; i++) {
            count[buffer[i]] += 1;
        }
    }
    other = 0;
    for (i = 0; i <= UCHAR_MAX; i++) {
        if (isupper(i)) {
            count[tolower(i)] += count[i];
        } else {
            if (!islower(i))
                other += count[i];
        }
    }

    printf("Character frequency in %s\n", argv[1]);
    printf("Character   Count\n");
    for (s = "abcdefghijklmnopqrstuvwxyz"; *s; s++) {
        printf("%c:\t%9ld\n", *s, count[(unsigned char)*s]);
    }
    printf("other:\t%9ld\n", other);
    fclose(fp);
    return 0;
}

Answer 2

这是我写的快速实现。它没有使用fgets，但这绝对是一种选择。

程序的流程应该很简单，但如下所示：

检查正确的参数计数。
声明我们需要的变量。
声明文件指针并尝试打开文件。
如果文件没有打开，我们就会输出错误。
一次读入一个文件中的每个字符，并将其存储到变量c中。
使用我们的ascii表，我们将改变这些值，使它们进入我们阵列中的正确位置。
打印出我们所有的价值观。

关闭文件。

#include <stdio.h>

int main(int argc, char **argv){

    if (argc < 2){
        printf("Not enough arguments!\n");
        return -1;
    }

    int A[27] = {0}, c;

    FILE *inFile = fopen(argv[1], "r");
    if (inFile == NULL){
        printf("The file \"%s\" could not be opened.\n", argv[1]);
        return -2;
    }

    while((c = fgetc(inFile)) != EOF){
        if ( c >= 'a' && c <= 'z' ){
            /* C is a lowercase character */
            c-='a'; 
            A[c]++;
        }
        else if ( c >= 'A' && c <= 'Z' ){
            /* C is an uppercase character */
            c-='A';
            A[c]++;
        }
        else if (c == '\n'){
            /* we're not counting newlines */
            continue;
        }
        else {
            A[26]++;
        }
    }

    /* Print out all the values except the "Other" count. */
    for (c = 0; c < sizeof A / sizeof A[0] - 1; c++){
            printf("%c: %d\n", c+'a', A[c]);
    } printf("Other: %d\n", A[26]); //Print out "Other" count

    /* Close our file */
    fclose(inFile);
    return 0;
}

C：文本文件中出现字母

2 个答案: