C:文本文件中出现字母

时间:2016-11-25 04:30:56

标签: c

程序通过命令行获取输入文件,并输出文本文件中每个字母的出现位置。不知道我哪里出错了。

int main(int argc, char *argv[]) {
    char word[1000];
    int a = 0;
    int b = 0;
    int d = 0;
    int c = 0;
    int e = 0;
    int f = 0;
    int g = 0;
    int h = 0;
    int i = 0;
    int j = 0;
    int k = 0;
    int l = 0;
    int m = 0;
    int n = 0;
    int o = 0;
    int p = 0;
    int q = 0;
    int r = 0;
    int s = 0;
    int t = 0;
    int u = 0;
    int v = 0;
    int w = 0;
    int x = 0;
    int y = 0;
    int z = 0;

    int other = 0;

    int counter, lenght;

    FILE *fp = fopen(argv[1], "r");

    fgets(word, 999, fp);

    lenght = 1000;
    for(counter = 0; counter < lenght; counter++) {
        word[counter] = tolower(word[counter]);
        if (word[counter] == 'a') {
            a++;
        }
        else if (word[counter] == 'b') {
            b++;
        }
        else if (word[counter] == 'c') {
            c++;
        }
        else if (word[counter] == 'd') {
            d++;
        }
        else if (word[counter] == 'e') {
            e++;
        }
        else if (word[counter] == 'f') {
            f++;
        }
        else if (word[counter] == 'g') {
            g++;
        }
        else if (word[counter] == 'h') {
            h++;
        }
        else if (word[counter] == 'i') {
            i++;
        }
        else if (word[counter] == 'j') {
            j++;
        }
        else if (word[counter] == 'k') {
            k++;
        }
        else if (word[counter] == 'l') {
            l++;
        }
        else if (word[counter] == 'm') {
            m++;
        }
        else if (word[counter] == 'n') {
            n++;
        }
        else if (word[counter] == 'o') {
            o++;
        }
        else if (word[counter] == 'p') {
            p++;
        }
        else if (word[counter] == 'q') {
            q++;
        }
        else if (word[counter] == 'r') {
            r++;
        }
        else if (word[counter] == 's') {
            s++;
        }
        else if (word[counter] == 't') {
            t++;
        }
        else if (word[counter] == 'u') {
            u++;
        }
        else if (word[counter] == 'v') {
            v++;
        }
        else if (word[counter] == 'w') {
            w++;
        }
        else if (word[counter] == 'x') {
            x++;
        }
        else if (word[counter] == 'y') {
            y++;
        }
        else if (word[counter] == 'z') {
            z++;
        }
        else {
            other++;
        }
    }
    printf("\nCharacter frequency in %s", argv[1]);
    printf("\nCharacter   Count");
    printf("\na \t\t %d", a);
    printf("\nb \t\t %d", b);
    printf("\nc \t\t %d", c);
    printf("\nd \t\t %d", d);
    printf("\ne \t\t %d", e);
    printf("\nf \t\t %d", f);
    printf("\ng \t\t %d", g);
    printf("\nh \t\t %d", h);
    printf("\ni \t\t %d", i);
    printf("\nj \t\t %d", j);
    printf("\nk \t\t %d", k);
    printf("\nl \t\t %d", l);
    printf("\nm \t\t %d", m);
    printf("\nn \t\t %d", n);
    printf("\no \t\t %d", o);
    printf("\np \t\t %d", p);
    printf("\nq \t\t %d", q);
    printf("\nr \t\t %d", r);
    printf("\ns \t\t %d", s);
    printf("\nt \t\t %d", t);
    printf("\nu \t\t %d", u);
    printf("\nv \t\t %d", v);
    printf("\nw \t\t %d", w);
    printf("\nx \t\t %d", x);
    printf("\ny \t\t %d", y);
    printf("\nz \t\t %d", z);
    fclose(fp);
    return 0;
}

应该在两列中输出一个是字母,下一个是字母出现的次数

2 个答案:

答案 0 :(得分:5)

您的代码存在问题:

  • 您不包含<stdio.h><ctype.h>

  • 您只读了一行,甚至没有检查是否成功。你应该写一个像while (fgets(word, sizeof word, fp)) {

  • 这样的循环
  • 检查word数组中的所有字符:您应该停在该行的末尾:lenght = strlen(word);

  • tolower()不应该被赋予char参数,因为在char被签名的平台上,负值会调用未定义的行为。您可以将参数强制转换为(unsigned char)以避免这种情况:word[counter] = tolower((unsigned char)word[counter]);

有更多改进空间:

  • lenght拼写错误,应为length

  • 您应该使用一组计数器来避免所有这些测试以及所有这些明确的printf语句。

  • 检查参数计数和fopen()成功

  • 无需逐行读取,一次使用getc()处理一个字节。但是,一次读取一个大块可能会更快,因为它使用较少的测试和锁定。

  • printf语句应该在结尾而不是在开头输出换行符。

以下是更正后的简化版本:

#include <ctype.h>
#include <limits.h>
#include <stdio.h>

int main(int argc, char *argv[]) {
    int count[UCHAR_MAX + 1] = { 0 };
    int other, total;
    int c;
    const char *s;
    FILE *fp;

    if (argc <= 1) {
        fprintf(stderr, "missing input file\n");
        return 1;
    }
    fp = fopen(argv[1], "r");
    if (fp == NULL) {
        fprintf(stderr, "cannot open input file %s\n", argv[1]);
        return 1;
    }

    total = 0;
    while ((c = getc(fp)) != EOF) {
        count[tolower(c)] += 1;
        total++;
    }

    printf("Character frequency in %s\n", argv[1]);
    printf("Character   Count\n");
    other = total;
    for (s = "abcdefghijklmnopqrstuvwxyz"; *s; s++) {
        printf("%c:\t%9d\n", *s, count[(unsigned char)*s]);
        other -= count[(unsigned char)*s];
    }
    printf("other:\t%9d\n", other);
    fclose(fp);
    return 0;
}

使用chunks而不是一个字节一次读取文件可以显着提高最近C库的速度,因为对多线程的支持使getc()宏效率低下。使用64K缓冲区,对于400MB文件,下面的代码快50倍(50倍)

#include <ctype.h>
#include <limits.h>
#include <stdio.h>

#define BUFFER_SIZE 65536
int main(int argc, char *argv[]) {
    unsigned char buffer[BUFFER_SIZE];
    long count[UCHAR_MAX + 1] = { 0 };
    long other;
    size_t i, n;
    const char *s;
    FILE *fp;

    if (argc <= 1) {
        fprintf(stderr, "missing input file\n");
        return 1;
    }
    fp = fopen(argv[1], "r");
    if (fp == NULL) {
        fprintf(stderr, "cannot open input file %s\n", argv[1]);
        return 1;
    }

    while ((n = fread(buffer, 1, sizeof buffer, fp)) != 0) {
        for (i = 0; i < n; i++) {
            count[buffer[i]] += 1;
        }
    }
    other = 0;
    for (i = 0; i <= UCHAR_MAX; i++) {
        if (isupper(i)) {
            count[tolower(i)] += count[i];
        } else {
            if (!islower(i))
                other += count[i];
        }
    }

    printf("Character frequency in %s\n", argv[1]);
    printf("Character   Count\n");
    for (s = "abcdefghijklmnopqrstuvwxyz"; *s; s++) {
        printf("%c:\t%9ld\n", *s, count[(unsigned char)*s]);
    }
    printf("other:\t%9ld\n", other);
    fclose(fp);
    return 0;
}

答案 1 :(得分:0)

这是我写的快速实现。它没有使用fgets,但这绝对是一种选择。

程序的流程应该很简单,但如下所示:

  • 检查正确的参数计数。
  • 声明我们需要的变量。
  • 声明文件指针并尝试打开文件。
  • 如果文件没有打开,我们就会输出错误。
  • 一次读入一个文件中的每个字符,并将其存储到变量c中。
  • 使用我们的ascii表,我们将改变这些值,使它们进入我们阵列中的正确位置。
  • 打印出我们所有的价值观。
  • 关闭文件。

    #include <stdio.h>
    
    int main(int argc, char **argv){
    
        if (argc < 2){
            printf("Not enough arguments!\n");
            return -1;
        }
    
        int A[27] = {0}, c;
    
        FILE *inFile = fopen(argv[1], "r");
        if (inFile == NULL){
            printf("The file \"%s\" could not be opened.\n", argv[1]);
            return -2;
        }
    
        while((c = fgetc(inFile)) != EOF){
            if ( c >= 'a' && c <= 'z' ){
                /* C is a lowercase character */
                c-='a'; 
                A[c]++;
            }
            else if ( c >= 'A' && c <= 'Z' ){
                /* C is an uppercase character */
                c-='A';
                A[c]++;
            }
            else if (c == '\n'){
                /* we're not counting newlines */
                continue;
            }
            else {
                A[26]++;
            }
        }
    
        /* Print out all the values except the "Other" count. */
        for (c = 0; c < sizeof A / sizeof A[0] - 1; c++){
                printf("%c: %d\n", c+'a', A[c]);
        } printf("Other: %d\n", A[26]); //Print out "Other" count
    
        /* Close our file */
        fclose(inFile);
        return 0;
    }