Question

下面的代码可以统计文件 WordFile 中单词“is”出现的次数。但在这个程序中，我预先固定了数组的大小。请帮我修改程序，使它能够在长度未知的文件中统计单词“is”的出现次数。数组 file 的长度应当等于 WordFile 文件的长度。

// Count of occurrence of word 'is' in file WordFile.

#include<stdio.h>
#include<conio.h>
#include<string.h>

/*
 * Append the single character c to the NUL-terminated string s, in place.
 *
 * s must already hold a valid NUL-terminated string, and the caller must
 * guarantee that the underlying buffer has room for strlen(s) + 2 bytes;
 * no bounds checking is done here.
 */

void append(char* s, char c)
{
        size_t len = strlen(s);   /* size_t is what strlen() actually returns */
        s[len] = c;
        s[len+1] = '\0';          /* re-terminate after the new character */
}

/*
 * Read ../WordFile.txt into a fixed-size buffer, print it, and report how
 * many times the word "is" (in any letter case) occurs in it.
 *
 * A match must start at the beginning of the text or right after ' ' or ',',
 * and must be followed by ' ', ',' or the end of the text.
 *
 * Returns 0 on success and 1 when the input file cannot be opened.
 */
int main(void)
{
    FILE *fp;
    int i=0,count=0,start,j,k,space,times=0;
    int ch;                   /* int, not char: fgetc() reports EOF out of band */
    char file[1000] = {0};    /* zero-filled so append()'s strlen() sees an empty string */

    fp = fopen("../WordFile.txt","r");
    if (fp == NULL)
    {
        perror("../WordFile.txt");
        return 1;
    }

    while ((ch=fgetc(fp)) != EOF)
    {
        /* keep one byte free so the terminating NUL always fits in the array */
        if (count >= (int)sizeof(file) - 1)
        {
            fprintf(stderr, "input is longer than %d characters; the rest is ignored\n",
                    (int)sizeof(file) - 1);
            break;
        }

        count++;
        append(file,(char)ch);
    }

    fclose(fp);

    printf("Count of file is %d \n",count);

    printf("%s \n",file);

    /* i and i+1 must both be real characters; file[count] is the NUL terminator,
       so reading file[i+2] never leaves the initialised part of the array */
    for(i=0;i+1<count;i++)
    {
        /* without a leading boundary, the "is" inside "this" would be counted */
        start = (i == 0 || file[i-1] == ' ' || file[i-1] == ',');

        j = (file[i] == 'i'  || file[i] == 'I');

        k = (file[i+1] == 's' || file[i+1] == 'S');

        /* '\0' marks the end of the text that was read */
        space = (file[i+2] == ' ' || file[i+2] == ',' || file[i+2] == '\0');

        if( start && j && k && space )
            times ++;
    }

    printf("the string IS appeared %d times in the griven file. \n", times);
    getch();

    return 0;
}

Answer 1

您可以使用 <sys/stat.h> 中的 stat() 获取文件大小；例如，请参阅这个问题：How do you determine the size of a file in C? 一旦知道了文件大小，就可以分配一个足够大的字符数组来保存文件内容。

但是，您也可以不必先把整个文件读入内存就对它进行解析：每次只把几个字节读入一个小缓冲区，并且只处理这些字节。以下是基于您的代码对该方法的一个快速实现。

请注意：这段代码还有几处可以改进。首先，应该加入更多的错误检查；其次，可以使用 strcmp() / strncmp() / strnicmp() 系列函数更高效地检查输入缓冲区；再次，可以使用命令行参数而不是硬编码的值（下面的代码就是这样做的；这是我能够提供一批测试输入文件的唯一合理方式）；此外，还可以使用 buf[indx++] = ch 这样的简写（利用后置自增运算符）；等等。

下面这段代码的主要目的，是帮助您开始把文件处理看作对流的处理，而不是预先读入整个文件。其他人在您的问题下添加的评论也值得留意。希望这对您有帮助！

// count of occurrences of word 'is' in input file

#include<stdio.h>
#include<string.h>

// Return nonzero when ch separates words: space, comma or newline.
static int is_boundary(int ch) {
    return ch == ' ' || ch == ',' || ch == '\n';
}

/*
 * Count the occurrences of the word "is" (in any letter case) in the file
 * named by the first command-line argument, reading it as a stream so that
 * the file size never has to be known in advance.
 *
 * A word is delimited by ' ', ',' or '\n'; the start and the end of the file
 * also count as word boundaries.
 *
 * Returns 0 on success and 1 when the argument is missing or the file cannot
 * be opened.
 */
int main(int argc, char** argv) {
    FILE *fp;
    int count = 0;
    int times = 0;
    int ch;     // int, not char, so EOF differs from every possible byte value

    // Progress of the word currently being read. The start of the file is
    // treated as a boundary so that a leading 'is' is detected.
    enum { AT_BOUNDARY, SAW_I, SAW_IS, IN_OTHER_WORD } state = AT_BOUNDARY;

    if (argc < 2) {
        fprintf(stderr, "usage: %s <input-file>\n",
                argc > 0 ? argv[0] : "countis");
        return 1;
    }

    fp = fopen(argv[1], "r");
    if (fp == NULL) {
        perror(argv[1]);
        return 1;
    }

    while ((ch = fgetc(fp)) != EOF) {
        count++;

        if (is_boundary(ch)) {
            // a boundary right after "is" completes one occurrence
            if (state == SAW_IS) {
                times++;
            }
            state = AT_BOUNDARY;
        } else if (state == AT_BOUNDARY && (ch == 'i' || ch == 'I')) {
            state = SAW_I;
        } else if (state == SAW_I && (ch == 's' || ch == 'S')) {
            state = SAW_IS;
        } else {
            // any other character means this word cannot be exactly "is"
            state = IN_OTHER_WORD;
        }
    }

    // EOF is also a word boundary, so an 'is' that ends the file counts too
    if (state == SAW_IS) {
        times++;
    }

    fclose(fp);

    printf("input file is %d characters long\n", count);
    printf("the string IS appeared %d times in the input file\n", times);
    return 0;
}

有关argc和argv的更多信息（回复：评论问题）

argc 是命令行参数的数量; argv 是一组指向这些命令行参数的指针。

argv[0] 始终指向命令本身（即所执行程序的名称）。argc 通常用于检查命令行参数是否达到最少个数、作为遍历命令行参数时的循环上限、在使用 argv[n] 之前进行检查等。有时您会看到 argv 被声明为 char *argv[]，它的作用与 char **argv 完全相同。

因此，行fp = fopen(argv[1], "r");使用第一个命令行参数作为输入文件的文件名。例如，在我的测试中，我将此代码编译为countis并使用countis countis-input-test-001执行。（我有一系列测试输入文件，并使用shell脚本处理每个文件，以测试我对程序所做的每个编辑。）

以下是阅读更多信息并使用argc和argv查看代码示例的几个地方：

https://www.tutorialspoint.com/cprogramming/c_command_line_arguments.htm http://www.teach.cs.toronto.edu/~ajr/209/notes/argv.html

您还可以在 Google 上搜索“c programming argc argv”或类似的关键词，以获取更多相关资料。

如何初始化一个不确定长度的数组

1 个答案: