Question

我正在尝试逐行读取文件并将其拆分为单词。这些单词应该保存到一个数组中。但是，程序只获取文本文件的第一行，当它尝试读取新行时，程序崩溃。

/*
 * Snippet from the question: read file.txt line by line and copy each
 * space-separated token into words[]. Left exactly as posted; the comments
 * mark where it goes wrong.
 */
FILE *inputfile = fopen("file.txt", "r"); /* result is never checked for NULL */
char buf [1024];       /* receives one line (at most 1023 chars) per fgets call */
int i=0;               /* next slot in words[]; never reset or range-checked below */
char fileName [25];    /* not used anywhere in this snippet */
char words [100][100]; /* room for 100 words of up to 99 characters each */
char *token;

 while(fgets(buf,sizeof(buf),inputfile)!=NULL){

        /* First token of the line; strtok returns NULL if the line holds only delimiters. */
        token = strtok(buf, " ");
        strcpy(words[0], token);
        printf("%s\n", words[0]);
        /* The condition tests the PREVIOUS token, not the one fetched inside the body. */
        while (token != NULL) {


            /* At the end of the line this returns NULL ... */
            token = strtok(NULL, "  ");
            /* ... and that NULL is handed straight to strcpy: this is the crash. */
            /* i is still 0 on the first pass, so this also overwrites words[0]. */
            strcpy(words[i],token);
            printf("%s\n",words[i]);
            i++;

        }

    }

Answer 1

问题是在内部while循环中获取下一个标记并将结果传递给strcpy而不检查NULL结果。

// The loop from the question, annotated to show where the NULL pointer
// reaches strcpy.
while(fgets(buf,sizeof(buf),inputfile)!=NULL){

    // First token of the current line.
    token = strtok(buf, " ");
    strcpy(words[0], token);
    printf("%s\n", words[0]);

    while (token != NULL) {// tests the token fetched on the previous pass
        token = strtok(NULL, "  ");// fetch the next token; NULL once the line is exhausted
        // token is not checked again before use, so at the end of the line
        // NULL is passed to strcpy as the source string
        strcpy(words[i],token);
        printf("%s\n",words[i]);
        i++;
    }
}

通过将检查结合到while条件中，避免将NULL作为第二个参数传递给strcpy。

// Replacement for the inner loop: fetch and test the token in one step, so
// the body only ever runs with a non-NULL token.
while ( ( token = strtok ( NULL, "  ")) != NULL) {// fetch the next token and stop as soon as it is NULL
    // not reached when strtok returned NULL, so strcpy never sees NULL
    strcpy(words[i],token);
    printf("%s\n",words[i]);
    i++;
}

Answer 2

清理你的循环，不要重复自己：

#include <stdio.h>
#include <string.h>

/*
 * Read file.txt line by line, split every line into whitespace-separated
 * words and store them in a fixed-size table.
 *
 * Returns 0 on success, 1 if the file cannot be opened.
 */
int main(void)
{
    /* Whitespace that separates words; includes the '\n' that fgets keeps. */
    static const char delims[] = " \t\r\n";

    FILE *inputfile = fopen("file.txt", "r");
    char buf[1024];
    char words[100][100];
    const size_t max_words = sizeof words / sizeof words[0];
    size_t i;
    char *token;

    if (inputfile == NULL) {
        perror("file.txt");
        return 1;
    }

    /* One loop per level, no duplicated strtok/copy code; stop when the table is full. */
    for (i = 0; i < max_words && fgets(buf, sizeof(buf), inputfile); ) {
        for (token = strtok(buf, delims);
             token != NULL && i < max_words;
             token = strtok(NULL, delims)) {
            /* Bounded copy: an over-long token is truncated, never overflows its slot. */
            (void)snprintf(words[i++], sizeof words[0], "%s", token);
        }
    }

    fclose(inputfile);
    return 0;
}

Answer 3

在看到xing的好答案之后，我决定编写一个完整的简单程序来实现你的任务，并说明我的解决方案。我的程序逐行读取通过命令行参数指定的文件，并把读到的每一行依次追加到缓冲区中。

代码：

/*
 * Feature-test macros must be defined before the first system header is
 * included, otherwise they may have no effect. _WITH_GETLINE exposes
 * getline(3) on FreeBSD, _POSIX_C_SOURCE on other POSIX systems.
 */
#define _POSIX_C_SOURCE 200809L
#define _WITH_GETLINE
#include <sys/types.h>
#include <assert.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Run the attached block (typically an error message) when x is false, then
 * terminate: assert(3) aborts in debug builds, and the trailing abort()
 * keeps the loop from spinning forever when NDEBUG disables assert.
 */
#define assert_msg(x) for ( ; !(x) ; assert(x), abort() )

/*
 * Read the file named by argv[1] line by line, join all lines into a single
 * heap buffer (each newline replaced by a space) and print the result.
 */
int
main(int argc, char **argv)
{
        FILE *file;
        char *buf, *token, *tmp;
        size_t length, size;
        ssize_t read;   /* getline(3) returns ssize_t; -1 signals EOF or error */

        assert_msg(argc == 2) {
                fprintf(stderr, "usage: program <file>\n");
        }

        file = fopen(argv[1], "r");
        assert_msg(file != NULL) {
                fprintf(stderr, "Error ocurred: %s\n", strerror(errno));
        }

        token = NULL;
        length = 0;

        /*
         * Start from a valid empty string: strncat needs a NUL-terminated
         * destination, and printf below must be safe even for an empty file.
         * size always counts the terminating NUL.
         */
        size = 1;
        buf = malloc(size);
        assert_msg(buf != NULL) {
                fprintf(stderr, "Error ocurred: %s\n", strerror(errno));
        }
        buf[0] = '\0';

        while ((read = getline(&token, &length, file)) != -1) {
                /* The last line of a file may lack a trailing newline. */
                if (read > 0 && token[read - 1] == '\n')
                        token[read - 1] = ' ';

                size += (size_t)read;
                /* Keep buf intact until realloc is known to have succeeded. */
                tmp = realloc(buf, size);
                assert_msg(tmp != NULL) {
                        fprintf(stderr, "Error ocurred: %s\n", strerror(errno));
                }
                buf = tmp;

                /* buf has room for the old text, read new characters and the NUL. */
                (void)strncat(buf, token, (size_t)read);
        }
        printf("%s\n", buf);

        fclose(file);
        free(buf);
        free(token);

        return (EXIT_SUCCESS);
}

对于文件file.txt：

that is a
text
which I
would like to
read
from file.

我得到了一个结果：

$ ./program file.txt
that is a text which I would like to read from file.

关于该解决方案的一些值得说的话：

我没有使用fgets(3)，而是使用了getline(3)函数，因为通过它很容易得知该行字符串的长度（read变量），并且它会自动为读到的字符串分配内存（token）。重要的是要记得用free(3)释放这块内存。在某些类Unix系统上，为了避免兼容性问题，默认情况下不提供getline(3)。因此，需要在包含<stdio.h>头文件之前使用#define _WITH_GETLINE宏来使该函数可用。
buf只占用保存字符串所必需的空间。每从文件中读取一行，就通过realloc(3)把buf按所需的空间量扩展。这是一种更“通用”的解决方案。重要的是要记得释放在堆上分配的对象。
我还使用了strncat(3)，它确保追加到buf中的字符不会超过read个（即token的长度）。这也不是使用strncat(3)的最佳方式，因为我们还应该检测字符串是否被截断。但总的来说，它优于直接使用不建议使用的strcat(3)，因为后者可能让恶意用户通过缓冲区溢出攻击任意改变正在运行的程序的行为。strcat(3)和strncat(3)都会在末尾添加终止符\0。
getline(3)返回的字符串带有换行符，因此我决定把换行符替换为空格（因为这里是要把文件中的单词拼成一个句子）。我本来还应该去掉最后多出的那个空格，但我不想让源代码变得复杂。

另外还有一个并非必需的东西：我定义了自己的宏assert_msg(x)，它能够运行assert(3)函数并显示一条文本错误消息。这只是一个附加功能，但多亏了它，我们才能在打开文件失败时看到错误消息。

从文件中拆分字符串并将它们放入数组会导致程序崩溃

3 个答案: