Question

我想用C读取一个文本文件的内容，使得读取完成后得到一个字符串数组，其中第n个字符串对应文本文件的第n行。文件的行可以任意长。

实现这一目标的优雅方式是什么？我知道将一个文本文件直接读入一个适当大小的缓冲区的一些巧妙的技巧，但将其分解为行会使它变得更加棘手（至少据我所知）。

非常感谢！

Answer 1

将其分解为行意味着解析文本并将所有EOL（这里的EOL指\n和\r）替换为0。通过这种方式，您实际上可以重用缓冲区，并将每行的开头存储到一个单独的char *数组中（所有这些只需要2次遍历）。

通过这种方式，您可以对整个文件大小进行一次读取+ 2次解析，这可能会提高性能。

Answer 2

可以读取文件中的行数（循环fgets），然后创建一个二维数组，第一个维度是行数+ 1。然后，只需将文件重新读入数组。

但是，您需要定义元素的长度。或者，计算最长行的长度。

示例代码：

inFile = fopen(FILENAME, "r");
lineCount = 0;
while(inputError != EOF) {
    inputError = fscanf(inFile, "%s\n", word);
    lineCount++;
}
fclose(inFile);
  // Above iterates lineCount++ after the EOF to allow for an array
  // that matches the line numbers

char names[lineCount][MAX_LINE];

fopen(FILENAME, "r");
for(i = 1; i < lineCount; i++)
    fscanf(inFile, "%s", names[i]);
fclose(inFile);

Answer 3

对于C（而不是C ++），您最终可能会使用fgets()。但是，由于您的任意长度行，您可能会遇到问题。

Answer 4

也许链表是最好的方法？编译器不喜欢大小未知的数组。使用链表，您可以处理非常大的文本文件，而不必担心为数组分配足够的内存。

不幸的是，我还没有学会如何实现链表，但也许其他人可以帮助你。

Answer 5

如果你有一个很好的方法将整个文件读入内存，那么你几乎就完成了。读入之后，您可以扫描文件两次：第一次计算行数，第二次设置行指针并用'\0'替换'\n'（如果文件是在Windows二进制模式下读取的，可能还要替换'\r'）。在两次扫描之间分配一个指针数组，因为此时您已知道需要多少指针。

Answer 6

你可以这样使用

#include <stdlib.h> /* exit, malloc, realloc, free */
#include <stdio.h>  /* fopen, fgetc, fputs, fwrite */

/*
 * State carried by a line reader from one call to the next.
 * Callers should treat every member as private.
 */
struct line_reader {
    FILE *f;        /* stream the lines are read from */
    char *buf;      /* heap buffer holding the most recently read line */
    size_t siz;     /* number of bytes currently allocated for buf */
};

/*
 * Prepares the line reader _lr_ to read from the stream _f_.
 * Nothing is allocated here: the reader starts with no buffer and a
 * recorded buffer size of zero.
 */
void
lr_init(struct line_reader *lr, FILE *f)
{
    lr->siz = 0;
    lr->buf = NULL;
    lr->f = f;
}

/*
 * Fetches the next line from the reader's stream.
 *
 * On success the return value points at the line and *len holds its
 * length in characters (always at least 1). The trailing '\n' is part
 * of the line when one was read; the last line of a file may lack it.
 * The bytes are _not_ NUL-terminated, so the result is not a C string.
 * The pointer stays valid only until the next call to next_line() or
 * lr_free() with the same _lr_.
 *
 * Returns NULL at end of file, and also on failure (an error on the
 * stream, or a failed memory allocation).
 */
char *
next_line(struct line_reader *lr, size_t *len)
{
    *len = 0;           /* Nothing collected yet. */

    for (;;) {
        int ch = fgetc(lr->f);

        /* The error indicator is consulted after every read. */
        if (ferror(lr->f))
            return NULL;

        if (ch == EOF) {
            /*
             * End of file also terminates a pending last line;
             * with nothing pending there is no line to return.
             */
            return (*len != 0) ? lr->buf : NULL;
        }

        if (*len == lr->siz) {
            /* Buffer is full: extend it by another 4096 bytes. */
            size_t grown_siz = lr->siz + 4096;
            char *grown = realloc(lr->buf, grown_siz);

            if (grown == NULL)
                return NULL;    /* lr->buf and lr->siz stay as they were */
            lr->siz = grown_siz;
            lr->buf = grown;
        }

        /* Store the character, then advance the length. */
        lr->buf[*len] = ch;
        ++*len;

        /* A newline completes the line. */
        if (ch == '\n')
            return lr->buf;
    }
}

/*
 * Releases the line buffer owned by _lr_ and resets the reader to the
 * "no buffer" state. The stream itself is not closed.
 */
void
lr_free(struct line_reader *lr)
{
    char *old = lr->buf;

    lr->siz = 0;
    lr->buf = NULL;
    free(old);
}

/*
 * Read a file line by line.
 * http://rosettacode.org/wiki/Read_a_file_line_by_line
 *
 * Opens "foobar.txt" and writes every line to stdout prefixed with
 * "LINE: ". Returns 0 on success, or 1 if the file cannot be opened or
 * reading stops before end of file.
 */
int
main(void)
{
    struct line_reader lr;
    FILE *f;
    size_t len;
    char *line;
    int status = 0;

    f = fopen("foobar.txt", "r");
    if (f == NULL) {
        perror("foobar.txt");
        return 1;
    }

    /*
     * This loop reads each line.
     * Remember that line is not a C string.
     * There is no terminating '\0'.
     */
    lr_init(&lr, f);
    while ((line = next_line(&lr, &len)) != NULL) {
        /*
         * Do something with line.
         */
        fputs("LINE: ", stdout);
        fwrite(line, len, 1, stdout);
    }

    /*
     * next_line() returns NULL both at end of file and on failure;
     * anything other than end of file is reported as an error.
     * Report before cleaning up so the cleanup calls cannot disturb errno.
     */
    if (!feof(f)) {
        perror("next_line");
        status = 1;
    }

    /* Release the line buffer and the stream on every path. */
    lr_free(&lr);
    fclose(f);

    return status;
}

将文本文件读入C中的行数组中

6 个答案: