Question

假设我有两个这样的文件：

FILE1.TXT

john
is
the new
guy

FILE2.TXT

man
the old
is
rick
cat
dog

我想将file1的第一行与file2中的所有行进行比较，并验证它是否存在。如果没有，请从file1开始第二行，并将其与file2的所有行进行比较..依此类推，直到eof到达file1。

我期望的输出是：

john
the new
guy

我认为应该怎么做：

阅读file1和file2
创建一个返回每个行号的函数
从file1获取第一行并将其与file2
执行此操作，直到file1的所有行都被浪费

现在，我不知道自己做错了什么，但我没有得到我期望的结果：

#include <stdio.h>
#include <string.h>
#include <stdlib.h>

int countlines(char *filename)
{                                
    int ch = 0, lines = 0;
    FILE *fp = fopen(filename, "r");
    if (fp == NULL)
        return 0;

    do {
        ch = fgetc(fp);
        if (ch == '\n')
            lines++;
    } while (ch != EOF);

    if (ch != '\n' && lines != 0)
        lines++;
    fclose(fp);

    return lines;
}

int main(int argc, char *argv[])
{
    FILE *template_file = fopen(argv[1], "r");
    FILE *data_file = fopen(argv[2], "r");

    char buffer_line_template_file[100];
    char buffer_line_data_file[100];


    if (argc != 3)
    {
        perror("You didn't insert all the arguments!\n\n");
        exit(EXIT_FAILURE);
    }

    if (template_file == NULL || data_file == NULL)
    {
        perror("Error while opening the file!\n\n");
        exit(EXIT_FAILURE);
    }

    int counter = 0;
    for (int i = 0; i < countlines(argv[1]); i++)
    {
        fgets(buffer_line_template_file, 100, template_file);

        for (int j = 0; j < countlines(argv[2]); j++)
        {
            fgets(buffer_line_data_file, 100, data_file);

            if (strcmp(buffer_line_template_file, buffer_line_data_file) != 0)
            {
                counter++;
                printf("%d", counter);
            }
        }
    }

    printf("\n\n");

    return 0;
}

有人可以指出我正确的方向吗？出于测试目的，我在最后创建了一个计数器，这是一个小调试的一部分。应该有print()函数

根据@chux answer，我得到了以下简化代码：

#include <stdio.h>
#include <string.h>
#include <stdlib.h>

int main(int argc, char *argv[])
{
    FILE *template_file = fopen(argv[1], "r");
    FILE *data_file = fopen(argv[2], "r");

    char buffer_line_template_file[100];
    char buffer_line_data_file[100];


    if (argc != 3)
    {
        perror("You didn't insert all the arguments!\n\n");
        exit(EXIT_FAILURE);
    }

    if (template_file == NULL || data_file == NULL)
    {
        perror("Error while opening the file!\n\n");
        exit(EXIT_FAILURE);
    }

    while(fgets(buffer_line_template_file, 100, template_file))
    {
        buffer_line_template_file[strcspn(buffer_line_template_file, "\n")] = '\0';

        rewind(data_file);
        while (fgets(buffer_line_data_file, 100, data_file))
        {
            buffer_line_data_file[strcspn(buffer_line_data_file, "\n")] = '\0';

            if (strcmp(buffer_line_template_file, buffer_line_data_file) != 0)
            {
                printf("%s\n", buffer_line_template_file);
            }
        }
    }

    printf("\n\n");

    return 0;
}

上面的代码给出了以下输出，这不是预期的结果：

john
john
john
john
john
john
is
is
is
is
is
the new
the new
the new
the new
the new
the new
guy
guy
guy
guy
guy
guy

Answer 1

OP代码的问题

行的定义不准确。
过度重新计算
模糊确定文件中的行数。

与在{C中具有精确定义的string不同，读取行的定义不是很明确。主要特异性问题：一行包含尾随'\n'。如果第一个答案是是，那么'\n'之后文件中的最后一个文本是否构成一条线？（过长的行是另一个问题，但今天我们不要处理它。）

因此可能某些行以'\n'结尾，而其他行则不然，愚弄strcmp("dog", "dog\n")。

最简单的解决方案是读取行，直到1）遇到'\n'，2）发生EOF或3）行缓冲区已满。然后在获得一行后，删掉潜在尾随'\n'。

现在所有行代码随后都没有'\n'。

fgets(buffer_line_template_file, 100, template_file);
buffer_line_template_file[strcspn(buffer_line_template_file, "\n")] = '\0';

OP的循环令人难以置信的浪费。考虑一个包含1000行的文件。当一个countlines()调用足够时，代码将循环播放1000次countlines()（每个countlines()调用读取1000行）次。
```
// for (int j = 0; j < countlines(argv[2]); j++)
int j_limit = countlines(argv[2]);
for (int j = 0; j < j_limit; j++)
```

无论如何都不需要计算行数，只需继续EOF（fgets()返回NULL）。所以不需要修复它的模糊定义。（模糊与＃1相同的问题）

int counter = 0;
for (fgets(buffer_line_template_file, 100, template_file)) {
  buffer_line_template_file[strcspn(buffer_line_template_file, "\n")] = '\0';

  rewind(data_file);
  while ((fgets(buffer_line_data_file, 100, data_file)) {
    buffer_line_data_file[strcspn(buffer_line_data_file, "\n")] = '\0';

    if (strcmp(buffer_line_template_file, buffer_line_data_file) != 0) {
      counter++;
      printf("%d", counter);
    }
  }
}

可能的其他简化 - 另一天。

FWIW，在计算文本的行之后，允许文件中的最后一行以'\n'结尾。

    unsigned long long FileLineCount(FILE *istream) {
      unsigned long long LineCount = 0;
      rewind(istream);
      int previous = '\n';
      int ch;

      while ((ch = fgetc(inf)) != EOF) { 
        if (previous == '\n') LineCount++;
        previous = ch;
      }
      return LineCount;
    }

请注意，此函数可能会得到fgets()调用的不同结果。考虑一个包含150个字符的行的文件。 fgets(..., 100,...)将报告2行。 FileLineCount()报告1。

[编辑]更新了符合OP功能的代码。

    int found = 0;
    while (fgets(buffer_line_data_file, 100, data_file))
    {
        buffer_line_data_file[strcspn(buffer_line_data_file, "\n")] = '\0';

        if (strcmp(buffer_line_template_file, buffer_line_data_file) == 0)
        {
            found = 1;
            break;
        }
    }
    if (!found) printf("%s\n", buffer_line_template_file);

Answer 2

此程序打印两个文件file1.txt和file2.txt的差异。

#include<stdio.h>
#include <stdlib.h>
#include <memory.h>

int main() {
    FILE *fp1, *fp2;
    int ch1, ch2;
    char fname1[40], fname2[40];

    char *line = NULL;
    size_t len = 0;
    ssize_t read;

    char *line2 = NULL;
    size_t len2 = 0;
    ssize_t read2;

    fp1 = fopen("file1.txt", "r");
    fp2 = fopen("file2.txt", "r");

    if (fp1 == NULL) {
        printf("Cannot open %s for reading ", fname1);
        exit(1);
    } else if (fp2 == NULL) {
        printf("Cannot open %s for reading ", fname2);
        exit(1);
    } else {
        while ((read = getline(&line, &len, fp1)) != -1 && (read2 = getline(&line2, &len2, fp2)) != -1) {
            if (!strcmp(line, line2)) {
                printf("Retrieved diff on line %zu :\n", read);
                printf("%s", line);
            }
        }
        if (ch1 == ch2)
            printf("Files are identical \n");
        else if (ch1 != ch2)
            printf("Files are Not identical \n");

        fclose(fp1);
        fclose(fp2);
    }
    return (0);
}

Answer 3

你已经有了一个非常好的答案（并且总是来自chux），但这是一个稍微不同的问题方法。它使用自动存储将file2读入字符串数组，然后将file1中的每一行与file2中的每一行进行比较，以确定它是否是唯一的。您可以轻松地将代码转换为动态分配内存，但为了省略了复杂性：

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

enum { MAXC = 256, MAXL = 512 };

void file1infile2 (FILE *fp2, FILE *fp1, size_t *n2, size_t *n1);

int main (int argc, char **argv) {

    FILE *fp1 = fopen (argc > 1 ? argv[1] : "file1.txt", "r");
    FILE *fp2 = fopen (argc > 2 ? argv[2] : "file2.txt", "r");
    size_t n1 = 0, n2 = 0;

    if (!fp1 || !fp2) {
        fprintf (stderr, "error: file open failed.\n");
        return 1;
    }

    printf ("\nunique words in file1, not in file 2.\n\n");
    file1infile2 (fp2, fp1, &n2, &n1);
    printf ("\nanalyzed %zu lines in file1 against %zu lines in file2.\n\n",
            n1, n2);

    return 0;
}

void file1infile2 (FILE *fp2, FILE *fp1, size_t *n2, size_t *n1)
{
    char buf[MAXC] = "";
    char f2buf[MAXL][MAXC] = { "" };
    size_t i;
    *n1 = *n2 = 0;

    while (*n2 < MAXL && fgets (buf, MAXC, fp2)) {
        char *np = 0;
        if (!(np = strchr (buf, '\n'))) {
            fprintf (stderr, "error: line exceeds MAXC chars.\n");
            exit (EXIT_FAILURE);
        }
        *np = 0;
        strcpy (f2buf[(*n2)++], buf);
    }

    while (*n1 < MAXL && fgets (buf, MAXC, fp1)) {
        char *np = 0;
        if (!(np = strchr (buf, '\n'))) {
            fprintf (stderr, "error: line exceeds MAXC chars.\n");
            exit (EXIT_FAILURE);
        }
        *np = 0, (*n1)++;

        for (i = 0; i < *n2; i++)
            if (!(strcmp (f2buf[i], buf)))
                goto matched;

        printf ("  %s\n", buf);
        matched:;
    }
}

查看代码，如果您有任何问题，请告诉我。

示例使用/输出

$ ./bin/f1inf2 dat/f1 dat/f2

unique words in file1, not in file 2.

  john
  the new
  guy

analyzed 4 lines in file1 against 6 lines in file2.

比较两个不同文件中的每一行，并打印C中不同的行

3 个答案: