比较两个不同文件中的每一行,并打印C中不同的行

时间:2016-06-06 21:47:32

标签: c io

假设我有两个这样的文件:

FILE1.TXT

john
is
the new
guy

FILE2.TXT

man
the old
is
rick
cat
dog

我想将file1的第一行与file2中的所有行进行比较,并验证它是否存在。如果没有,请从file1开始第二行,并将其与file2的所有行进行比较..依此类推,直到eof到达file1

我期望的输出是:

john
the new
guy

我认为应该怎么做:

  • 读取 file1 和 file2
  • 创建一个返回每个文件行数的函数
  • 从 file1 取出第一行,并将其与 file2 中的所有行进行比较
  • 重复此操作,直到 file1 的所有行都处理完毕

现在,我不知道自己做错了什么,但我没有得到我期望的结果:

#include <stdio.h>
#include <string.h>
#include <stdlib.h>

/*
 * Count the lines in the text file named by `filename`.
 *
 * A line is a run of characters terminated by '\n'. A final run of
 * characters that is not terminated by '\n' also counts as one line.
 *
 * Returns the number of lines, or 0 if the file cannot be opened (so an
 * unreadable file is indistinguishable from an empty one).
 */
int countlines(char *filename)
{
    FILE *fp = fopen(filename, "r");
    if (fp == NULL) {
        return 0;
    }

    int lines = 0;
    int previous = '\n';    /* an empty file has no unterminated tail */
    int ch;

    while ((ch = fgetc(fp)) != EOF) {
        if (ch == '\n') {
            lines++;
        }
        previous = ch;
    }

    /*
     * `ch` is always EOF here, so the last character actually read has to
     * be remembered separately to detect an unterminated final line.
     */
    if (previous != '\n') {
        lines++;
    }

    fclose(fp);
    return lines;
}

/*
 * Usage: <program> <template_file> <data_file>
 *
 * Debugging version: for each line read from the template file it reads
 * lines from the data file, increments a counter for every pair that
 * differs and prints the counter's running value.
 *
 * NOTE: the comparison loops are intentionally left as posted, because
 * they are the subject of the question: countlines() is re-evaluated on
 * every iteration, data_file is never rewound, the fgets() results are not
 * checked and the trailing '\n' is not stripped before strcmp().
 */
int main(int argc, char *argv[])
{
    /* Validate argc first: argv[1] and argv[2] only exist when argc == 3. */
    if (argc != 3)
    {
        /* Not an errno failure, so perror() would append an unrelated message. */
        fprintf(stderr, "You didn't insert all the arguments!\n\n");
        exit(EXIT_FAILURE);
    }

    FILE *template_file = fopen(argv[1], "r");
    FILE *data_file = fopen(argv[2], "r");

    if (template_file == NULL || data_file == NULL)
    {
        /* perror() appends ": <reason>\n" itself, so pass a plain prefix. */
        perror("Error while opening the file");
        if (template_file != NULL)
            fclose(template_file);
        if (data_file != NULL)
            fclose(data_file);
        exit(EXIT_FAILURE);
    }

    /* Start as empty strings so strcmp() never reads indeterminate bytes
       when an fgets() call below fails. */
    char buffer_line_template_file[100] = "";
    char buffer_line_data_file[100] = "";

    int counter = 0;
    for (int i = 0; i < countlines(argv[1]); i++)
    {
        fgets(buffer_line_template_file, 100, template_file);

        for (int j = 0; j < countlines(argv[2]); j++)
        {
            fgets(buffer_line_data_file, 100, data_file);

            if (strcmp(buffer_line_template_file, buffer_line_data_file) != 0)
            {
                counter++;
                printf("%d", counter);
            }
        }
    }

    printf("\n\n");

    fclose(template_file);
    fclose(data_file);

    return 0;
}

有人可以给我指出正确的方向吗?出于测试目的,我在最后加了一个计数器,它只是简单调试的一部分;那里本应是打印函数的调用。

根据@chux answer,我得到了以下简化代码:

#include <stdio.h>
#include <string.h>
#include <stdlib.h>

/*
 * Usage: <program> <template_file> <data_file>
 *
 * For every line of the template file, rescans the data file from the
 * start and compares the two lines with their trailing '\n' removed.
 *
 * NOTE: the comparison logic is intentionally left as posted: the template
 * line is printed once for EACH data line that differs from it, not once
 * when no data line matches. That is what produces the repeated output
 * the question asks about.
 */
int main(int argc, char *argv[])
{
    /* Validate argc first: argv[1] and argv[2] only exist when argc == 3. */
    if (argc != 3)
    {
        /* Not an errno failure, so perror() would append an unrelated message. */
        fprintf(stderr, "You didn't insert all the arguments!\n\n");
        exit(EXIT_FAILURE);
    }

    FILE *template_file = fopen(argv[1], "r");
    FILE *data_file = fopen(argv[2], "r");

    if (template_file == NULL || data_file == NULL)
    {
        /* perror() appends ": <reason>\n" itself, so pass a plain prefix. */
        perror("Error while opening the file");
        if (template_file != NULL)
            fclose(template_file);
        if (data_file != NULL)
            fclose(data_file);
        exit(EXIT_FAILURE);
    }

    char buffer_line_template_file[100];
    char buffer_line_data_file[100];

    while (fgets(buffer_line_template_file, sizeof buffer_line_template_file, template_file))
    {
        /* Cut the line at the first '\n', if there is one. */
        buffer_line_template_file[strcspn(buffer_line_template_file, "\n")] = '\0';

        /* Restart the data file for each template line. */
        rewind(data_file);
        while (fgets(buffer_line_data_file, sizeof buffer_line_data_file, data_file))
        {
            buffer_line_data_file[strcspn(buffer_line_data_file, "\n")] = '\0';

            if (strcmp(buffer_line_template_file, buffer_line_data_file) != 0)
            {
                printf("%s\n", buffer_line_template_file);
            }
        }
    }

    printf("\n\n");

    fclose(template_file);
    fclose(data_file);

    return 0;
}

上面的代码给出了以下输出,这不是预期的结果:

john
john
john
john
john
john
is
is
is
is
is
the new
the new
the new
the new
the new
the new
guy
guy
guy
guy
guy
guy

3 个答案:

答案 0 :(得分:2)

OP代码的问题

  1. 行的定义不准确。

  2. 过度重新计算

  3. 模糊确定文件中的行数。

    1. 与在 C 中有精确定义的字符串(string)不同,“读取一行”的定义并不那么明确。主要的歧义在于:一行是否包含尾随的 '\n'?如果答案是“包含”,那么文件中最后一个 '\n' 之后的文本是否也算一行?(过长的行是另一个问题,今天暂不处理。)
    2. 因此,可能有些行以 '\n' 结尾,而另一些行则没有,这会使 strcmp("dog", "dog\n") 这样的比较得到“不相等”的结果。

      最简单的解决方案是读取,直到1)遇到'\n',2)发生EOF或3)行缓冲区已满。然后在获得一行后,删掉潜在尾随'\n'

      现在所有代码随后都没有'\n'

      /* Read one line, then cut it at the first '\n' (if any) so the
         buffer never carries a trailing newline into later comparisons. */
      fgets(buffer_line_template_file, 100, template_file);
      buffer_line_template_file[strcspn(buffer_line_template_file, "\n")] = '\0';
      
      1. OP 的循环极其浪费。考虑一个包含 1000 行的文件:本来调用一次 countlines() 就足够了,但代码会在循环中调用 countlines() 1000 次(每次调用都要读取全部 1000 行)。

        // Original loop header, which calls countlines() on every iteration:
        // for (int j = 0; j < countlines(argv[2]); j++)
        // Call countlines() once and reuse the result as the loop bound.
        int j_limit = countlines(argv[2]);
        for (int j = 0; j < j_limit; j++)
        
      2. 其实根本不需要统计行数,只需一直读取到 EOF(即 fgets() 返回 NULL)为止。因此也就不需要修正行数的模糊定义。(这种模糊性与 #1 是同一个问题。)

        int counter = 0;
        for (fgets(buffer_line_template_file, 100, template_file)) {
          buffer_line_template_file[strcspn(buffer_line_template_file, "\n")] = '\0';
        
          rewind(data_file);
          while ((fgets(buffer_line_data_file, 100, data_file)) {
            buffer_line_data_file[strcspn(buffer_line_data_file, "\n")] = '\0';
        
            if (strcmp(buffer_line_template_file, buffer_line_data_file) != 0) {
              counter++;
              printf("%d", counter);
            }
          }
        }
        
      3. 可能的其他简化 - 另一天。

        顺便一提(FWIW),下面的函数用于统计文本的行数,并允许文件的最后一行可选地以 '\n' 结尾。

            /*
             * Count the lines of text in `istream`, reading it from the start.
             *
             * A line begins at the start of the stream and after every '\n',
             * provided at least one more character follows. The final line
             * is therefore counted whether or not it ends with '\n', and an
             * empty stream has 0 lines.
             *
             * The stream is rewound first and left positioned at end-of-file.
             */
            unsigned long long FileLineCount(FILE *istream) {
              unsigned long long LineCount = 0;
              rewind(istream);
              int previous = '\n';  /* treat start-of-file as a line start */
              int ch;

              /* Read from the stream passed in as `istream`. */
              while ((ch = fgetc(istream)) != EOF) {
                if (previous == '\n') LineCount++;
                previous = ch;
              }
              return LineCount;
            }
        

        请注意,此函数得到的结果可能与按 fgets() 调用次数统计的结果不同。考虑一个只有一行、该行包含 150 个字符的文件:fgets(..., 100, ...) 会报告 2 行,而 FileLineCount() 报告 1 行。

        [编辑]更新了符合OP功能的代码。

            /* Scan the data file until a line equal to the template line
               is seen or the data runs out. */
            int found = 0;
            while (!found && fgets(buffer_line_data_file, 100, data_file))
            {
                /* Cut the line at the first '\n', if there is one. */
                size_t eol = strcspn(buffer_line_data_file, "\n");
                buffer_line_data_file[eol] = '\0';

                found = (strcmp(buffer_line_template_file, buffer_line_data_file) == 0);
            }

            /* Print the template line only when no data line matched it. */
            if (!found)
            {
                printf("%s\n", buffer_line_template_file);
            }
        

答案 1 :(得分:1)

此程序打印两个文件 file1.txt 和 file2.txt 的差异。

/* getline() and ssize_t are POSIX; request them before any header is included. */
#define _POSIX_C_SOURCE 200809L

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <memory.h>
#include <sys/types.h>

/*
 * Compare file1.txt and file2.txt line by line.
 *
 * For every line number at which the two files differ, prints the line
 * number followed by the line from file1.txt. If one file ends before the
 * other, reports which one. Finishes with a verdict on whether the files
 * are identical.
 *
 * Returns 0 when the comparison ran, EXIT_FAILURE if a file cannot be opened.
 */
int main() {
    const char *fname1 = "file1.txt";
    const char *fname2 = "file2.txt";

    FILE *fp1 = fopen(fname1, "r");
    if (fp1 == NULL) {
        fprintf(stderr, "Cannot open %s for reading\n", fname1);
        return EXIT_FAILURE;
    }

    FILE *fp2 = fopen(fname2, "r");
    if (fp2 == NULL) {
        fprintf(stderr, "Cannot open %s for reading\n", fname2);
        fclose(fp1);
        return EXIT_FAILURE;
    }

    /* getline() allocates and grows these buffers; they are freed below. */
    char *line1 = NULL;
    char *line2 = NULL;
    size_t cap1 = 0;
    size_t cap2 = 0;

    size_t line_no = 0;
    int identical = 1;

    for (;;) {
        ssize_t read1 = getline(&line1, &cap1, fp1);
        ssize_t read2 = getline(&line2, &cap2, fp2);

        if (read1 == -1 && read2 == -1) {
            break;                      /* both files ended together */
        }
        line_no++;

        if (read1 == -1 || read2 == -1) {
            /* One file has run out of lines while the other has not. */
            identical = 0;
            printf("%s ends before line %zu\n",
                   read1 == -1 ? fname1 : fname2, line_no);
            break;
        }

        /* Drop the line terminator so that a last line without '\n'
           still compares equal to the same text followed by '\n'. */
        line1[strcspn(line1, "\n")] = '\0';
        line2[strcspn(line2, "\n")] = '\0';

        if (strcmp(line1, line2) != 0) {
            identical = 0;
            printf("Retrieved diff on line %zu :\n", line_no);
            printf("%s\n", line1);
        }
    }

    if (identical) {
        printf("Files are identical \n");
    } else {
        printf("Files are Not identical \n");
    }

    free(line1);
    free(line2);
    fclose(fp1);
    fclose(fp2);

    return (0);
}

答案 2 :(得分:1)

你已经得到了一个非常好的答案(chux 的回答一向如此),不过这里提供一种稍有不同的解决思路。它使用自动存储把 file2 读入一个字符串数组,然后将 file1 中的每一行与 file2 中的每一行进行比较,以确定该行是否是 file1 独有的。您可以很容易地把代码改为动态分配内存,但为了保持简单,这里省略了这部分复杂性:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

enum { MAXC = 256, MAXL = 512 };

void file1infile2 (FILE *fp2, FILE *fp1, size_t *n2, size_t *n1);

/*
 * Usage: <program> [file1 [file2]]
 *
 * Opens the two input files (defaulting to "file1.txt" and "file2.txt"),
 * calls file1infile2() to print the lines of file1 that do not occur in
 * file2, then prints how many lines of each file were analyzed.
 *
 * Returns 0 on success, 1 if either file cannot be opened.
 */
int main (int argc, char **argv) {

    FILE *fp1 = fopen (argc > 1 ? argv[1] : "file1.txt", "r");
    FILE *fp2 = fopen (argc > 2 ? argv[2] : "file2.txt", "r");
    size_t n1 = 0, n2 = 0;

    if (!fp1 || !fp2) {
        fprintf (stderr, "error: file open failed.\n");
        /* Release whichever stream did open. */
        if (fp1)
            fclose (fp1);
        if (fp2)
            fclose (fp2);
        return 1;
    }

    printf ("\nunique words in file1, not in file 2.\n\n");
    file1infile2 (fp2, fp1, &n2, &n1);
    printf ("\nanalyzed %zu lines in file1 against %zu lines in file2.\n\n",
            n1, n2);

    fclose (fp1);
    fclose (fp2);

    return 0;
}

/*
 * Read one line from fp into buf (capacity MAXC) without its '\n'.
 *
 * Returns 1 if a line was read and 0 when fgets() reports end of input.
 * Exits with an error if the line does not fit in buf.
 */
static int read_line (FILE *fp, char *buf)
{
    if (!fgets (buf, MAXC, fp))
        return 0;

    size_t len = strcspn (buf, "\n");
    if (buf[len] == '\n') {
        buf[len] = 0;
        return 1;
    }

    /* No '\n' was stored. Either this is the file's last line and it has no
     * terminator, or the line is longer than the buffer. The next character
     * tells the two apart: EOF or the line's own '\n' means it fitted. */
    int c = fgetc (fp);
    if (c != EOF && c != '\n') {
        fprintf (stderr, "error: line exceeds MAXC chars.\n");
        exit (EXIT_FAILURE);
    }
    return 1;
}

/*
 * Print every line of fp1 that does not occur in fp2.
 *
 * Up to MAXL lines of fp2 are loaded into an automatic array; each line of
 * fp1 is then compared against all of them and printed, indented by two
 * spaces, when none is equal.
 *
 * On return *n2 holds the number of fp2 lines loaded and *n1 the number of
 * fp1 lines examined. Lines of fp1 are not stored, so fp1 may have any
 * number of lines. If fp2 has more than MAXL lines a warning is written to
 * stderr and the extra lines are ignored. A line that does not fit in MAXC
 * chars terminates the program.
 */
void file1infile2 (FILE *fp2, FILE *fp1, size_t *n2, size_t *n1)
{
    char buf[MAXC] = "";
    char f2buf[MAXL][MAXC] = { "" };
    *n1 = *n2 = 0;

    while (*n2 < MAXL && read_line (fp2, buf))
        strcpy (f2buf[(*n2)++], buf);   /* fits: buf holds fewer than MAXC chars */

    /* The table is full; say so if fp2 still has data instead of silently
     * reporting lines as unique that were simply never loaded. */
    if (*n2 == MAXL && fgetc (fp2) != EOF)
        fprintf (stderr, "warning: file2 has more than MAXL lines, "
                         "extra lines ignored.\n");

    while (read_line (fp1, buf)) {
        int matched = 0;
        (*n1)++;

        for (size_t i = 0; i < *n2 && !matched; i++)
            matched = (strcmp (f2buf[i], buf) == 0);

        if (!matched)
            printf ("  %s\n", buf);
    }
}

查看代码,如果您有任何问题,请告诉我。

示例使用/输出

$ ./bin/f1inf2 dat/f1 dat/f2

unique words in file1, not in file 2.

  john
  the new
  guy

analyzed 4 lines in file1 against 6 lines in file2.