假设我有两个这样的文件:
FILE1.TXT
john
is
the new
guy
FILE2.TXT
man
the old
is
rick
cat
dog
我想将file1
的第一行与file2
中的所有行进行比较,并验证它是否存在。如果没有,请从file1
开始第二行,并将其与file2
的所有行进行比较..依此类推,直到eof
到达file1
。
我期望的输出是:
john
the new
guy
我认为应该怎么做:
file1
和file2
file1
获取第一行并将其与file2
file1
的所有行都被浪费现在,我不知道自己做错了什么,但我没有得到我期望的结果:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
int countlines(char *filename)
{
int ch = 0, lines = 0;
FILE *fp = fopen(filename, "r");
if (fp == NULL)
return 0;
do {
ch = fgetc(fp);
if (ch == '\n')
lines++;
} while (ch != EOF);
if (ch != '\n' && lines != 0)
lines++;
fclose(fp);
return lines;
}
int main(int argc, char *argv[])
{
FILE *template_file = fopen(argv[1], "r");
FILE *data_file = fopen(argv[2], "r");
char buffer_line_template_file[100];
char buffer_line_data_file[100];
if (argc != 3)
{
perror("You didn't insert all the arguments!\n\n");
exit(EXIT_FAILURE);
}
if (template_file == NULL || data_file == NULL)
{
perror("Error while opening the file!\n\n");
exit(EXIT_FAILURE);
}
int counter = 0;
for (int i = 0; i < countlines(argv[1]); i++)
{
fgets(buffer_line_template_file, 100, template_file);
for (int j = 0; j < countlines(argv[2]); j++)
{
fgets(buffer_line_data_file, 100, data_file);
if (strcmp(buffer_line_template_file, buffer_line_data_file) != 0)
{
counter++;
printf("%d", counter);
}
}
}
printf("\n\n");
return 0;
}
有人可以指出我正确的方向吗?出于测试目的,我在最后创建了一个计数器,这是一个小调试的一部分。应该有print()
函数
根据@chux answer,我得到了以下简化代码:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
int main(int argc, char *argv[])
{
FILE *template_file = fopen(argv[1], "r");
FILE *data_file = fopen(argv[2], "r");
char buffer_line_template_file[100];
char buffer_line_data_file[100];
if (argc != 3)
{
perror("You didn't insert all the arguments!\n\n");
exit(EXIT_FAILURE);
}
if (template_file == NULL || data_file == NULL)
{
perror("Error while opening the file!\n\n");
exit(EXIT_FAILURE);
}
while(fgets(buffer_line_template_file, 100, template_file))
{
buffer_line_template_file[strcspn(buffer_line_template_file, "\n")] = '\0';
rewind(data_file);
while (fgets(buffer_line_data_file, 100, data_file))
{
buffer_line_data_file[strcspn(buffer_line_data_file, "\n")] = '\0';
if (strcmp(buffer_line_template_file, buffer_line_data_file) != 0)
{
printf("%s\n", buffer_line_template_file);
}
}
}
printf("\n\n");
return 0;
}
上面的代码给出了以下输出,这不是预期的结果:
john
john
john
john
john
john
is
is
is
is
is
the new
the new
the new
the new
the new
the new
guy
guy
guy
guy
guy
guy
答案 0 :(得分:2)
OP代码的问题
行的定义不准确。
过度重新计算
模糊确定文件中的行数。
string
不同,读取行的定义不是很明确。主要特异性问题:一行包含尾随'\n'
。如果第一个答案是是,那么'\n'
之后文件中的最后一个文本是否构成一条线? (过长的行是另一个问题,但今天我们不要处理它。)因此可能某些行以'\n
'结尾,而其他行则不然,愚弄strcmp("dog", "dog\n")
。
最简单的解决方案是读取行,直到1)遇到'\n'
,2)发生EOF
或3)行缓冲区已满。然后在获得一行后,删掉潜在尾随'\n'
。
现在所有行代码随后都没有'\n'
。
fgets(buffer_line_template_file, 100, template_file);
buffer_line_template_file[strcspn(buffer_line_template_file, "\n")] = '\0';
OP的循环令人难以置信的浪费。考虑一个包含1000行的文件。当一个countlines()
调用足够时,代码将循环播放1000次countlines()
(每个countlines()
调用读取1000行)次。
// for (int j = 0; j < countlines(argv[2]); j++)
int j_limit = countlines(argv[2]);
for (int j = 0; j < j_limit; j++)
无论如何都不需要计算行数,只需继续EOF
(fgets()
返回NULL
)。所以不需要修复它的模糊定义。 (模糊与#1相同的问题)
int counter = 0;
for (fgets(buffer_line_template_file, 100, template_file)) {
buffer_line_template_file[strcspn(buffer_line_template_file, "\n")] = '\0';
rewind(data_file);
while ((fgets(buffer_line_data_file, 100, data_file)) {
buffer_line_data_file[strcspn(buffer_line_data_file, "\n")] = '\0';
if (strcmp(buffer_line_template_file, buffer_line_data_file) != 0) {
counter++;
printf("%d", counter);
}
}
}
可能的其他简化 - 另一天。
FWIW,在计算文本的行之后,允许文件中的最后一行以'\n'
结尾。
unsigned long long FileLineCount(FILE *istream) {
unsigned long long LineCount = 0;
rewind(istream);
int previous = '\n';
int ch;
while ((ch = fgetc(inf)) != EOF) {
if (previous == '\n') LineCount++;
previous = ch;
}
return LineCount;
}
请注意,此函数可能会得到fgets()
调用的不同结果。考虑一个包含150个字符的行的文件。 fgets(..., 100,...)
将报告2行。 FileLineCount()
报告1。
[编辑]更新了符合OP功能的代码。
int found = 0;
while (fgets(buffer_line_data_file, 100, data_file))
{
buffer_line_data_file[strcspn(buffer_line_data_file, "\n")] = '\0';
if (strcmp(buffer_line_template_file, buffer_line_data_file) == 0)
{
found = 1;
break;
}
}
if (!found) printf("%s\n", buffer_line_template_file);
答案 1 :(得分:1)
此程序打印两个文件file1.txt
和file2.txt
的差异。
#include<stdio.h>
#include <stdlib.h>
#include <memory.h>
int main() {
FILE *fp1, *fp2;
int ch1, ch2;
char fname1[40], fname2[40];
char *line = NULL;
size_t len = 0;
ssize_t read;
char *line2 = NULL;
size_t len2 = 0;
ssize_t read2;
fp1 = fopen("file1.txt", "r");
fp2 = fopen("file2.txt", "r");
if (fp1 == NULL) {
printf("Cannot open %s for reading ", fname1);
exit(1);
} else if (fp2 == NULL) {
printf("Cannot open %s for reading ", fname2);
exit(1);
} else {
while ((read = getline(&line, &len, fp1)) != -1 && (read2 = getline(&line2, &len2, fp2)) != -1) {
if (!strcmp(line, line2)) {
printf("Retrieved diff on line %zu :\n", read);
printf("%s", line);
}
}
if (ch1 == ch2)
printf("Files are identical \n");
else if (ch1 != ch2)
printf("Files are Not identical \n");
fclose(fp1);
fclose(fp2);
}
return (0);
}
答案 2 :(得分:1)
你已经有了一个非常好的答案(并且总是来自chux),但这是一个稍微不同的问题方法。它使用自动存储将file2读入字符串数组,然后将file1中的每一行与file2中的每一行进行比较,以确定它是否是唯一的。您可以轻松地将代码转换为动态分配内存,但为了省略了复杂性:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
enum { MAXC = 256, MAXL = 512 };
void file1infile2 (FILE *fp2, FILE *fp1, size_t *n2, size_t *n1);
int main (int argc, char **argv) {
FILE *fp1 = fopen (argc > 1 ? argv[1] : "file1.txt", "r");
FILE *fp2 = fopen (argc > 2 ? argv[2] : "file2.txt", "r");
size_t n1 = 0, n2 = 0;
if (!fp1 || !fp2) {
fprintf (stderr, "error: file open failed.\n");
return 1;
}
printf ("\nunique words in file1, not in file 2.\n\n");
file1infile2 (fp2, fp1, &n2, &n1);
printf ("\nanalyzed %zu lines in file1 against %zu lines in file2.\n\n",
n1, n2);
return 0;
}
void file1infile2 (FILE *fp2, FILE *fp1, size_t *n2, size_t *n1)
{
char buf[MAXC] = "";
char f2buf[MAXL][MAXC] = { "" };
size_t i;
*n1 = *n2 = 0;
while (*n2 < MAXL && fgets (buf, MAXC, fp2)) {
char *np = 0;
if (!(np = strchr (buf, '\n'))) {
fprintf (stderr, "error: line exceeds MAXC chars.\n");
exit (EXIT_FAILURE);
}
*np = 0;
strcpy (f2buf[(*n2)++], buf);
}
while (*n1 < MAXL && fgets (buf, MAXC, fp1)) {
char *np = 0;
if (!(np = strchr (buf, '\n'))) {
fprintf (stderr, "error: line exceeds MAXC chars.\n");
exit (EXIT_FAILURE);
}
*np = 0, (*n1)++;
for (i = 0; i < *n2; i++)
if (!(strcmp (f2buf[i], buf)))
goto matched;
printf (" %s\n", buf);
matched:;
}
}
查看代码,如果您有任何问题,请告诉我。
示例使用/输出
$ ./bin/f1inf2 dat/f1 dat/f2
unique words in file1, not in file 2.
john
the new
guy
analyzed 4 lines in file1 against 6 lines in file2.