我有两个文件如下:
文件1
the quick brown fox jumps
jumps over the very lazy dog
brown fox jumps over the
lorem ipsum dolor
文件2
jumps over the very lazy *chicken*
brown fox jumps over the
the quick brown fox *swims*
an apple a day keeps the doctor away
我需要对这两个文件做 diff,并提取出只在其中一个文件中出现的行(即唯一行)。
但问题是:
因此,根据上述条件,预期输出为:
文件1
lorem ipsum dolor
文件2
an apple a day keeps the doctor away
有人知道快速有效的 diff 方法吗?(解决方案越短越好,输出要易于阅读。)我尝试过用 Excel 目视比较两个文件,但我需要对大量成对的日志文件执行此操作,那样做要花非常长的时间才能完成。
如有更好的建议,不胜感激。
谢谢和最诚挚的问候。
答案 0 :(得分:1)
为什么不为这项任务编写一个小程序呢?这样它在两个平台上都能使用。用一些与平台无关的 C 代码就可以轻松完成:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* One input line together with the leading words that form its comparison key. */
typedef struct Line
{
/* Heap copy of the line exactly as read; this is what main prints. */
char *line;
/* Second heap copy of the line that strtok splits in place; backs `words`. */
char *tokens;
/* Number of valid entries in `words`. */
size_t nwords;
/* Heap array of pointers to the individual words inside `tokens`. */
const char **words;
} Line;
/*
 * Return a freshly malloc'ed duplicate of the NUL-terminated string s.
 *
 * The process is terminated with EXIT_FAILURE if the allocation fails, so the
 * result is never NULL. The caller owns the copy and releases it with free().
 */
char *copyString(const char *s)
{
    size_t bytes = strlen(s) + 1; /* payload plus terminating NUL */
    char *dup = malloc(bytes);

    if (dup == NULL)
    {
        exit(EXIT_FAILURE);
    }
    memcpy(dup, s, bytes);
    return dup;
}
/*
 * qsort-compatible ordering of two Line records by their stored words.
 *
 * The words are compared pairwise with strcmp; the first differing pair
 * decides the result. If all shared positions are equal, the record holding
 * more words sorts after the shorter one.
 *
 * a, b: pointers to Line objects.
 * Returns a negative value, zero, or a positive value when a sorts before,
 * equal to, or after b respectively.
 */
int compareLines(const void *a, const void *b)
{
    const Line *lhs = a;
    const Line *rhs = b;
    size_t shared = (rhs->nwords < lhs->nwords) ? rhs->nwords : lhs->nwords;
    size_t idx = 0;

    while (idx < shared)
    {
        int diff = strcmp(lhs->words[idx], rhs->words[idx]);
        if (diff != 0)
        {
            return diff;
        }
        ++idx;
    }

    /* Every shared word matched: the longer key is the greater one. */
    if (lhs->nwords != rhs->nwords)
    {
        return (lhs->nwords > rhs->nwords) ? 1 : -1;
    }
    return 0;
}
/*
 * Read every line of f into a newly allocated array of Line records and sort
 * that array with compareLines.
 *
 * linesptr:  receives the array; it is never NULL on return, even for an
 *            empty file. Release it with freeLines().
 * f:         stream opened for reading.
 * wordCount: maximum number of leading words kept per line as the sort and
 *            comparison key.
 * Returns the number of records stored in *linesptr.
 *
 * Any allocation failure terminates the process with EXIT_FAILURE.
 *
 * Words are separated by blanks, tabs and line terminators, so the last word
 * of a short line never carries a trailing "\n" or "\r\n". That way the same
 * text compares equal whether or not it is newline-terminated.
 *
 * Limitation: input is read through a 1024-byte buffer, so a physical line
 * longer than 1023 characters is stored as several consecutive records.
 */
size_t readFile(Line **linesptr, FILE *f, size_t wordCount)
{
    static const char delims[] = " \t\r\n";
    size_t cap = 256;
    size_t n = 0;
    char buf[1024];

    /* Refuse word counts whose pointer array size would overflow size_t. */
    if (wordCount > (size_t)-1 / sizeof(const char *)) exit(EXIT_FAILURE);

    Line *lines = malloc(cap * sizeof *lines);
    if (!lines) exit(EXIT_FAILURE);

    while (fgets(buf, sizeof buf, f))
    {
        if (n == cap)
        {
            cap *= 2;
            Line *grown = realloc(lines, cap * sizeof *grown);
            if (!grown) exit(EXIT_FAILURE);
            lines = grown;
        }

        Line *cur = &lines[n];
        cur->line = copyString(buf);
        cur->tokens = copyString(buf);
        /* Always request at least one slot: malloc(0) may legally return NULL. */
        cur->words = malloc((wordCount ? wordCount : 1) * sizeof *cur->words);
        if (!cur->words) exit(EXIT_FAILURE);

        size_t c = 0;
        char *word = strtok(cur->tokens, delims);
        while (word && c < wordCount)
        {
            cur->words[c++] = word;
            if (c == wordCount) break;
            word = strtok(NULL, delims);
        }
        cur->nwords = c;

        /*
         * Trim the word array to what is actually used. Never shrink to zero
         * bytes (realloc may return NULL for that), and keep the larger array
         * if shrinking fails, since it is still perfectly usable.
         */
        if (c > 0 && c < wordCount)
        {
            const char **shrunk = realloc(cur->words, c * sizeof *shrunk);
            if (shrunk) cur->words = shrunk;
        }
        ++n;
    }

    /* Same care for the record array: an empty file must not look like OOM. */
    if (n > 0 && n < cap)
    {
        Line *shrunk = realloc(lines, n * sizeof *shrunk);
        if (shrunk) lines = shrunk;
    }

    qsort(lines, n, sizeof *lines, compareLines);
    *linesptr = lines;
    return n;
}
/*
 * Release an array of n Line records: for each record the raw line copy, the
 * tokenised copy and the word-pointer array are freed, and finally the array
 * itself.
 */
void freeLines(Line *lines, size_t n)
{
    Line *cur = lines;
    size_t remaining = n;

    while (remaining > 0)
    {
        free(cur->line);
        free(cur->tokens);
        free(cur->words);
        ++cur;
        --remaining;
    }
    free(lines);
}
/*
 * Print one unique line as "<file>: <line>". The stored line normally ends
 * with the newline read from the file; a final line without one gets a
 * newline appended so that consecutive reports never run together.
 */
static void printUnique(const char *fileName, const char *line)
{
    size_t len = strlen(line);

    printf("%s: %s", fileName, line);
    if (len == 0 || line[len - 1] != '\n') putchar('\n');
}

/*
 * Usage: finduniq n file1 file2
 *
 * Reports every line whose first n words do not occur as the first n words
 * of any line in the other file. Both files are read into arrays sorted by
 * that key and then merged, so the output is in key order, not file order.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE on bad arguments or unreadable files.
 */
int main(int argc, char **argv)
{
    if (argc != 4)
    {
        fprintf(stderr, "Usage: %s [n] [file1] [file2]\n", argv[0]);
        return EXIT_FAILURE;
    }

    /*
     * strtol instead of atoi: reject trailing garbage and non-positive
     * values, which would otherwise turn into a huge size_t word count.
     */
    char *end = NULL;
    long requested = strtol(argv[1], &end, 10);
    if (end == argv[1] || *end != '\0' || requested <= 0
        || (unsigned long)requested > (size_t)-1 / sizeof(const char *))
    {
        fprintf(stderr, "%s: word count must be a positive integer, got '%s'\n",
                argv[0], argv[1]);
        return EXIT_FAILURE;
    }
    size_t nwords = (size_t)requested;

    FILE *f1 = fopen(argv[2], "r");
    if (!f1)
    {
        perror(argv[2]);
        return EXIT_FAILURE;
    }
    FILE *f2 = fopen(argv[3], "r");
    if (!f2)
    {
        perror(argv[3]);
        fclose(f1); /* do not leak the first stream */
        return EXIT_FAILURE;
    }

    /*
     * readFile terminates the process itself on allocation failure, so a
     * zero count simply means an empty file, which is valid input.
     */
    Line *f1lines = NULL;
    size_t nf1lines = readFile(&f1lines, f1, nwords);
    Line *f2lines = NULL;
    size_t nf2lines = readFile(&f2lines, f2, nwords);
    fclose(f1);
    fclose(f2);

    size_t f1pos = 0;
    size_t f2pos = 0;
    while (f1pos < nf1lines && f2pos < nf2lines)
    {
        int cmp = compareLines(&f1lines[f1pos], &f2lines[f2pos]);
        if (cmp < 0)
        {
            printUnique(argv[2], f1lines[f1pos++].line);
        }
        else if (cmp > 0)
        {
            printUnique(argv[3], f2lines[f2pos++].line);
        }
        else
        {
            /*
             * The key occurs in both files. Skip the whole run of lines with
             * this key on each side; otherwise a second line with the same
             * key in one file would be reported as unique.
             */
            size_t keypos = f1pos;
            while (f2pos < nf2lines
                   && compareLines(&f1lines[keypos], &f2lines[f2pos]) == 0)
            {
                ++f2pos;
            }
            while (f1pos < nf1lines
                   && compareLines(&f1lines[keypos], &f1lines[f1pos]) == 0)
            {
                ++f1pos;
            }
        }
    }

    /* Whatever is left on either side has no counterpart in the other file. */
    while (f1pos < nf1lines)
    {
        printUnique(argv[2], f1lines[f1pos++].line);
    }
    while (f2pos < nf2lines)
    {
        printUnique(argv[3], f2lines[f2pos++].line);
    }

    freeLines(f1lines, nf1lines);
    freeLines(f2lines, nf2lines);
    return EXIT_SUCCESS;
}
如果您使用 gcc,请使用以下命令编译:
gcc -s -g0 -O2 -std=c11 -Wall -Wextra -pedantic -ofinduniq finduniq.c
演示:
$ ./finduniq 4 test1.txt test2.txt
test2.txt: an apple a day keeps the doctor away
test1.txt: lorem ipsum dolor
$ ./finduniq 6 test1.txt test2.txt
test2.txt: an apple a day keeps the doctor away
test2.txt: jumps over the very lazy *chicken*
test1.txt: jumps over the very lazy dog
test1.txt: lorem ipsum dolor
test2.txt: the quick brown fox *swims*
test1.txt: the quick brown fox jumps
答案 1 :(得分:0)
$ diff file1 file2 | grep "<\|>" | sed -E 's/^(<|>) //g' | sort | uniq -w5 -u
diff 比较文件 file1 和 file2,grep 和 sed 删除多余的行和符号,然后 sort 对这些字符串进行排序,uniq 输出唯一的字符串(-w5 表示只比较每行的前 5 个字符,尝试解决问题列表中的第 3 个问题)。