我想比较相同行的2个文件:mytab2411.txt(大小为15,017,210字节)和shadow.txt(大小为569字节)但是当我编译这段代码并运行程序时,我遇到了分段错误。我知道这是因为“mytab2411.txt”文件超过了“char buf”的大小,但是如何在不溢出缓冲区的情况下解决这个问题呢?
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <strings.h>
int cmp(const void * s1, const void * s2)
{
return strcasecmp(*(char **)s1, *(char **)s2);
}
int cmp_half(const char * s1, const char * s2)
{
int i;
for (i = 0; i < 3; i++)
{
int res = strncasecmp((char *)s1+i*3, (char *)s2+i*3, 2);
if (res != 0) return res;
}
return 0;
}
char * line[1024];
int n = 0;
int search(const char * s)
{
int first, last, middle;
first = 0;
last = n - 1;
middle = (first+last)/2;
while( first <= last )
{
int res = cmp_half(s, line[middle]);
if (res == 0) return middle;
if (res > 0)
first = middle + 1;
else
last = middle - 1;
middle = (first + last)/2;
}
return -1;
}
int main()
{
FILE * f1, * f2;
char * s;
char buf[1024*1024], text[1024];
f1 = fopen("shadow.txt", "rt");
f2 = fopen("mytab2411.txt", "rt");
s = buf;
while (fgets(s, 1024, f2) != NULL)
{
line[n] = s;
s = s+strlen(s)+1;
n++;
}
qsort(line, n, sizeof(char *), cmp);
while (fgets(text, 1024, f1) != NULL)
{
text[strlen(text)-1] = 0;
int idx = search(text);
if (idx >= 0)
{
printf("%s matched %s\n", text, line[idx]);
}
else
{
printf("%s not matched\n", text);
}
}
return 0;
}
答案 0 :(得分:1)
您的方法假定文件中的每一行都是1024字节长。实际上,这些行最多可达1024个字节,但大多数行都要短得多。使用strdup
或malloc
根据行的长度为每行分配内存。
将行存储在动态分配的数组中。这是大约15 MB的数据,除非存在资源限制,否则它应该不是问题。
int main(void)
{
char buf[1024];
char **arr1 = NULL;
char **arr2 = NULL;
int size1 = 0;
int size2 = 0;
FILE * f1, *f2;
f1 = fopen("shadow.txt", "r");
f2 = fopen("mytab2411.txt", "r");
while(fgets(buf, 1024, f1))
{
size1++;
arr1 = realloc(arr1, sizeof(char*) * size1);
arr1[size1 - 1] = strdup(buf);
}
while(fgets(buf, 1024, f2))
{
size2++;
arr2 = realloc(arr2, sizeof(char*) * size2);
arr2[size2 - 1] = strdup(buf);
}
for(int i = 0; i < size1; i++)
for(int j = 0; j < size2; j++)
{
if(strcmp(arr1[i], arr2[j]) == 0)
printf("match %s\n", arr1[i]);
}
return 0;
}