文件大小超过缓冲区大小

时间:2017-12-21 07:04:40

标签: c linux ubuntu

我想比较相同行的2个文件:mytab2411.txt(大小为15,017,210字节)和shadow.txt(大小为569字节)但是当我编译这段代码并运行程序时,我遇到了分段错误。我知道这是因为“mytab2411.txt”文件超过了“char buf”的大小,但是如何在不溢出缓冲区的情况下解决这个问题呢?

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <strings.h>

int cmp(const void * s1, const void * s2)
{
    return strcasecmp(*(char **)s1, *(char **)s2);
}

int cmp_half(const char * s1, const char * s2)
{
    int i;
    for (i = 0; i < 3; i++)
    {
        int res = strncasecmp((char *)s1+i*3, (char *)s2+i*3, 2);
        if (res != 0) return res;
    }

    return 0;
}

char * line[1024];
int n = 0;

int search(const char * s)
{
    int first, last, middle;
    first = 0;
    last = n - 1;
    middle = (first+last)/2;

    while( first <= last )
    {
        int res = cmp_half(s, line[middle]);
        if (res == 0) return middle;
        if (res > 0)
            first = middle + 1;    
        else
            last = middle - 1;

        middle = (first + last)/2;
    }
    return -1;
}

int main()
{
    FILE * f1, * f2;
    char * s;
    char buf[1024*1024], text[1024];

    f1 = fopen("shadow.txt", "rt");
    f2 = fopen("mytab2411.txt", "rt");

    s = buf;
    while (fgets(s, 1024, f2) != NULL)
    {
        line[n] = s;
        s = s+strlen(s)+1;
        n++;
    }

    qsort(line, n, sizeof(char *), cmp);

    while (fgets(text, 1024, f1) != NULL)
    {
    text[strlen(text)-1] = 0;
        int idx = search(text);
        if (idx >= 0)
        {
            printf("%s matched %s\n", text, line[idx]);
        }
        else
        {
            printf("%s not matched\n", text);
        }
    }

    return 0;
}

1 个答案:

答案 0 :(得分:1)

您的方法假定文件中的每一行都是1024字节长。实际上,这些行最多可达1024个字节,但大多数行都要短得多。使用strdupmalloc根据行的长度为每行分配内存。

将行存储在动态分配的数组中。这是大约15 MB的数据,除非存在资源限制,否则它应该不是问题。

int main(void)
{
    char buf[1024];
    char **arr1 = NULL;
    char **arr2 = NULL;
    int size1 = 0;
    int size2 = 0;
    FILE * f1, *f2;
    f1 = fopen("shadow.txt", "r");
    f2 = fopen("mytab2411.txt", "r");

    while(fgets(buf, 1024, f1))
    {
        size1++;
        arr1 = realloc(arr1, sizeof(char*) * size1);
        arr1[size1 - 1] = strdup(buf);
    }

    while(fgets(buf, 1024, f2))
    {
        size2++;
        arr2 = realloc(arr2, sizeof(char*) * size2);
        arr2[size2 - 1] = strdup(buf);
    }

    for(int i = 0; i < size1; i++)
        for(int j = 0; j < size2; j++)
        {
            if(strcmp(arr1[i], arr2[j]) == 0)
                printf("match %s\n", arr1[i]);
        }

    return 0;
}