反转从文件加载的1GB字符串

时间:2012-06-18 03:28:52

标签: c sorting

有一个1 GB的文件,其中包含一个字符串。由于字符串非常大,无法完全加载到内存中。反转这个字符串的最佳方法是什么?

5 个答案:

答案 0 :(得分:4)

将文件分块加载到内存中,从最后一块开始向前处理:对每一块做反向迭代,并按顺序写出。伪代码:

// Pseudocode: reverse one 4mb block taken from the end of the file.
// Repeat with the preceding block until the start of the file is reached.
load_block(buffer, 4mb, end of file); // Load a 4mb block from the end
for (i = 4mb - 1; i >= 0; i--) { // Last valid index is 4mb - 1; buffer[4mb] is out of bounds
    write(buffer[i],1); // Write it out in reverse
}

答案 1 :(得分:1)

您可以从文件末尾开始向前遍历,逐字节读入内存(假设字符为8位),并从输出文件的开头起依次写入。

答案 2 :(得分:1)

一种简单粗暴(quick and dirty)的方法。

#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
/*
 * Reverse a file byte by byte.
 *
 * Usage: prog <input> <output>
 *
 * Reads the input file from its last byte to its first and writes each
 * byte to the output file, so the output is the input reversed.
 * Returns EXIT_SUCCESS on success and EXIT_FAILURE on any usage or I/O error.
 * An empty input produces an empty output.
 *
 * All I/O calls are checked explicitly instead of being wrapped in assert():
 * assert() expressions are removed when NDEBUG is defined, which would
 * silently drop the seeks the algorithm depends on.
 */
int main(int argc, char ** argv) {
    FILE *in = NULL;
    FILE *out = NULL;
    long pos;
    int status = EXIT_FAILURE;

    if (argc <= 2) {
        fprintf(stderr, "usage: %s <input> <output>\n",
                argc > 0 ? argv[0] : "reverse");
        return EXIT_FAILURE;
    }

    in = fopen(argv[1], "rb");
    if (in == NULL) {
        perror(argv[1]);
        goto done;
    }
    out = fopen(argv[2], "wb");
    if (out == NULL) {
        perror(argv[2]);
        goto done;
    }

    /* Determine the file length; pos is the number of bytes still to copy. */
    if (fseek(in, 0, SEEK_END) != 0) {
        perror("fseek");
        goto done;
    }
    pos = ftell(in);
    if (pos < 0) {
        perror("ftell");
        goto done;
    }

    /* Walk backwards using absolute offsets so that a failing seek is
     * reported as an error instead of being mistaken for "start reached". */
    while (pos > 0) {
        int c;

        --pos;
        if (fseek(in, pos, SEEK_SET) != 0) {
            perror("fseek");
            goto done;
        }
        c = fgetc(in);
        if (c == EOF) {
            fprintf(stderr, "read error\n");
            goto done;
        }
        if (fputc(c, out) == EOF) {
            fprintf(stderr, "write error\n");
            goto done;
        }
    }
    status = EXIT_SUCCESS;

done:
    /* fclose flushes buffered output, so its result matters for `out`. */
    if (out != NULL && fclose(out) != 0) {
        fprintf(stderr, "write error\n");
        status = EXIT_FAILURE;
    }
    if (in != NULL) {
        fclose(in);
    }
    return status;
}

根据评论添加。

#include <stdio.h>
#include <stdlib.h>
#include <assert.h>

/*
 * Reverse the first `size` bytes of `a` in place.
 *
 * a    - buffer to reverse; must hold at least `size` bytes.
 * size - number of bytes to reverse; values below 2 leave `a` untouched.
 */
void flip(char *a, int size) {
    int left;
    int right;
    char tmp;

    if (size < 2) {
        return;
    }

    /* Swap the outermost pair and move both indices toward the middle. */
    left = 0;
    right = size - 1;
    while (left < right) {
        tmp = a[left];
        a[left] = a[right];
        a[right] = tmp;
        ++left;
        --right;
    }
}
/*
 * Reverse a file in fixed-size chunks.
 *
 * Usage: prog <input> <output>
 *
 * The input is processed from its end toward its start. The first chunk
 * read is the (possibly short) tail, so every later chunk is exactly
 * CHUNK_SIZE bytes. Each chunk is reversed in memory with flip() and
 * appended to the output, so the output is the input reversed.
 * Returns EXIT_SUCCESS on success and EXIT_FAILURE on any usage or I/O error.
 * An empty input produces an empty output.
 *
 * All I/O calls are checked explicitly instead of being wrapped in assert():
 * assert() expressions are removed when NDEBUG is defined, which would
 * silently drop every seek, read and write.
 */
int main(int argc, char ** argv) {
    enum { CHUNK_SIZE = 4096 };
    char buffer[CHUNK_SIZE];
    FILE *in = NULL;
    FILE *out = NULL;
    long remaining;
    int status = EXIT_FAILURE;

    if (argc <= 2) {
        fprintf(stderr, "usage: %s <input> <output>\n",
                argc > 0 ? argv[0] : "reverse");
        return EXIT_FAILURE;
    }

    in = fopen(argv[1], "rb");
    if (in == NULL) {
        perror(argv[1]);
        goto done;
    }
    out = fopen(argv[2], "wb");
    if (out == NULL) {
        perror(argv[2]);
        goto done;
    }

    /* remaining = number of leading bytes of the input not yet copied. */
    if (fseek(in, 0, SEEK_END) != 0) {
        perror("fseek");
        goto done;
    }
    remaining = ftell(in);
    if (remaining < 0) {
        perror("ftell");
        goto done;
    }

    while (remaining > 0) {
        /* The first pass takes the partial tail (if any); after that
         * `remaining` is a multiple of CHUNK_SIZE and full chunks are taken. */
        size_t len = (size_t)(remaining % CHUNK_SIZE);
        if (len == 0) {
            len = CHUNK_SIZE;
        }
        remaining -= (long)len;

        /* Signed absolute offset: avoids negating an unsigned size_t. */
        if (fseek(in, remaining, SEEK_SET) != 0) {
            perror("fseek");
            goto done;
        }
        if (fread(buffer, 1, len, in) != len) {
            fprintf(stderr, "read error\n");
            goto done;
        }
        flip(buffer, (int)len);
        if (fwrite(buffer, 1, len, out) != len) {
            fprintf(stderr, "write error\n");
            goto done;
        }
    }
    status = EXIT_SUCCESS;

done:
    /* fclose flushes buffered output, so its result matters for `out`. */
    if (out != NULL && fclose(out) != 0) {
        fprintf(stderr, "write error\n");
        status = EXIT_FAILURE;
    }
    if (in != NULL) {
        fclose(in);
    }
    return status;
}

答案 3 :(得分:0)

我认为,解决这个问题的最佳选择是使用 mmap 将文件映射到内存中。请参阅 mmap 手册页了解如何映射文件,或阅读以下链接:When should I use mmap for file access?

答案 4 :(得分:0)

将字符串划分为内存可以处理的子字符串。从末尾读取这些子字符串。反转每个子字符串并将结果打印到输出文件。