C从输入文件

时间:2018-03-21 20:05:57

标签: c matrix binary fwrite fread

我试图从输入文件中读取40000x40000布尔(二进制)矩阵并将其存储在变量中。将它存储在变量中后,我想将其写入文件。但是,使用我编写的代码需要一个多小时。有人可以帮我吗?我想我做错了什么。

代码

void get_grid_values_file(bool *grid, int n, int m, char *input_filename){
    FILE *in_file;
    in_file = fopen(input_filename, "r");  
    char buffer[1]; 
    bool search = true;
    int k=0;
    while(search){
       fseek(in_file, k, SEEK_SET);
       fread(buffer, 1, 1, in_file); 
        if(*buffer == '\n')
            search = false;
        k++;         
    }
    int i,j;    
    for(i=0; i<n; i++){
        for(j=0; j<m; j++){
           fseek(in_file, k, SEEK_SET);
           fread(buffer, 1, 1, in_file);  
           *((grid+i*m) + j) = atof(buffer);   
           k+=2;
        }
    }
    fclose(in_file);
}

void set_grid_values_file(bool *grid, int n, int m, char *output_filename){
    FILE *out_file;
    out_file = fopen(output_filename, "w"); 
    char buffer[1] = " "; 
    //Set n,m and spaces
    int length_n= (int) (log10 (abs (n))) + 1;
    char char_n[length_n];
    sprintf(char_n, "%d", n);
    fseek(out_file, 0, SEEK_SET);
    fwrite (char_n, length_n, 1, out_file);
    fseek(out_file, length_n, SEEK_SET); 
    fwrite (" ", 1, 1, out_file);      
    int length_m= (int) (log10 (abs (m))) + 1;
    char char_m[length_m];
    sprintf(char_m, "%d", m);
    fseek(out_file, length_n+1, SEEK_SET);
    fwrite (char_m, length_m, 1, out_file);
    fseek(out_file, length_n+1+length_m, SEEK_SET);
    fwrite ("\n", sizeof(char), 1, out_file);
    //Set grid
    int i,j;
    int k =length_n + length_m + 2;
    for(i=0; i<n; i++){
        for(j=0; j<m; j++){
           fseek(out_file, k, SEEK_SET);           
           buffer[0] = (*((grid+i*m) + j) == true ? '1' : '0');
           fwrite (buffer, 1, 1, out_file);    
           k++;
           fseek(out_file, k, SEEK_SET); 
           fwrite (" ", 1, 1, out_file);
           k++; 
        }
        fseek(out_file, k, SEEK_SET);
        fwrite ("\n", sizeof(char), 1, out_file);
        k++;
    }
}

int main(int argc, char *argv[])
{

    char *input_filename = "gen0_40kx40k.in";
    char *output_filename = "gol_output.out";
    int n = 40000;
    int m = 40000;


    bool *grid = (bool *)malloc(n*m*sizeof(bool));
    //Read
    get_grid_values_file((bool *)grid, n, m, input_filename);

    //Write
    set_grid_values_file((bool *)grid, n, m, output_filename);

    return 0;
}

输入格式,第一行包含2d矩阵的dem:
20 20
1 0 1 0 0 1 0 0 1 0 1 0 1 0 1 1 0 1 0 0
1 1 0 0 0 0 0 1 0 1 1 1 1 0 0 1 0 0 1 1
0 1 1 0 1 0 1 1 0 0 0 1 1 0 0 1 1 0 0 1
1 0 1 1 0 1 0 0 1 0 1 1 1 0 1 0 1 1 1 1 1 1 0 0 0 0 0 1 0 0 0 0 0 1 1 1 0 1 1 1
1 1 1 0 1 0 0 0 1 0 1 0 1 0 1 1 0 1 0 1
1 1 1 0 0 0 1 1 1 0 0 1 1 1 1 1 0 0 1 1
0 1 0 1 1 1 1 1 0 1 0 1 0 1 1 1 1 0 0 1
1 0 0 0 0 1 1 0 1 1 1 1 0 1 1 1 0 0 0 0
1 1 0 0 0 1 1 0 0 1 1 1 1 1 1 1 0 1 1 0
0 1 1 0 0 0 1 1 0 1 1 0 0 1 0 1 1 1 1 1
0 0 0 1 1 1 1 1 0 0 1 0 1 1 0 0 1 1 1 0
1 0 0 1 0 0 0 1 1 1 0 1 1 0 0 0 1 1 0 0
0 1 0 1 0 1 1 0 0 0 0 1 1 0 1 1 0 1 0 1
1 0 0 0 0 0 1 1 0 1 1 0 0 0 1 1 1 1 1 1 1 1 1 0 1 0 1 1 1 1 0 0 1 0 1 1 0 0 1 0
1 0 0 1 0 0 0 1 1 1 1 0 0 0 1 1 0 0 0 1
0 0 0 1 0 1 1 1 1 0 1 0 0 0 1 1 0 0 0 1
1 1 1 0 1 0 1 1 1 0 1 1 0 1 0 1 0 1 0 1
1 1 1 0 1 0 1 1 0 1 0 0 1 0 0 0 0 1 0 1

3 个答案:

答案 0 :(得分:0)

从输入文件中读取较大的块 - 不是每个char都作为单独的调用。例如,一次完整的矩阵行。

为什么要在set_grid_values_file两个fwrite调用的内循环中使用。最好将它们结合起来:

char buffer[2] = "  ";
for(i=0; i<n; i++){
    int base = grid+i*m;
    for(j=0; j<m; j++){
        fseek(out_file, k, SEEK_SET);           
        buffer[0] = (*(base + j) ? '1' : '0');
        fwrite (buffer, 1, 2, out_file);    
        k+=2; 
     }

答案 1 :(得分:0)

我建议删除对fseek的调用。

while(search){

   // Make sure the read is successful. Otherwise, break out of the loop.
   if ( fread(buffer, 1, 1, in_file) != 1 )
   {
      break;
   } 

   if(*buffer == '\n')
       search = false;
   k++;         
}
int i,j;    

// Rewind the file
fseek(in_file, 0, SEEK_SET);

for(i=0; i<n; i++){
    for(j=0; j<m; j++){

      // Make sure the read is successful. Otherwise, break out of the loop.
      if ( fread(buffer, 1, 1, in_file) != 1 )
      {
         break;
      } 

      *((grid+i*m) + j) = atof(buffer);   
      k+=2;
    }
}

atof(buffer)中只有一个元素时,buffer也会出现问题。至少使用两个元素数组。

char buffer[2] = {0};

答案 2 :(得分:0)

这是关于最简单(也可能是最快)的方法。

  • stdio被缓冲,getc()很可能是一个宏
  • 函数调用很昂贵(它们会破坏指令管道和缓存);每个布尔位读取使用1seek + 1fread。
  • 你不需要寻找文件;只需按顺序读取它并将值放在正确的{row,col}位置。
  • 设置和测试指标变量(你的search)是浪费时间(通常在Pascal和Java类中讲授......);相反:只是跳出循环(或继续)
  • 我假设一个ASCII文件,值之间有空格,但顶部没有{nrow,ncol}({n,m}值作为函数参数提供)
void get_grid_values_file(bool *grid, int n, int m, char *input_filename)
{
unsigned col,row;
FILE * fp;

fp= fopen (input_filename, "r" );
if(!fp)return;

for(row=col=0; ; ) {
        int ch;
        ch=getc(fp);
        if (ch == EOF)break;
        if (ch < '0' || ch > '1') continue;

        grid[row*m+col++] = (ch == '0') ? False :True;
        if (col == m) {col=0; row++; }
        if (row == n) break;
        }
fclose(fp);
return;
}