在 C 中从文件读取指定范围的行(scanf、fseek、fgets)

时间:2010-10-13 11:28:31

标签: c file parent-child scanf fseek

我有一个主进程(master),它会生成 N 个子进程,这些子进程通过匿名管道与父进程通信。我必须能够:

  • 让父进程打开文件,然后向每个子进程发送一个结构体,告知它必须读取第 min 行到第 max 行;
  • 这些读取会同时进行,所以我不知道:
  • 第一,如何把 total_lines 划分给 N 个 map(子进程);
  • 第二,如何让每个子进程只读取它应该读取的那些行?

我的问题与操作系统概念无关,只涉及文件操作 :S

也许fseek?我无法mmap日志文件(有些超过1GB)。

我会很感激一些想法。提前谢谢

编辑:我试图让各个子进程在不使用 fseek 和块大小(chunk)的情况下读取各自负责的行,有人可以告诉我这样做是否可行吗?

//somewhere in the parent process:

    /* Parent side: count the lines once, then give every map (child) a
     * contiguous, zero-based, inclusive [minLine, maxLine] range of lines.
     * A physical line longer than 1023 characters is counted as several
     * "lines" by fgets; the child counts the same way, so ranges still match.
     * NOTE(review): logFile is not checked for NULL before use - confirm the
     * surrounding code handles a failed fopen. */
    FILE* logFile = fopen(filename, "r");
    while (fgets(line, 1024, logFile) != NULL) {
        num_lines++;
    }
    rewind(logFile);

    /* Last line index already handed out; -1 means "none yet", so the first
     * map starts at line 0 without a special case. */
    int prev = -1;
    for (i = 0; i < maps_nr; i++) {
        struct send_to_Map request;
        /* NOTE(review): a FILE* is only meaningful inside the process that
         * owns it; presumably each child should use its own stream - verify. */
        request.fp = logFile;
        request.lower = lowLimit;
        request.upper = highLimit;

        request.minLine = prev + 1;
        if (i != maps_nr - 1) {
            request.maxLine = (request.minLine + num_lines / maps_nr) - 1;
        } else {
            /* The last map also takes the num_lines % maps_nr leftover lines.
             * The last valid zero-based index is num_lines - 1 (the previous
             * formula ended one line past the end of the file). */
            request.maxLine = num_lines - 1;
        }
        prev = request.maxLine;

        //write this structure to respective pipe (must happen inside the
        //loop: request only exists for the current iteration)
    }


//child process:

while(1) {
      /* ... */
      //reads the structure from the pipe (and so knows which lines to read)
      int n=0, counter=0;   /* n: zero-based index of the line just read */
      while (fgets(line, 1024, logFile) != NULL){
         /* only lines inside the inclusive [minLine, maxLine] range are processed */
         if (n>=minLine && n<=maxLine)
              counter+= process(line);//returns 1 if IP was found, in that line, between the low and high limit
         n++;
      }
     //(...)
}

我不知道它是否可行,我只是想先让它跑起来!即便如此,它有可能比由单个进程读取整个文件并打印日志文件中找到的 IP 总数更快吗?

3 个答案:

答案 0 :(得分:1)

如果您不要求把文件精确地均分,并且行长度在整个文件中的分布比较均匀,就可以避免在父进程中先把整个文件读一遍。

  1. 获取文件大小。
  2. chunk_size = file_size / number_of_children
  3. 当您在父进程中生成每个子进程时:
    • 定位(seek)到 (child_num + 1) * chunk_size
    • 向前读取,直到找到换行符。
    • 生成子进程,告诉它从上一个块的末尾开始(第一个子进程从 0 开始),以及该块的实际长度。
  4. 每个子进程定位到 start,并读取 chunk_size 字节。
  5. 以上就是该策略的粗略草图。

    已编辑以简化操作。

    编辑:下面是第 3 步的未经测试的代码,其后是第 4 步的代码。这些代码都未经测试,我也没有仔细处理差一(off-by-one)错误,但它可以让您了解 fseek 和 ftell 的用法,这听起来正是您要找的。

    // Assume FILE* f is open to the file, chunk_size is the average expected size,
    // child_num is the id of the current child, spawn_child() is a function that
    // handles the logic of spawning a child and telling it where to start reading,
    // and how much to read. child_chunks[] is an array of structs to keep track of
    // where the chunks start and how big they are.
    // NOTE(review): child_chunks[0].start is presumably set to 0 elsewhere, and the
    // array needs one entry more than the number of children because the entry
    // child_num+1 is written below - confirm against the caller.

    // Jump to the nominal END of this child's chunk: (child_num + 1) * chunk_size.
    if(fseek(f, (child_num + 1) * chunk_size, SEEK_SET) < 0) { handle_error(); }

    // Move forward to the end of the line we landed in, so that a chunk never
    // ends in the middle of a line. Hitting EOF simply ends the chunk at the
    // end of the file.
    int ch;
    while((ch = fgetc(f)) != EOF && ch != '\n')
    {/*empty*/}

    // ftell() now reports the offset just past the newline (or the file size
    // at EOF): this is the exclusive end of the current chunk.
    child_chunks[child_num].end = ftell(f);
    if(child_chunks[child_num].end < 0) { handle_error(); }

    // The next chunk begins exactly where this one ends; adding 1 here would
    // skip the first byte of the next line.
    child_chunks[child_num+1].start = child_chunks[child_num].end;
    spawn_child(child_num);
    

    然后在子进程中(步骤 4),假设子进程可以访问 child_chunks[] 并且知道自己的 child_num:

    /*
     * Child side (step 4): read every line of the chunk assigned to child_num.
     *
     * The chunk is the byte range [start, end) recorded in child_chunks[],
     * where end is the offset just past the last newline of the chunk.
     * Returns without reading anything if the seek to the chunk start fails.
     */
    void this_is_the_child(int child_num)
    {
        char line[1024];   /* a longer line arrives in several pieces */

        /* ... */

        if (fseek(f, child_chunks[child_num].start, SEEK_SET) != 0) {
            return;
        }

        /* Test the position BEFORE reading: a line belongs to this chunk when
         * it starts before end. Testing after the read would drop the last
         * line of every chunk, because ftell() equals end once it is read. */
        while (ftell(f) < child_chunks[child_num].end && fgets(line, sizeof line, f))
        {
            /* process line here */
        }
    }
    

答案 1 :(得分:1)

#include <stdio.h>
#include <stdlib.h>

/*
 * Append `offset` to the growable array *begins.
 *
 * count - number of entries in use, incremented on success
 * cap   - number of entries allocated, doubled when the array is full
 *
 * Returns 0 on success and -1 when the array cannot grow; on failure the
 * array and both counters are left untouched.
 */
static int pushLineBegin(long **begins, size_t *count, size_t *cap, long offset)
{
  if (*count == *cap) {
    size_t newCap = *cap ? *cap * 2 : 64;
    long *grown;

    /* refuse sizes whose byte count would overflow size_t */
    if (newCap < *cap || newCap > (size_t)-1 / sizeof **begins) {
      return -1;
    }
    /* keep the old pointer until realloc is known to have succeeded */
    grown = realloc(*begins, newCap * sizeof **begins);
    if (grown == NULL) {
      return -1;
    }
    *begins = grown;
    *cap = newCap;
  }
  (*begins)[(*count)++] = offset;
  return 0;
}

/*
 * Get an array with the start position (file offset) of every line.
 *
 * Scans `f` from its current position to end of file. Offsets are plain
 * `long` values as used by ftell/fseek; fpos_t is an opaque type and cannot
 * portably be compared or used as an index.
 *
 * f      - stream to scan; open it in binary mode so offsets are byte counts
 * begins - receives a malloc'ed array of line-start offsets, which the
 *          caller must free(); set to NULL for an empty file or on failure
 *
 * Returns the number of lines found, or -1 on an I/O or allocation error.
 * A final line without a trailing newline is counted as a line.
 */
long readLineBegins(FILE *f, long **begins)
{
  size_t count = 0;
  size_t cap = 0;
  long mark;     /* offset at which the line currently being scanned began */
  long endPos;
  int ch;        /* int, not char: must be able to hold EOF */

  *begins = NULL;
  mark = ftell(f);
  if (mark < 0) {
    return -1;
  }

  while ((ch = fgetc(f)) != EOF) {
    if (ch == '\n') {
      if (pushLineBegin(begins, &count, &cap, mark) != 0) {
        goto fail;
      }
      mark = ftell(f);   /* the next line starts right after the newline */
      if (mark < 0) {
        goto fail;
      }
    }
  }
  if (ferror(f)) {
    goto fail;
  }

  /* bytes after the last newline form a final, unterminated line */
  endPos = ftell(f);
  if (endPos < 0) {
    goto fail;
  }
  if (mark < endPos && pushLineBegin(begins, &count, &cap, mark) != 0) {
    goto fail;
  }

  return (long)count;

fail:
  free(*begins);
  *begins = NULL;
  return -1;
}

/*
 * Output lines beg...end (zero-based, inclusive), each prefixed with its
 * one-based line number and a colon.
 *
 * The caller must make sure `end` is smaller than the number of entries in
 * `begins`; a negative `beg` is treated as 0. Stops silently if a seek fails.
 */
void workLineBlocks(FILE *f, long *begins, long beg, long end)
{
  if (beg < 0) {
    beg = 0;
  }
  for (; beg <= end; ++beg) {
    int ch;

    if (fseek(f, begins[beg], SEEK_SET) != 0) { /* set line-start position */
      return;
    }
    printf("%ld:", beg + 1);
    /* stop at '\r' too, so CRLF files print cleanly from a binary stream */
    while ((ch = fgetc(f)) != EOF && ch != '\n' && ch != '\r') {
      putchar(ch);
    }
    puts("");
  }
}

/* Demo: print the last two and then the first two lines of file.txt. */
int main(void)
{
  FILE *f = fopen("file.txt", "rb");
  long *lineBegins = NULL; /* array with line-start positions */
  long lb;                 /* number of lines */

  if (f == NULL) {
    perror("file.txt");
    return EXIT_FAILURE;
  }

  lb = readLineBegins(f, &lineBegins);
  if (lb < 0) {
    fputs("file.txt: cannot index lines\n", stderr);
    fclose(f);
    return EXIT_FAILURE;
  }

  if (lb > 0) {
    /* clamp both ranges so files with fewer than two lines stay in bounds */
    workLineBlocks(f, lineBegins, lb >= 2 ? lb - 2 : 0, lb - 1); /* out last two lines */
    workLineBlocks(f, lineBegins, 0, lb >= 2 ? 1 : lb - 1);      /* out first two lines */
  }

  fclose(f);
  free(lineBegins);
  return EXIT_SUCCESS;
}

答案 2 :(得分:1)

我认为这个链接可以帮到您:Read specific range of lines from a text file