Question

#include <stdio.h>
#include <pthread.h>
#include <stdlib.h>

struct thread_data {
  FILE *fp;
  long int offset;
  int start;
  int blockSize;
  //struct word maybe?
};

int words = 0;

void *countFrequency(void* data) {
  struct thread_data* td = data;
  char *buffer = malloc(td->blockSize);

  int i, c;
  i = 0; c = 0;
  enum states { WHITESPACE, WORD };
  int state = WHITESPACE;

  fseek(td->fp, td->offset, td->start);

  char last = ' ';
  while ((fread(buffer, td->blockSize, 1, td->fp)) == 1) {
    if (buffer[0]== ' ' || buffer[0] == '\t') {
      state = WHITESPACE;
    } else if (buffer[0] == '\n') {
      //newLine++;
      state = WHITESPACE;
    } else {
      if (state == WHITESPACE) {
        words++;
      }
      state = WORD;
    }
    last = buffer[0];
  }

  free(buffer);
  pthread_exit(NULL);
  return NULL;
}

int main(int argc, char **argv) {

  int nthreads, x, id, blockSize, len;
  //void *state;
  FILE *fp;
  pthread_t *threads;

  fp = fopen("file1.txt", "r");

  printf("Enter the number of threads: ");
  scanf("%d", &nthreads);
  struct thread_data data[nthreads];
  threads = malloc(nthreads * sizeof(pthread_t));

  fseek(fp, 0, SEEK_END);
  len = ftell(fp);
  printf("len= %d\n", len);

  blockSize = (len + nthreads - 1) / nthreads;
  printf("size= %d\n", blockSize);

  for (id = 0; id < nthreads; id++) {
    data[id].fp = fp;
    data[id].offset = blockSize;
    data[id].start = id * blockSize + 1;
    //maybe data[id]. word struct
  }
  //LAST THREAD
  data[nthreads-1].start=(nthreads-1)*blockSize+1;

  for (id = 0; id < nthreads; id++)
    pthread_create(&threads[id], NULL, &countFrequency,&data[id]);

  for (id = 0; id < nthreads; id++)
    pthread_join(threads[id],NULL);

  fclose(fp);

  printf("%d\n",words);
  return 0;
}

我有一个我在这个程序中修复的分段错误，但是现在当我运行它时，我得到0个单词，这是不正确的，因为文本文件中有大约一百万个单词。

有谁能告诉我为什么它会给我一个不正确的字数？

Answer 1

您遇到的一个问题是，您在每个countFrequency个线程中使用相同的文件描述符，每个线程执行一次fseek，然后尝试循环读取。最后fseek获胜。

必须首先解决这个设计缺陷。

为什么我的程序没有输出正确的单词数？

1 个答案: