使用MPI的拼写检查程序

时间:2014-04-21 03:49:29

标签: mpi spell-checking

我的任务是编写一个拼写检查程序,然后用 Open MPI 将其并行化。我的思路是:先把文本文件中的单词加载到名为 dict[] 的数组中,作为字典;接着读取用户输入的单词,遍历字典数组,检查每个字典单词与输入单词的差异是否在阈值百分比之内,如果是,就把它打印出来。不过我只需要打印一定数量的单词。我的问题是:suggestions[] 数组并没有按我期望的方式被填充,其中出现了很多空白项。按照我的理解,我的写法只会在某个单词落在阈值之内时才向数组中写入,因此在没有更多单词可添加之前,数组中不应该出现任何空白。我觉得程序已经接近完成,但这一部分我始终想不明白。非常感谢任何帮助。

#include <stdio.h>
#include <mpi.h>
#include <string.h>
#include <stdlib.h>
#define SIZE 30
#define max(x,y) (((x) > (y)) ? (x) : (y))
char *dict[50000];
char *suggestions[50000];
char enterWord[50];
char *myWord;
int wordsToPrint = 20;
int threshold = 40;
int i;
int words_added = 0;


   /*
    * Compute the Levenshtein (edit) distance between two character sequences.
    *
    * word1, len1: first sequence and the number of characters to consider.
    * word2, len2: second sequence and the number of characters to consider.
    *
    * Returns the minimum number of single-character insertions, deletions and
    * substitutions that turn the first len1 characters of word1 into the first
    * len2 characters of word2.
    */
   int levenshtein(const char *word1, int len1, const char *word2, int len2){
      /* dist[r][c] = edit distance between word1[0..r) and word2[0..c). */
      int dist[len1 + 1][len2 + 1];
      int row;
      int col;

      /* Against an empty prefix the distance is simply the prefix length. */
      for(row = 0; row <= len1; row++){
         dist[row][0] = row;
      }
      for(col = 0; col <= len2; col++){
         dist[0][col] = col;
      }

      for(row = 1; row <= len1; row++){
         for(col = 1; col <= len2; col++){
            int best = dist[row - 1][col - 1];

            /* Matching characters cost nothing: inherit the diagonal. */
            if(word1[row - 1] == word2[col - 1]){
               dist[row][col] = best;
               continue;
            }

            /* Otherwise take the cheapest neighbour and pay one edit. */
            if(dist[row - 1][col] < best){
               best = dist[row - 1][col];
            }
            if(dist[row][col - 1] < best){
               best = dist[row][col - 1];
            }
            dist[row][col] = best + 1;
         }
      }
      return dist[len1][len2];
   }//levenshtein

   /*
    * Ask the user for the word to look up and store it in the global
    * enterWord buffer.
    *
    * At most sizeof(enterWord) - 1 characters are read, so an overlong token
    * cannot overflow the buffer. If no token can be read (EOF or input
    * error), enterWord is left as an empty string.
    */
   void prompt(){
      char format[16];

      printf("Enter word to search for: \n");
      /* Make sure the prompt is visible before blocking on input. */
      fflush(stdout);

      /* Build "%<N>s" with N tied to the real buffer size. */
      snprintf(format, sizeof format, "%%%zus", sizeof enterWord - 1);
      if(scanf(format, enterWord) != 1){
         enterWord[0] = '\0';
      }
   }


   /*
    * Scan rank 0's share of the dictionary (the first dict_size / num_processes
    * entries) and store every word close enough to word1 in suggestions[],
    * starting at index 0.
    *
    * num_processes: number of ranks sharing the dictionary; values below 1
    *                yield no work.
    * word1:         NUL-terminated word to compare against.
    *
    * Returns the number of suggestions stored.
    *
    * A word matches when its edit distance to word1, expressed as a percentage
    * of the longer word's length, is strictly below the global threshold.
    * Unloaded (NULL) or empty dictionary slots are skipped; an empty slot can
    * never be a useful suggestion and would otherwise risk a division by zero.
    */
   int p0_compute_output(int num_processes, char *word1){
      const int dict_size = (int)(sizeof dict / sizeof dict[0]);
      int totalNumber = 0;

      if(num_processes < 1 || word1 == NULL){
         return 0;
      }

      const int chunk = dict_size / num_processes;
      const size_t word_len = strlen(word1);

      for(int idx = 0; idx < chunk; idx++){
         const char *candidate = dict[idx];
         if(candidate == NULL || candidate[0] == '\0'){
            continue;
         }

         size_t cand_len = strlen(candidate);
         size_t longest = max(word_len, cand_len);   /* > 0: candidate is non-empty */
         int minedits = levenshtein(word1, (int)word_len, candidate, (int)cand_len);
         int thresholdPercentage = (int)((100 * (size_t)minedits) / longest);

         if(thresholdPercentage < threshold){
            suggestions[totalNumber] = dict[idx];
            totalNumber = totalNumber + 1;
         }
      }//for
      return totalNumber;
   }//p0_compute_output

   void p0_receive_output(int next_addition){
      int num_to_add;
      MPI_Comm comm;
      MPI_Status status;
         MPI_Recv(&num_to_add,1,MPI_INT,MPI_ANY_SOURCE, MPI_ANY_TAG,MPI_COMM_WORLD, MPI_STATUS_IGNORE);
         printf("--%d\n", num_to_add);
         suggestions[next_addition] = dict[num_to_add];
         next_addition = next_addition + 1;
   }

   void compute_output(int num_processes, int me, char *word1){
      int chunk = 0;
      int last_chunk = 0;
      MPI_Comm comm;
      if(50000 % num_processes == 0){
         chunk = 50000 / num_processes;
         last_chunk = chunk;
         int start = me * chunk;
         int end = me * chunk + chunk;
         for(i = start; i < end;i++){
            int minedits = levenshtein(word1, strlen(word1), dict[i], strlen(dict[i]));
            int thresholdPercentage = (100 * minedits) / max(strlen(word1), strlen(dict[i]));
            if(thresholdPercentage < threshold){
               int number_to_send = i;
               MPI_Send(&number_to_send, 1, MPI_INT, 0, 1, MPI_COMM_WORLD);
            }
         }
      }
      else{
         chunk = 50000 / num_processes;
         last_chunk = 50000 - ((num_processes - 1) * chunk);
         if(me != num_processes){
            int start = me * chunk;
            int end = me * chunk + chunk;
            for(i = start; i < end; i++){
               int minedits = levenshtein(word1, strlen(word1), dict[i], strlen(dict[i]));
               int thresholdPercentage = (100 * minedits) / max(strlen(word1), strlen(dict[i]));
               if(thresholdPercentage < threshold){
                  int number_to_send = i;
                  MPI_Send(&number_to_send, 1, MPI_INT, 0, 1, MPI_COMM_WORLD);
               }//if
            }//for
         }//if me != num_processes
         else{
            int start = me * chunk;
            int end = 50000 - start;
            for(i = start; i < end; i++){
               int minedits = levenshtein(word1, strlen(word1), dict[i], strlen(dict[i]));
               int thresholdPercentage = (100 * minedits) / max(strlen(word1), strlen(dict[i]));
               if(thresholdPercentage < threshold){
                  int number_to_send = i;
                  MPI_Send(&number_to_send, 1, MPI_INT, 0, 1, MPI_COMM_WORLD);
               }
            }
         }//me == num_processes
      }//BIG else
      return;
   }//COMPUTE OUTPUT

   /*
    * Obtain the search word and make it available on every rank.
    *
    * Only rank 0 prompts the user; the whole enterWord buffer is then
    * broadcast so every rank sees the complete, NUL-terminated word. Must be
    * called by all ranks after MPI_Init, because MPI_Bcast is collective.
    */
   void set_data(){
      int rank = 0;

      MPI_Comm_rank(MPI_COMM_WORLD, &rank);
      if(rank == 0){
         prompt();
      }
      MPI_Bcast(enterWord, (int)sizeof enterWord, MPI_CHAR, 0, MPI_COMM_WORLD);
      /* Defensive: guarantee termination whatever was received. */
      enterWord[sizeof enterWord - 1] = '\0';
   }//p0_send_inpui


//--------------------------MAIN-----------------------------//
main(int argc, char **argv){
   int ierr, num_procs, my_id, loop;
   FILE *myFile;
   loop = 0;

   for(i=0;i<50000;i++){
      suggestions[i] = calloc(SIZE, sizeof(char));
   }

   ierr = MPI_Init(NULL, NULL);
   ierr = MPI_Comm_rank(MPI_COMM_WORLD, &my_id);
   ierr = MPI_Comm_size(MPI_COMM_WORLD, &num_procs);
   printf("Check in from %d of %d processors\n", my_id, num_procs);

   set_data();
   myWord = enterWord;

   myFile = fopen("words", "r");
   if(myFile != NULL){
      for(i=0;i<50000;i++){
         dict[i] = calloc(SIZE, sizeof(char));
         fscanf(myFile, "%s", dict[i]);
      }//for
      fclose(myFile);
   }//read word list into dictionary
   else printf("File not found");

   if(my_id == 0){
      words_added = p0_compute_output(num_procs, enterWord);
      printf("words added so far: %d\n", words_added);
      p0_receive_output(words_added);
      printf("Threshold: %d\nWords To print: %d\n%s\n", threshold, wordsToPrint, myWord);
      ierr = MPI_Finalize();
   }
   else{
      printf("my word %s*\n", enterWord);
      compute_output(num_procs, my_id, enterWord);
     // printf("Process %d terminating...\n", my_id);
      ierr = MPI_Finalize();
   }

   for(i=0;i<wordsToPrint;i++){
      printf("*%s\n", suggestions[i]);
   }//print suggestions

   return (0);
}//END MAIN

1 个答案:

答案 0 :(得分:0)

以下是我所看到的一些问题:

  • prompt() 只应由 rank 0 调用。
  • 字典文件应只由 rank 0 读取,然后把数组广播给其他 rank。
    • 或者,由 rank 1 读取文件,同时 rank 0 等待用户输入,之后再分别广播输入的单词和字典。
  • 您的compute_output步骤过于复杂。您可以将p0_compute_output和compute_output合并到一个例程中。
    • 在每个等级中将索引数组存储到dict中
    • 这个数组在每个 rank 中的大小不同,因此最简单的方法是让每个 rank 先发送一个表示数组大小的整数,再发送该大小的数组(接收方必须知道预期的数据量)。您也可以结合这些大小使用 MPI_Gatherv,不过我估计这超出了您目前想做的范围。
    • 一旦您在排名0中有一个索引数组,那么使用它来填充建议。
  • 把 MPI_Finalize 调用留到 return 之前再执行。
  • 最后的 printf 调用应只由 rank 0 执行。我怀疑这正是造成结果“不正确”的主要原因:按目前的写法,所有 rank 都会打印 suggestions,但只有 rank 0 填充了它,所以其他 rank 打印出来的都是空白条目。

尝试其中一些更改,尤其是最后一个更改,看看是否有帮助。