使用二分查找比较两个文件,需要一些快速帮助

时间:2016-09-29 22:19:35

标签: c string binary-search

我为一个项目编写了这段代码,本以为它可以正常工作,但它只对其中一个文件(IkeaWords.txt)里的第一个词进行了比较。我哪里做错了?下面是我写的代码,希望这些信息已经足够了。

{{1}}

如果你需要知道的话,我是用 Visual Studio 2015 编写的。

感谢您的帮助!

1 个答案:

答案 0 :(得分:0)

您的代码中有多处错误和一些不必要的内容。我冒昧地做了一些修改使其能够工作(如果您按照评论中的提示去做,可能已经发现了这些问题),并做了一些让代码更整洁的改动(GCC 编译时不再产生警告)。由于我手头没有 MSVS,所以没有在 MSVS 下测试。

// Silences MSVC's deprecation warnings for classic C runtime calls such as
// fopen(); must be defined before the standard headers are included.
#define _CRT_SECURE_NO_WARNINGS
// Array capacities, changed to accommodate the sizes of the data files used here.
// Number of rows in the dictionary array.
#define NumberOfWordsInDictionary 99172
// Bytes reserved per stored word, including the terminating NUL.
#define MaxWordSize 64
// Number of rows in the Ikea-name array.
#define NumberOfWordsInIkea 1393
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
// Dictionary word list, taken from /usr/dict/words (converted to lower case)
const char DictionaryFileName[] = "words.txt";
// Ikea product names, scraped from http://lar5.com/ikea/ (converted to lower case)
const char IkeaFileName[] = "ikea_names.txt";

// Binary search for a single word in an array of fixed-width strings.
//
//   ikeaWord                 NUL-terminated word to look for (not modified).
//   dictionary               rows of MaxWordSize bytes, each holding a
//                            NUL-terminated word; the rows must be sorted in
//                            strcmp() order for the result to be meaningful.
//   numberOfDictionaryWords  number of rows to search.
//
// Returns the index of a row that compares equal to ikeaWord, or -1 when no
// such row exists (which includes an empty search range).
int binarySearch(const char *ikeaWord, char dictionary[][MaxWordSize],
                 int numberOfDictionaryWords)
{
  int low = 0;
  int high = numberOfDictionaryWords - 1;

  while (low <= high) {
    // low + (high - low) / 2 cannot overflow int, unlike (low + high) / 2
    int mid = low + (high - low) / 2;
    int searchResult = strcmp(ikeaWord, dictionary[mid]);

    if (searchResult == 0) {
      return mid;
    }
    if (searchResult < 0) {
      // the word sorts before dictionary[mid]: continue in the lower half
      high = mid - 1;
    } else {
      // the word sorts after dictionary[mid]: continue in the upper half
      low = mid + 1;
    }
  }
  return -1;
}

int readWordsInFromDictionaryFile(FILE * pInputFile,
                                  char dictionary[][MaxWordSize])
{
  int index = 0;
  int maxWordLength = 0;
  // ripped fopen() because that happened already in main()

  // Changed from fscanf to fgets because the *scanf() family is a 
  // never ending source of problems, see stackoverflow et al. for endless examples
  while (fgets(dictionary[index], MaxWordSize - 1, pInputFile)) {
    int tempLength = (int) strlen(dictionary[index]);
    // Because of the change from fscanf to fgets we need to snip the newline off
    // (for "\r\n" endings snipp two)
    dictionary[index][tempLength - 1] = '\0';
    if (tempLength > maxWordLength) {
      maxWordLength = tempLength;
    }
    index++;
  }
  // If fgets returns NULL it is either EOF or an error
  if (ferror(pInputFile)) {
    fprintf(stderr, "something bad happend while reading dictionary\n");
    return 0;
  }
  fclose(pInputFile);
  printf("There were %d words read from the dictionary with max length %d.\n",
         index, maxWordLength);
  return 1;
}

// snipped off the addition of "2" to the variable names, no need for that
int readWordsInFromIkeaFile(FILE * pInputFile, char ikeaWord[][MaxWordSize])
{
  int index = 0;
  int maxIkeaWordLength = 0;

  while (fgets(ikeaWord[index], MaxWordSize - 1, pInputFile)) {

    int tempLength = (int) strlen(ikeaWord[index]);
    ikeaWord[index][tempLength - 1] = '\0';
    if (tempLength > maxIkeaWordLength) {
      maxIkeaWordLength = tempLength;
    }
    index++;
  }
  if (ferror(pInputFile)) {
    fprintf(stderr, "something bad happend while reading ikeawords\n");
    return 0;
  }
  printf("There were %d words read from the Ikea file with max length %d.\n",
         index, maxIkeaWordLength);
  return 1;
}

 //--------------------------------------------------------------------------------------
int main()
{
  char dictionary[NumberOfWordsInDictionary][MaxWordSize];
  char ikeaWord[NumberOfWordsInIkea][MaxWordSize];

  int res;
  // added error-checks
  FILE *pInputFile = fopen(DictionaryFileName, "r");
  if (pInputFile == NULL) {
    fprintf(stderr, "Can't open %s. Verify it is in correct location\n",
            DictionaryFileName);
    exit(EXIT_FAILURE);
  }
  FILE *pInputFile2 = fopen(IkeaFileName, "r");
  if (pInputFile2 == NULL) {
    fprintf(stderr, "Can't open %s. Verify it is in correct location\n",
            IkeaFileName);
    exit(EXIT_FAILURE);
  }
  if ((res = readWordsInFromDictionaryFile(pInputFile, dictionary)) == 0) {
    fprintf(stderr, "Error in reading dictionary\n");
    exit(EXIT_FAILURE);
  }
  if ((res = readWordsInFromIkeaFile(pInputFile2, ikeaWord)) == 0) {
    fprintf(stderr, "Error in reading ikea-file\n");
    exit(EXIT_FAILURE);
  }

  int index = -1;
  int j = 0;
  while (j < NumberOfWordsInIkea) {
    index = binarySearch(ikeaWord[j], dictionary, NumberOfWordsInDictionary);

    if (index != -1) {
      printf("The word \"%s\" was found.\n", dictionary[index]);
    }
    j++;
  }
// Seems to be useful when run in MS-Windows
#if defined _WIN32 ||  defined WIN32 || defined WIN64 || defined _WIN64
   sytem("pause");
#endif
  exit(EXIT_SUCCESS);
}

我没有把每个细节都打磨好,它仍然需要一些改进。例如:读取这两个文件的两个函数实际上是相同的,只是针对不同的文件和不同的字典数组,完全可以合并为一个函数。文件名、文件的行数以及各条目的长度目前都是固定的,可以改为动态确定,这样无需重新编译就能使用不同的输入。

不过总的来说:这是个不错的开始!