lucene 6索引搜索命中零

时间:2018-07-25 13:50:36

标签: java lucene pylucene

这是我的代码:

  def get_data():
      data = []
      with open('/project/XX/data/lucene.txt') as f:
          for line in f:
              line = line.strip()
              question, answer = line.split('\t')
              data.append((question, answer))
      return data

  directory = SimpleFSDirectory(Paths.get("/project/lucene_index/"))
  analyzer = WhitespaceAnalyzer()

  def index():
      preprocess_start_time = time.time()
      config = IndexWriterConfig(analyzer)
      config.setOpenMode(IndexWriterConfig.OpenMode.CREATE)
      writer = IndexWriter(directory, config)
      data = get_data()
      q = FieldType()
      q.setStored(True)
      q.setTokenized(True)  # True if this field’s value should be analyzed by the Analyzer.
      a = FieldType()
      a.setStored(True)
      a.setTokenized(False)  # True if this field’s value should be analyzed by the Analyzer.
      for question, answer in data:
          print(question, answer)
          doc = Document()
          doc.add(Field('question', question, q))
          doc.add(Field('answer', answer, a))
          writer.addDocument(doc)
      print('Indexed %d persons.' % len(data))
      writer.close()
      preprocess_end_time = time.time()
      output_file = open("lucene_time_calc.txt", "a")
      output_file.write(
          "Lucene Preprocessing time (Indexing) = " + str(preprocess_end_time - preprocess_start_time) + " secs\n")
      output_file.close()
      print("Indexing completed \n")

  def search(q):
      print('Searching text "%s".' % q)
      reader = DirectoryReader.open(directory)
      searcher = IndexSearcher(reader)
      query = QueryParser('question', analyzer).parse(q)
      print(query)
      results = searcher.search(query, 1000)
      score_docs = results.scoreDocs
      print('Found %d hits:' % results.totalHits)

      for score_doc in score_docs:
          doc = searcher.doc(score_doc.doc)
          score = score_doc.score
          question = doc['question']
          answer = doc['answer']
          print('[%f]:  "%s": "%s"' % (score, question, answer))
      reader.close();
      directory.close();

search(q)返回0。 我的lucene版本是6.4.1。我的文档是空白的。 我将standardAnalyzer更改为WhitespaceAnalyzer,得到的结果相同。 当我更改版本时,它给我一个错误: “版本”没有属性“ LUCENE_40”。我以这个例子为例。 这里发生了什么?

0 个答案:

没有答案