This is my code:
import time
import lucene
from java.nio.file import Paths
from org.apache.lucene.analysis.core import WhitespaceAnalyzer
from org.apache.lucene.document import Document, Field, FieldType
from org.apache.lucene.index import DirectoryReader, IndexWriter, IndexWriterConfig
from org.apache.lucene.queryparser.classic import QueryParser
from org.apache.lucene.search import IndexSearcher
from org.apache.lucene.store import SimpleFSDirectory

lucene.initVM()

def get_data():
    data = []
    with open('/project/XX/data/lucene.txt') as f:
        for line in f:
            line = line.strip()
            # each line is a tab-separated question/answer pair
            question, answer = line.split('\t')
            data.append((question, answer))
    return data
directory = SimpleFSDirectory(Paths.get("/project/lucene_index/"))
analyzer = WhitespaceAnalyzer()
def index():
    preprocess_start_time = time.time()
    config = IndexWriterConfig(analyzer)
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE)
    writer = IndexWriter(directory, config)
    data = get_data()

    q = FieldType()
    q.setStored(True)
    q.setTokenized(True)   # True if this field's value should be analyzed by the Analyzer.

    a = FieldType()
    a.setStored(True)
    a.setTokenized(False)  # True if this field's value should be analyzed by the Analyzer.

    for question, answer in data:
        print(question, answer)
        doc = Document()
        doc.add(Field('question', question, q))
        doc.add(Field('answer', answer, a))
        writer.addDocument(doc)

    print('Indexed %d persons.' % len(data))
    writer.close()

    preprocess_end_time = time.time()
    output_file = open("lucene_time_calc.txt", "a")
    output_file.write(
        "Lucene Preprocessing time (Indexing) = " + str(preprocess_end_time - preprocess_start_time) + " secs\n")
    output_file.close()
    print("Indexing completed \n")
def search(q):
    print('Searching text "%s".' % q)
    reader = DirectoryReader.open(directory)
    searcher = IndexSearcher(reader)
    query = QueryParser('question', analyzer).parse(q)
    print(query)
    results = searcher.search(query, 1000)
    score_docs = results.scoreDocs
    print('Found %d hits:' % results.totalHits)
    for score_doc in score_docs:
        doc = searcher.doc(score_doc.doc)
        score = score_doc.score
        question = doc['question']
        answer = doc['answer']
        print('[%f]: "%s": "%s"' % (score, question, answer))
    reader.close()
    directory.close()
search(q) returns 0 hits. My Lucene version is 6.4.1, and my documents are blank. I changed StandardAnalyzer to WhitespaceAnalyzer and got the same result. When I try to change the version, it gives me an error: "'Version' has no attribute 'LUCENE_40'". I based my code on this example. What is going on here?
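For reference, the "'Version' has no attribute 'LUCENE_40'" error is typical of following a Lucene 4.x-era sample on 6.x: the old Version constants are gone, and analyzers and QueryParser are constructed without a Version argument. A minimal sketch of the two construction styles, assuming the example I followed used the old 4.x constructors (the exact tutorial code is not shown here):

# Old Lucene 4.x tutorial style -- fails on 6.4.1 because Version.LUCENE_40 no longer exists:
#   analyzer = StandardAnalyzer(Version.LUCENE_40)
#   parser = QueryParser(Version.LUCENE_40, 'question', analyzer)

# Lucene 6.x style -- no Version argument at all:
from org.apache.lucene.analysis.standard import StandardAnalyzer
from org.apache.lucene.queryparser.classic import QueryParser

analyzer = StandardAnalyzer()               # or WhitespaceAnalyzer()
parser = QueryParser('question', analyzer)  # field name + analyzer only
query = parser.parse('some query text')     # hypothetical query string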