在 Java 中,可以使用 `MatchAllDocsQuery()` 检索索引中的所有文档,但 PyLucene 的文档中没有说明如何实现同样的操作。
下面是一段 Python 代码,用于执行单个查询,然后从检索到的文档中提取所有字段。
INDEX_DIR = "directory/where/the/document/index/is/stored"
import sys, os, lucene
from java.nio.file import Paths
from org.apache.lucene.analysis.standard import StandardAnalyzer
from org.apache.lucene.index import DirectoryReader
from org.apache.lucene.queryparser.classic import QueryParser
from org.apache.lucene.store import SimpleFSDirectory
from org.apache.lucene.search import IndexSearcher
def run(searcher, analyzer):
    """Prompt for queries in a loop and print the ``doi`` field of each hit.

    Every non-empty line read from standard input is parsed as a classic
    Lucene query against the ``contents`` field. Up to 50 hits are fetched
    and the ``doi`` value of each retrieved document is printed. Entering
    an empty line ends the loop.

    Args:
        searcher: Searcher used to execute the query and load hit documents.
        analyzer: Analyzer handed to ``QueryParser`` when parsing the input.

    Raises:
        KeyError: If a retrieved document has no ``doi`` field.
    """
    while True:
        # A bare ``print`` is a no-op expression in Python 3; it has to be
        # called for the blank separator line to actually be emitted.
        print()
        print("Hit enter with no input to quit.")
        command = input("Query:")
        if command == '':
            return

        print()
        print("Searching for:", command)
        query = QueryParser("contents", analyzer).parse(command)
        # NOTE(review): to match every document instead of a parsed query, a
        # MatchAllDocsQuery instance is presumably what should be passed to
        # search() here -- confirm against the PyLucene build in use.
        # At most 50 hits are requested, so the count below is capped at 50.
        scoreDocs = searcher.search(query, 50).scoreDocs
        print("%s total matching documents." % len(scoreDocs))

        for scoreDoc in scoreDocs:
            doc = searcher.doc(scoreDoc.doc)
            # Map field name -> string value; a repeated field name keeps
            # the value of its last occurrence.
            table = {field.name(): field.stringValue() for field in doc.getFields()}
            print(table['doi'])
if __name__ == '__main__':
    # The JVM must be started before any Lucene class is used.
    lucene.initVM()
    print('lucene', lucene.VERSION)

    directory = SimpleFSDirectory.open(Paths.get(INDEX_DIR))
    try:
        print("Directory name is given below")
        print(directory)

        # Keep a handle on the reader so it can be closed explicitly;
        # dropping the searcher reference alone does not release it.
        reader = DirectoryReader.open(directory)
        try:
            searcher = IndexSearcher(reader)
            print(searcher)
            analyzer = StandardAnalyzer()
            # Calling the run function for execution
            run(searcher, analyzer)
        finally:
            reader.close()
    finally:
        directory.close()
答案 0 :(得分:0)
只需对查询稍作修改,即可让 Lucene 检索所有已索引的文档:将 command 变量替换为 `command = ".*."` 即可。`.*.` 借助星号通配符搜索所有文档中的所有字段和字段值。
INDEX_DIR = "directory/where/the/document/index/is/stored"
import sys, os, lucene
from java.nio.file import Paths
from org.apache.lucene.analysis.standard import StandardAnalyzer
from org.apache.lucene.index import DirectoryReader
from org.apache.lucene.queryparser.classic import QueryParser
from org.apache.lucene.store import SimpleFSDirectory
from org.apache.lucene.search import IndexSearcher
def run(searcher, analyzer):
    """Run the fixed ``.*.`` query and print the ``doi`` field of every hit.

    The pattern is parsed as a classic Lucene query against the
    ``contents`` field. The hit limit is taken from the number of live
    documents in the index, so the listing is not cut off at a fixed count.

    Args:
        searcher: Searcher used to execute the query and load hit documents.
        analyzer: Analyzer handed to ``QueryParser`` when parsing the pattern.

    Raises:
        KeyError: If a retrieved document has no ``doi`` field.
    """
    command = ".*."
    print("Searching for:", command)
    query = QueryParser("contents", analyzer).parse(command)

    # The aim is to list every indexed document, so request as many hits as
    # the index holds rather than a hard-coded 50. The floor of 1 covers an
    # empty index, since search() is expected to reject a non-positive limit.
    limit = max(1, searcher.getIndexReader().numDocs())
    scoreDocs = searcher.search(query, limit).scoreDocs
    print("%s total matching documents." % len(scoreDocs))

    for scoreDoc in scoreDocs:
        doc = searcher.doc(scoreDoc.doc)
        # Map field name -> string value; a repeated field name keeps the
        # value of its last occurrence.
        table = {field.name(): field.stringValue() for field in doc.getFields()}
        print(table['doi'])
if __name__ == '__main__':
    # The JVM must be started before any Lucene class is used.
    lucene.initVM()
    print('lucene', lucene.VERSION)

    directory = SimpleFSDirectory.open(Paths.get(INDEX_DIR))
    try:
        print("Directory name is given below")
        print(directory)

        # Keep a handle on the reader so it can be closed explicitly;
        # dropping the searcher reference alone does not release it.
        reader = DirectoryReader.open(directory)
        try:
            searcher = IndexSearcher(reader)
            print(searcher)
            analyzer = StandardAnalyzer()
            # Calling the run function for execution
            run(searcher, analyzer)
        finally:
            reader.close()
    finally:
        directory.close()