I am trying to build a query parser that ranks the passages containing the query terms.
I understand I need to use SpanNearQuery for this, but even after going through the documentation I cannot find a way to get at the Spans: the method I call returns null.
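For context, the kind of query I am feeding in looks roughly like this (the field name and terms are just placeholders, not my real data):

SpanNearQuery query = new SpanNearQuery(new SpanQuery[] {
        new SpanTermQuery(new Term("text", "query")),
        new SpanTermQuery(new Term("text", "parser"))
}, 5, true); // at most 5 positions apart, in order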
I have read https://lucidworks.com/blog/2009/07/18/the-spanquery/, which explains the query nicely and shows how to access the spans, but it is written against Solr 4.0, and unfortunately Solr 6.3 no longer has an atomic reader.
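As far as I can tell, the blog's 4.x-era access pattern was along these lines (sketched from memory, with spanQuery and reader being the query and the index reader; this no longer compiles on 6.3 because AtomicReader and SpanQuery.getSpans were removed):

AtomicReader wrapper = SlowCompositeReaderWrapper.wrap(reader);
Map<Term, TermContext> termContexts = new HashMap<Term, TermContext>();
Spans spans = spanQuery.getSpans(wrapper.getContext(),
        new Bits.MatchAllBits(reader.numDocs()), termContexts);
while (spans.next()) {
    log.info("doc: " + spans.doc() + " start: " + spans.start() + " end: " + spans.end());
}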
How can I get the actual Spans? My current attempt (the process method of a SearchComponent, on Solr 6.3) is below:
public void process(ResponseBuilder rb) throws IOException {
    SolrParams params = rb.req.getParams();
    log.warn("in Process");
    if (!params.getBool(COMPONENT_NAME, false)) {
        return;
    }

    Query origQuery = rb.getQuery();
    // TODO: longer term, we don't have to be a span query, we could re-analyze the document
    if (origQuery != null) {
        if (!(origQuery instanceof SpanNearQuery)) {
            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
                    "Illegal query type. The incoming query must be a Lucene SpanNearQuery and it was a "
                            + origQuery.getClass().getName());
        }
        SpanNearQuery sQuery = (SpanNearQuery) origQuery;
        SolrIndexSearcher searcher = rb.req.getSearcher();
        IndexReader reader = searcher.getIndexReader();

        log.warn("before leaf reader context");
        List<LeafReaderContext> ctxs = reader.leaves();
        log.warn("after leaf reader context");
        // NOTE: only the first segment is inspected here; a complete version
        // would loop over every LeafReaderContext in ctxs
        LeafReaderContext ctx = ctxs.get(0);
        SpanWeight spanWeight = sQuery.createWeight(searcher, true);
        // getSpans returns null when the query has no matches in this segment
        Spans spans = spanWeight.getSpans(ctx, SpanWeight.Postings.POSITIONS);

        // The Solr 4.0 approach from the blog post no longer compiles on 6.3:
        // AtomicReader and SpanQuery.getSpans(...) are gone.
        // AtomicReader wrapper = SlowCompositeReaderWrapper.wrap(reader);
        // Map<Term, TermContext> termContexts = new HashMap<Term, TermContext>();
        // Spans spans = fleeceQ.getSpans(wrapper.getContext(),
        //         new Bits.MatchAllBits(reader.numDocs()), termContexts);
        // SpanWeight.Postings[] postings = SpanWeight.Postings.values();
        // Spans spans = sQuery.getSpans();
        // Assumes the query is a SpanQuery

        // Build up the query term weight map and the bi-gram weights
        Map<String, Float> termWeights = new HashMap<String, Float>();
        Map<String, Float> bigramWeights = new HashMap<String, Float>();
        createWeights(params.get(CommonParams.Q), sQuery, termWeights, bigramWeights, reader);
        float adjWeight = params.getFloat(ADJACENT_WEIGHT, DEFAULT_ADJACENT_WEIGHT);
        float secondAdjWeight = params.getFloat(SECOND_ADJ_WEIGHT, DEFAULT_SECOND_ADJACENT_WEIGHT);
        float bigramWeight = params.getFloat(BIGRAM_WEIGHT, DEFAULT_BIGRAM_WEIGHT);

        // get the passages
        int primaryWindowSize = params.getInt(OWLParams.PRIMARY_WINDOW_SIZE, DEFAULT_PRIMARY_WINDOW_SIZE);
        int adjacentWindowSize = params.getInt(OWLParams.ADJACENT_WINDOW_SIZE, DEFAULT_ADJACENT_WINDOW_SIZE);
        int secondaryWindowSize = params.getInt(OWLParams.SECONDARY_WINDOW_SIZE, DEFAULT_SECONDARY_WINDOW_SIZE);
        WindowBuildingTVM tvm = new WindowBuildingTVM(primaryWindowSize, adjacentWindowSize, secondaryWindowSize);
        PassagePriorityQueue rankedPassages = new PassagePriorityQueue();

        // intersect w/ doclist
        DocList docList = rb.getResults().docList;
        log.warn("Before Spans");
        while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
            // build up the window
            log.warn("Iterating through spans");
            if (docList.exists(spans.docID())) {
                // positions are undefined until nextStartPosition() is called
                while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
                    tvm.spanStart = spans.startPosition();
                    tvm.spanEnd = spans.endPosition();
                    Terms terms = reader.getTermVector(spans.docID(), sQuery.getField());
                    tvm.map(terms, spans);

                    // The entries map contains the window, do some ranking of it
                    if (!tvm.passage.terms.isEmpty()) {
                        log.debug("Candidate: Doc: {} Start: {} End: {} ",
                                new Object[] { spans.docID(), spans.startPosition(), spans.endPosition() });
                    }
                    tvm.passage.lDocId = spans.docID();
                    tvm.passage.field = sQuery.getField();

                    // score this window
                    try {
                        addPassage(tvm.passage, rankedPassages, termWeights, bigramWeights,
                                adjWeight, secondAdjWeight, bigramWeight);
                    } catch (CloneNotSupportedException e) {
                        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
                                "Internal error cloning Passage", e);
                    }
                    // clear out the entries for the next round
                    tvm.passage.clear();
                }
            }
        }
    }
}
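For reference, here is a minimal, self-contained sketch of how I understand the 6.x API is meant to be driven, segment by segment. The null check reflects that SpanWeight.getSpans can return null for a segment where the query matches nothing (which may be why I am seeing null), and dumpSpans is just a hypothetical helper name for illustration:

import java.io.IOException;

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanWeight;
import org.apache.lucene.search.spans.Spans;

static void dumpSpans(IndexSearcher searcher, SpanNearQuery query) throws IOException {
    // one weight for the whole index, one Spans per segment
    SpanWeight weight = query.createWeight(searcher, false);
    for (LeafReaderContext ctx : searcher.getIndexReader().leaves()) {
        Spans spans = weight.getSpans(ctx, SpanWeight.Postings.POSITIONS);
        if (spans == null) {
            continue; // the query matches nothing in this segment
        }
        while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
            // positions are only defined after nextStartPosition()
            while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
                int globalDoc = ctx.docBase + spans.docID(); // segment-local to index-wide docID
                System.out.println("doc=" + globalDoc
                        + " start=" + spans.startPosition()
                        + " end=" + spans.endPosition());
            }
        }
    }
}

This is just how I read the 6.3 javadoc; the component above tries to follow the same pattern for the first leaf only.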