我正在将Solr ComplexPhraseQueryParser 4.7与inOrder补丁一起使用:https://issues.apache.org/jira/browse/SOLR-1604
我正在使用Solr 4.3.1,并且不允许升级。
这是我的ComplexPhraseQueryParser类包含补丁的样子:
package org.apache.lucene.queryparser.classic;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.*;
import org.apache.lucene.search.spans.*;
import org.apache.lucene.util.Version;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
public class ComplexPhraseQueryParser extends QueryParser {
private ArrayList<ComplexPhraseQuery> complexPhrases = null;
private boolean isPass2ResolvingPhrases;
private boolean inOrder = true;
/**
* When <code>inOrder</code> is true, the search terms must
* exists in the documents as the same order as in query.
*
* @param inOrder parameter to choose between ordered or un-ordered proximity search
*/
public void setInOrder(final boolean inOrder) {
this.inOrder = inOrder;
}
private ComplexPhraseQuery currentPhraseQuery = null;
public ComplexPhraseQueryParser(Version matchVersion, String f, Analyzer a) {
super(matchVersion, f, a);
}
protected Query getFieldQuery(String field, String queryText, int slop) {
ComplexPhraseQuery cpq = new ComplexPhraseQuery(field, queryText, slop, inOrder);
complexPhrases.add(cpq); // add to list of phrases to be parsed once
// we
// are through with this pass
return cpq;
}
public Query parse(String query) throws ParseException {
if (isPass2ResolvingPhrases) {
MultiTermQuery.RewriteMethod oldMethod = getMultiTermRewriteMethod();
try {
// Temporarily force BooleanQuery rewrite so that Parser will
// generate visible
// collection of terms which we can convert into SpanQueries.
// ConstantScoreRewrite mode produces an
// opaque ConstantScoreQuery object which cannot be interrogated for
// terms in the same way a BooleanQuery can.
// QueryParser is not guaranteed threadsafe anyway so this temporary
// state change should not
// present an issue
setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
return super.parse(query);
} finally {
setMultiTermRewriteMethod(oldMethod);
}
}
// First pass - parse the top-level query recording any PhraseQueries
// which will need to be resolved
complexPhrases = new ArrayList<ComplexPhraseQuery>();
Query q = super.parse(query);
// Perform second pass, using this QueryParser to parse any nested
// PhraseQueries with different
// set of syntax restrictions (i.e. all fields must be same)
isPass2ResolvingPhrases = true;
try {
for (Iterator iterator = complexPhrases.iterator(); iterator.hasNext();) {
currentPhraseQuery = (ComplexPhraseQuery) iterator.next();
// in each phrase, now parse the contents between quotes as a
// separate parse operation
currentPhraseQuery.parsePhraseElements(this);
}
} finally {
isPass2ResolvingPhrases = false;
}
return q;
}
// There is No "getTermQuery throws ParseException" method to override so
// unfortunately need
// to throw a runtime exception here if a term for another field is embedded
// in phrase query
protected Query newTermQuery(Term term) {
if (isPass2ResolvingPhrases) {
try {
checkPhraseClauseIsForSameField(term.field());
} catch (ParseException pe) {
throw new RuntimeException("Error parsing complex phrase", pe);
}
}
return super.newTermQuery(term);
}
// Helper method used to report on any clauses that appear in query syntax
private void checkPhraseClauseIsForSameField(String field)
throws ParseException {
if (!field.equals(currentPhraseQuery.field)) {
throw new ParseException("Cannot have clause for field \"" + field
+ "\" nested in phrase " + " for field \"" + currentPhraseQuery.field
+ "\"");
}
}
@Override
protected Query getWildcardQuery(String field, String termStr)
throws ParseException {
if (isPass2ResolvingPhrases) {
checkPhraseClauseIsForSameField(field);
}
return super.getWildcardQuery(field, termStr);
}
@Override
protected Query getRangeQuery(String field, String part1, String part2,
boolean startInclusive, boolean endInclusive) throws ParseException {
if (isPass2ResolvingPhrases) {
checkPhraseClauseIsForSameField(field);
}
return super.getRangeQuery(field, part1, part2, startInclusive, endInclusive);
}
@Override
protected Query newRangeQuery(String field, String part1, String part2,
boolean startInclusive, boolean endInclusive) {
if (isPass2ResolvingPhrases) {
// Must use old-style RangeQuery in order to produce a BooleanQuery
// that can be turned into SpanOr clause
TermRangeQuery rangeQuery = TermRangeQuery.newStringRange(field, part1, part2, startInclusive, endInclusive);
rangeQuery.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
return rangeQuery;
}
return super.newRangeQuery(field, part1, part2, startInclusive, endInclusive);
}
@Override
protected Query getFuzzyQuery(String field, String termStr,
float minSimilarity) throws ParseException {
if (isPass2ResolvingPhrases) {
checkPhraseClauseIsForSameField(field);
}
return super.getFuzzyQuery(field, termStr, minSimilarity);
}
/*
* Used to handle the query content in between quotes and produced Span-based
* interpretations of the clauses.
*/
static class ComplexPhraseQuery extends Query {
String field;
String phrasedQueryStringContents;
int slopFactor;
boolean inOrder;
private Query contents;
public ComplexPhraseQuery(String field, String phrasedQueryStringContents,
int slopFactor, boolean inOrder) {
super();
this.field = field;
this.phrasedQueryStringContents = phrasedQueryStringContents;
this.slopFactor = slopFactor;
this.inOrder = inOrder;
}
// Called by ComplexPhraseQueryParser for each phrase after the main
// parse
// thread is through
protected void parsePhraseElements(QueryParser qp) throws ParseException {
// TODO ensure that field-sensitivity is preserved ie the query
// string below is parsed as
// field+":("+phrasedQueryStringContents+")"
// but this will need code in rewrite to unwrap the first layer of
// boolean query
String oldDefaultParserField = qp.field;
try {
//temporarily set the QueryParser to be parsing the default field for this phrase e.g author:"fred* smith"
qp.field = field;
contents = qp.parse(phrasedQueryStringContents);
}
finally {
qp.field = oldDefaultParserField;
}
}
public Query rewrite(IndexReader reader) throws IOException {
// ArrayList spanClauses = new ArrayList();
if (contents instanceof TermQuery) {
return contents;
}
// Build a sequence of Span clauses arranged in a SpanNear - child
// clauses can be complex
// Booleans e.g. nots and ors etc
int numNegatives = 0;
if (!(contents instanceof BooleanQuery)) {
throw new IllegalArgumentException("Unknown query type \""
+ contents.getClass().getName()
+ "\" found in phrase query string \"" + phrasedQueryStringContents
+ "\"");
}
BooleanQuery bq = (BooleanQuery) contents;
BooleanClause[] bclauses = bq.getClauses();
SpanQuery[] allSpanClauses = new SpanQuery[bclauses.length];
// For all clauses e.g. one* two~
for (int i = 0; i < bclauses.length; i++) {
// HashSet bclauseterms=new HashSet();
Query qc = bclauses[i].getQuery();
// Rewrite this clause e.g one* becomes (one OR onerous)
qc = qc.rewrite(reader);
if (bclauses[i].getOccur().equals(BooleanClause.Occur.MUST_NOT)) {
numNegatives++;
}
if (qc instanceof BooleanQuery) {
ArrayList<SpanQuery> sc = new ArrayList<SpanQuery>();
addComplexPhraseClause(sc, (BooleanQuery) qc);
if (sc.size() > 0) {
allSpanClauses[i] = sc.get(0);
} else {
// Insert fake term e.g. phrase query was for "Fred Smithe*" and
// there were no "Smithe*" terms - need to
// prevent match on just "Fred".
allSpanClauses[i] = new SpanTermQuery(new Term(field,
"Dummy clause because no terms found - must match nothing"));
}
} else {
if (qc instanceof TermQuery) {
TermQuery tq = (TermQuery) qc;
allSpanClauses[i] = new SpanTermQuery(tq.getTerm());
} else if (qc instanceof PhraseQuery) {
final PhraseQuery pq = (PhraseQuery) qc;
final Term[] subTerms = pq.getTerms();
SpanQuery[] clauses = new SpanQuery[subTerms.length];
for (int j = 0; j < subTerms.length; j++) {
clauses[j] = new SpanTermQuery(subTerms[j]);
}
allSpanClauses[i] = new SpanNearQuery(clauses, 0, inOrder);
} else {
throw new IllegalArgumentException("Unknown query type \""
+ qc.getClass().getName()
+ "\" found in phrase query string \""
+ phrasedQueryStringContents + "\"");
}
}
}
if (numNegatives == 0) {
// The simple case - no negative elements in phrase
return new SpanNearQuery(allSpanClauses, slopFactor, inOrder);
}
// Complex case - we have mixed positives and negatives in the
// sequence.
// Need to return a SpanNotQuery
ArrayList<SpanQuery> positiveClauses = new ArrayList<SpanQuery>();
for (int j = 0; j < allSpanClauses.length; j++) {
if (!bclauses[j].getOccur().equals(BooleanClause.Occur.MUST_NOT)) {
positiveClauses.add(allSpanClauses[j]);
}
}
SpanQuery[] includeClauses = positiveClauses
.toArray(new SpanQuery[positiveClauses.size()]);
SpanQuery include;
if (includeClauses.length == 1) {
include = includeClauses[0]; // only one positive clause
} else {
// need to increase slop factor based on gaps introduced by
// negatives
include = new SpanNearQuery(includeClauses, slopFactor + numNegatives,
inOrder);
}
// Use sequence of positive and negative values as the exclude.
SpanNearQuery exclude = new SpanNearQuery(allSpanClauses, slopFactor,
inOrder);
return new SpanNotQuery(include, exclude);
}
private void addComplexPhraseClause(List<SpanQuery> spanClauses, BooleanQuery qc) {
ArrayList<SpanQuery> ors = new ArrayList<SpanQuery>();
ArrayList<SpanQuery> nots = new ArrayList<SpanQuery>();
BooleanClause[] bclauses = qc.getClauses();
// For all clauses e.g. one* two~
for (int i = 0; i < bclauses.length; i++) {
Query childQuery = bclauses[i].getQuery();
// select the list to which we will add these options
ArrayList<SpanQuery> chosenList = ors;
if (bclauses[i].getOccur() == BooleanClause.Occur.MUST_NOT) {
chosenList = nots;
}
if (childQuery instanceof TermQuery) {
TermQuery tq = (TermQuery) childQuery;
SpanTermQuery stq = new SpanTermQuery(tq.getTerm());
stq.setBoost(tq.getBoost());
chosenList.add(stq);
} else if (childQuery instanceof BooleanQuery) {
BooleanQuery cbq = (BooleanQuery) childQuery;
addComplexPhraseClause(chosenList, cbq);
} else {
// TODO alternatively could call extract terms here?
throw new IllegalArgumentException("Unknown query type:"
+ childQuery.getClass().getName());
}
}
if (ors.size() == 0) {
return;
}
SpanOrQuery soq = new SpanOrQuery(ors
.toArray(new SpanQuery[ors.size()]));
if (nots.size() == 0) {
spanClauses.add(soq);
} else {
SpanOrQuery snqs = new SpanOrQuery(nots
.toArray(new SpanQuery[nots.size()]));
SpanNotQuery snq = new SpanNotQuery(soq, snqs);
spanClauses.add(snq);
}
}
@Override
public String toString(String field) {
return "\"" + phrasedQueryStringContents + "\"";
}
@Override
public int hashCode() {
final int prime = 31;
int result = 1;
result = prime * result + ((field == null) ? 0 : field.hashCode());
result = prime
* result
+ ((phrasedQueryStringContents == null) ? 0
: phrasedQueryStringContents.hashCode());
result = prime * result + slopFactor;
result = prime * result + (inOrder ? 1 : 0);
return result;
}
@Override
public boolean equals(Object obj) {
if (this == obj)
return true;
if (obj == null)
return false;
if (getClass() != obj.getClass())
return false;
ComplexPhraseQuery other = (ComplexPhraseQuery) obj;
if (field == null) {
if (other.field != null)
return false;
} else if (!field.equals(other.field))
return false;
if (phrasedQueryStringContents == null) {
if (other.phrasedQueryStringContents != null)
return false;
} else if (!phrasedQueryStringContents
.equals(other.phrasedQueryStringContents))
return false;
if (slopFactor != other.slopFactor)
return false;
return inOrder == other.inOrder;
}
}
}
查询时,无法查询inOrder false。因此,例如,如果我在inOrder为true的情况下查找“ John Jay Tilly”的结果,则效果很好。当我用inOrder false查询“ John Tilly Jay”时,它不起作用。