import java.io.File;
import java.io.FileOutputStream;
import java.io.StringReader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.util.Version;
public class myclass {
public static void main(String[] args) {
FileOutputStream file = null;
String result = "<html><body><div>(i) the recognised association shall have the approval of the Forward Markets Commission established under the Forward Contracts (Regulation) Act, 1952 (74 of 1952) in respect of trading in derivatives and shall function in accordance with the guidelines or conditions laid down by the Forward Markets Commission; </div> <body> </html>";
try {
TermQuery query = new TermQuery(new Term("f", "Forward Markets"));
QueryScorer scorer = new QueryScorer(query);
SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(
"<span class=\"highlight\">", "</span>");
org.apache.lucene.search.highlight.Highlighter highlighter = new org.apache.lucene.search.highlight.Highlighter(
formatter, scorer);
Fragmenter fragmenter = new SimpleFragmenter(result.length());
highlighter.setTextFragmenter(fragmenter);
TokenStream tokenStream = new StandardAnalyzer(Version.LUCENE_29)
.tokenStream("f",
new StringReader(result));
String result1 = highlighter.getBestFragments(tokenStream, result,
1, "...");
if (result1.length() == 0) {
result1 = result;
}
String finalhtml = "<html>" + "<style>\n" + ".highlight{\n"
+ " background: yellow;\n" + "}\n" + "</style>" + "<body>"
+ result1 + "</body></html>";
byte[] contentInBytes = finalhtml.getBytes();
file = new FileOutputStream(new File("E:\\myhtml.html"));
file.write(contentInBytes);
file.flush();
file.close();
} catch (Exception ex) {
}
}
}
这是我的代码,当我设置文本“识别”或“关联”其突出完美但当我设置text =“前进市场”其noe突出显示请告诉我在哪里做错误请帮助我如何突出显示文本如果空间会来的。
答案 0 :(得分:0)
您的问题是recognised
和association
是术语,但forward markets
不是术语。这是两个术语,forward
和markets
,应该按顺序找到。这通常被称为“短语”。可以使用PhraseQuery
来查找它,但通常更简单的方法是使用查询解析器,例如:
StandardQueryParser parser = new StandardQueryParser(analyzer);
Query query = parse.parse("f:\"Forward Markets\"")
Query Scorer scorer = new QueryScorer(query);
//.......
如果您决定手动构建PhraseQuery
,请仔细阅读文档。您必须单独将每个Term添加到查询中,例如:
phrasequery.add(new Term("f", "forward"));
phrasequery.add(new Term("f", "markets"));
在类似的说明中,当手动构建查询时,请注意您的分析器。您在上面查询包括大写。手动传递到TermQuery
的术语不会被分析。如果您使用的分析仪的LowercaseFilter
(例如StandardAnalyzer
),则会导致您找不到任何结果。