以下是我的配置和代码。基本上,我正在尝试通过预输入(typeahead)搜索从 ES 索引中获取记录。单个词的搜索可以按预期工作,但仅限于单个字段;多个词的搜索则完全不起作用。
我的需求是:在多个字段中进行搜索,并根据搜索词返回匹配的记录。例如,如果我搜索名称“Jason K Smith”,则查询应在所有字段(名称、地址、名字、姓氏等)上运行,因为被搜索的文本可能分布在多个字段中。另外,如果我搜索两个名称,例如“Mike John”,则结果应包含与这两个名称匹配的记录(我认为这是可以做到的,但也可能是我理解错了)。
下面是我的代码:
hibernate.cfg.xml
<!-- Use the Elasticsearch index manager as the default for all indexes. -->
<property name="hibernate.search.default.indexmanager">elasticsearch</property>
<!-- Address of the Elasticsearch node: a local instance on port 9200. -->
<property name="hibernate.search.default.elasticsearch.host">http://127.0.0.1:9200</property>
<!-- NOTE(review): "drop-and-create" presumably drops and recreates the indexes at startup, discarding previously indexed data; confirm this is only used in development. -->
<property name="hibernate.search.default.elasticsearch.index_schema_management_strategy">drop-and-create</property>
<!-- Required index status is set to "yellow". -->
<property name="hibernate.search.default.elasticsearch.required_index_status">yellow</property>
实体类
/**
 * Indexed entity whose name properties are each written to three index fields:
 * one analyzed with "standardAnalyzer", one with "autocompleteEdgeAnalyzer"
 * (prefixed "edgeNGram") and one with "autocompleteNGramAnalyzer" (prefixed "nGram").
 * All of these fields are indexed, stored and analyzed.
 */
@Entity
@Indexed
public class MYClass {
// The following three properties carry no @Field annotation in this snippet.
private DBAccessStatus dBAccessStatus;
private String optname = "";
private String phone1 = "";
// clientname -> "clientname", "edgeNGramClientname", "nGramClientname"
@Fields({
@Field(name = "clientname", index = Index.YES, store = Store.YES,
analyze = Analyze.YES, analyzer = @Analyzer(definition = "standardAnalyzer")),
@Field(name = "edgeNGramClientname", index = Index.YES, store = Store.YES,
analyze = Analyze.YES, analyzer = @Analyzer(definition = "autocompleteEdgeAnalyzer")),
@Field(name = "nGramClientname", index = Index.YES, store = Store.YES,
analyze = Analyze.YES, analyzer = @Analyzer(definition = "autocompleteNGramAnalyzer"))
})
private String clientname = "";
// firstname -> "firstname", "edgeNGramFirstName", "nGramFirstName"
// (note the capital "N" in "FirstName"; the other n-gram field names capitalize only the first letter)
@Fields({
@Field(name = "firstname", index = Index.YES, store = Store.YES,
analyze = Analyze.YES, analyzer = @Analyzer(definition = "standardAnalyzer")),
@Field(name = "edgeNGramFirstName", index = Index.YES, store = Store.YES,
analyze = Analyze.YES, analyzer = @Analyzer(definition = "autocompleteEdgeAnalyzer")),
@Field(name = "nGramFirstName", index = Index.YES, store = Store.YES,
analyze = Analyze.YES, analyzer = @Analyzer(definition = "autocompleteNGramAnalyzer"))
})
private String firstname = "";
// midname -> "midname", "edgeNGramMidname", "nGramMidname"
@Fields({
@Field(name = "midname", index = Index.YES, store = Store.YES,
analyze = Analyze.YES, analyzer = @Analyzer(definition = "standardAnalyzer")),
@Field(name = "edgeNGramMidname", index = Index.YES, store = Store.YES,
analyze = Analyze.YES, analyzer = @Analyzer(definition = "autocompleteEdgeAnalyzer")),
@Field(name = "nGramMidname", index = Index.YES, store = Store.YES,
analyze = Analyze.YES, analyzer = @Analyzer(definition = "autocompleteNGramAnalyzer"))
})
private String midname = "";
// Name prefix and suffix carry no @Field annotation in this snippet.
private String prefixnm = "";
private String suffixnm = "";
// longname -> "longname", "edgeNGramLongname", "nGramLongname"
@Fields({
@Field(name = "longname", index = Index.YES, store = Store.YES,
analyze = Analyze.YES, analyzer = @Analyzer(definition = "standardAnalyzer")),
@Field(name = "edgeNGramLongname", index = Index.YES, store = Store.YES,
analyze = Analyze.YES, analyzer = @Analyzer(definition = "autocompleteEdgeAnalyzer")),
@Field(name = "nGramLongname", index = Index.YES, store = Store.YES,
analyze = Analyze.YES, analyzer = @Analyzer(definition = "autocompleteNGramAnalyzer"))
})
private String longname = "";
分析器定义
// Named analyzer definitions referenced via @Analyzer(definition = "...") on the entity fields:
// "autocompleteEdgeAnalyzer", "autocompleteNGramAnalyzer", "standardAnalyzer" and "textanalyzer".
@AnalyzerDefs({
@AnalyzerDef(name = "autocompleteEdgeAnalyzer",
// Tokenizer: KeywordTokenizerFactory.
// NOTE(review): a keyword tokenizer presumably emits the whole field value as one token,
// so the edge n-grams below would be prefixes of the entire value rather than of each
// word - confirm; this would explain why only the first word of a value can be matched.
tokenizer = @TokenizerDef(factory = KeywordTokenizerFactory.class),
filters = {
// Replace every character that is not an ASCII letter, a digit or a dot
// with a single space (replace = "all").
@TokenFilterDef(factory = PatternReplaceFilterFactory.class, params = {
@Parameter(name = "pattern", value = "([^a-zA-Z0-9\\.])"),
@Parameter(name = "replacement", value = " "),
@Parameter(name = "replace", value = "all") }),
@TokenFilterDef(factory = LowerCaseFilterFactory.class),
@TokenFilterDef(factory = StopFilterFactory.class),
// Index partial words starting at the front (3 to 50 characters), so we can
// provide autocomplete functionality
@TokenFilterDef(factory = EdgeNGramFilterFactory.class, params = {
@Parameter(name = "minGramSize", value = "3"),
@Parameter(name = "maxGramSize", value = "50") }) }),
@AnalyzerDef(name = "autocompleteNGramAnalyzer",
// Tokenizer: StandardTokenizerFactory.
tokenizer = @TokenizerDef(factory = StandardTokenizerFactory.class),
filters = {
// WordDelimiterFilterFactory with no parameters, followed by lowercasing,
// 3- to 5-character n-grams, and finally the same pattern replacement as above
// (listed last, so it is applied to the n-grams).
@TokenFilterDef(factory = WordDelimiterFilterFactory.class),
@TokenFilterDef(factory = LowerCaseFilterFactory.class),
@TokenFilterDef(factory = NGramFilterFactory.class, params = {
@Parameter(name = "minGramSize", value = "3"),
@Parameter(name = "maxGramSize", value = "5") }),
@TokenFilterDef(factory = PatternReplaceFilterFactory.class, params = {
@Parameter(name = "pattern", value = "([^a-zA-Z0-9\\.])"),
@Parameter(name = "replacement", value = " "),
@Parameter(name = "replace", value = "all") }) }),
@AnalyzerDef(name = "standardAnalyzer",
// Tokenizer: StandardTokenizerFactory.
tokenizer = @TokenizerDef(factory = StandardTokenizerFactory.class),
filters = {
// Same chain as "autocompleteNGramAnalyzer" but without the n-gram filter:
// word delimiter, lowercase, then the pattern replacement.
@TokenFilterDef(factory = WordDelimiterFilterFactory.class),
@TokenFilterDef(factory = LowerCaseFilterFactory.class),
@TokenFilterDef(factory = PatternReplaceFilterFactory.class, params = {
@Parameter(name = "pattern", value = "([^a-zA-Z0-9\\.])"),
@Parameter(name = "replacement", value = " "),
@Parameter(name = "replace", value = "all") }) }),
@AnalyzerDef(name = "textanalyzer", tokenizer = @TokenizerDef(factory = StandardTokenizerFactory.class), filters = {
@TokenFilterDef(factory = LowerCaseFilterFactory.class),
@TokenFilterDef(factory = SnowballPorterFilterFactory.class, params = {
@Parameter(name = "language", value = "English") }) }) // end of "textanalyzer" (lowercase + Snowball filter, language "English")
})
搜索结果示例
{
"_index" : "com.csc.pt.svc.data.to.bascltj001to",
"_type" : "com.csc.pt.svc.data.to.Bascltj001TO",
"_id" : "44,13",
"_score" : 1.0,
"_source" : {
"id" : "44,13",
"cltseqnum" : 44,
"addrseqnum" : "13",
"clientname" : "Thompsan 1",
"edgeNGramClientname" : "Thompsan 1",
"nGramClientname" : "Thompsan 1",
"firstname" : "Robert",
"edgeNGramFirstName" : "Robert",
"nGramFirstName" : "Robert",
"longname" : "Robert Thompsan",
"edgeNGramLongname" : "Robert Thompsan",
"nGramLongname" : "Robert Thompsan",
"addrln1" : "1 Main Street",
"edgeNGramAddrln1" : "1 Main Street",
"nGramAddrln1" : "1 Main Street",
"city" : "Columbia",
"edgeNGramCity" : "Columbia",
"nGramCity" : "Columbia",
"state" : "SC",
"edgeNGramState" : "SC",
"nGramState" : "SC",
"zipcode" : "29224",
"edgeNGramZipcode" : "29224",
"nGramZipcode" : "29224",
"country" : "USA",
"edgeNGramCountry" : "USA",
"nGramCountry" : "USA"
}
},
当前应用的代码:
// Names of the n-gram index fields queried by the type-ahead search.
// Each property has an "edgeNGram..." and an "nGram..." variant; the name
// constants below match the @Field names declared on the entity
// (e.g. "edgeNGramFirstName" with a capital "N").

// Name fields
protected static final String FIRSTNAME_EDGE_NGRAM_INDEX = "edgeNGramFirstName";
protected static final String FIRSTNAME_NGRAM_INDEX = "nGramFirstName";
protected static final String MIDNAME_EDGE_NGRAM_INDEX = "edgeNGramMidname";
protected static final String MIDNAME_NGRAM_INDEX = "nGramMidname";
// NOTE(review): the PHONE1 constants are not referenced by getClt0100Data(), and phone1
// has no @Field annotation in the entity snippet - confirm whether they are still needed.
protected static final String PHONE1_EDGE_NGRAM_INDEX = "edgeNGramPhone1";
protected static final String PHONE1_NGRAM_INDEX = "nGramPhone1";
protected static final String LONGNAME_EDGE_NGRAM_INDEX = "edgeNGramLongname";
protected static final String LONGNAME_NGRAM_INDEX = "nGramLongname";
protected static final String CLIENT_EDGE_NGRAM_INDEX = "edgeNGramClientname";
protected static final String CLIENT_NGRAM_INDEX = "nGramClientname";
// Address lines 1-4
protected static final String ADDRLN1_EDGE_NGRAM_INDEX = "edgeNGramAddrln1";
protected static final String ADDRLN1_NGRAM_INDEX = "nGramAddrln1";
protected static final String ADDRLN2_EDGE_NGRAM_INDEX = "edgeNGramAddrln2";
protected static final String ADDRLN2_NGRAM_INDEX = "nGramAddrln2";
protected static final String ADDRLN3_EDGE_NGRAM_INDEX = "edgeNGramAddrln3";
protected static final String ADDRLN3_NGRAM_INDEX = "nGramAddrln3";
protected static final String ADDRLN4_EDGE_NGRAM_INDEX = "edgeNGramAddrln4";
protected static final String ADDRLN4_NGRAM_INDEX = "nGramAddrln4";
// City, state and country
protected static final String CITY_EDGE_NGRAM_INDEX = "edgeNGramCity";
protected static final String CITY_NGRAM_INDEX = "nGramCity";
protected static final String STATE_EDGE_NGRAM_INDEX = "edgeNGramState";
protected static final String STATE_NGRAM_INDEX = "nGramState";
protected static final String COUNTRY_EDGE_NGRAM_INDEX = "edgeNGramCountry";
protected static final String COUNTRY_NGRAM_INDEX = "nGramCountry";
/**
 * Runs the type-ahead search and stores the matches in {@code resultsClt0100List}.
 *
 * <p>Builds a phrase query (slop 5, boosted to 5) over the edge n-gram and n-gram
 * index fields for first name, middle name, long name, client name, address lines
 * 1-4, city, state and country, using the lower-cased search text from
 * {@code this.data}. The query is executed against {@code Bascltj001TO}, projected
 * onto thirteen stored fields, and each result tuple is converted into a
 * {@code Cltj001ElasticSearchResponseTO}.
 */
protected void getClt0100Data(){
    Query query = queryBuilder.phrase().withSlop(5).
            onField(FIRSTNAME_EDGE_NGRAM_INDEX).andField(FIRSTNAME_NGRAM_INDEX)
            .andField(MIDNAME_EDGE_NGRAM_INDEX).andField(MIDNAME_NGRAM_INDEX)
            .andField(LONGNAME_EDGE_NGRAM_INDEX).andField(LONGNAME_NGRAM_INDEX)
            .andField(CLIENT_EDGE_NGRAM_INDEX).andField(CLIENT_NGRAM_INDEX)
            .andField(ADDRLN1_EDGE_NGRAM_INDEX).andField(ADDRLN1_NGRAM_INDEX)
            .andField(ADDRLN2_EDGE_NGRAM_INDEX).andField(ADDRLN2_NGRAM_INDEX)
            .andField(ADDRLN3_EDGE_NGRAM_INDEX).andField(ADDRLN3_NGRAM_INDEX)
            .andField(ADDRLN4_EDGE_NGRAM_INDEX).andField(ADDRLN4_NGRAM_INDEX)
            .andField(CITY_EDGE_NGRAM_INDEX).andField(CITY_NGRAM_INDEX)
            .andField(STATE_EDGE_NGRAM_INDEX).andField(STATE_NGRAM_INDEX)
            .andField(COUNTRY_EDGE_NGRAM_INDEX).andField(COUNTRY_NGRAM_INDEX)
            .boostedTo(5).sentence(this.data.getSearchText().toLowerCase()).createQuery();

    // Every projected field must be its own argument: the order here defines the
    // tuple indexes read in transformTuple() below (13 fields -> tuple[0]..tuple[12],
    // with "cltseqnum" at index 2 being the only non-String value).
    String[] projectedFields = {
            "longname", "firstname", "cltseqnum", "midname", "clientname",
            "addrln1", "addrln2", "addrln3", "addrln4",
            "city", "state", "zipcode", "country" };

    FullTextQuery fullTextQuery = fullTextSession.createFullTextQuery(query, Bascltj001TO.class);
    // NOTE(review): the offset is set to the page size, so the first pageSize hits are
    // always skipped. If a page index is available, the offset should presumably be
    // pageIndex * pageSize - confirm against the caller.
    fullTextQuery.setMaxResults(this.data.getPageSize()).setFirstResult(this.data.getPageSize())
            .setProjection(projectedFields)
            .setResultTransformer( new BasicTransformerAdapter() {
                @Override
                public Cltj001ElasticSearchResponseTO transformTuple(Object[] tuple, String[] aliases) {
                    return new Cltj001ElasticSearchResponseTO((String) tuple[0], (String) tuple[1], (long) tuple[2], (String) tuple[3], (String) tuple[4],
                            (String) tuple[5],(String) tuple[6],(String) tuple[7],(String) tuple[8],(String) tuple[9], (String) tuple[10], (String) tuple[11], (String) tuple[12]);
                }
            });
    resultsClt0100List = fullTextQuery.getResultList();
}
答案 0 :(得分:0)
你的做法很奇怪。
我不明白为什么要对 ngram 字段使用短语(phrase)搜索。我认为这样效果不会好。
我认为简单的查询字符串更适合您的需求:https://docs.jboss.org/hibernate/search/5.8/reference/en-US/html_single/#_simple_query_string_queries。
但是再说一次,您到处都在使用ngram,而您所描述的所需功能实际上并不需要ngram,因为看起来您期望精确的搜索。
我建议您从简单的做法开始:先使用一个去除重音符号并将文本转换为小写的分析器,让搜索正常工作。
然后,如果您真的想要某种模糊搜索,请考虑使用ngram。