我为客户开发了一个必须支持同义词扩展的搜索引擎。我已经能够使用同义词词元过滤器(synonym token filter)和自定义文件(synonym.txt)正确地设置索引。
示例:ipod,i-pod,i pod
但是,每当需要进行同义词扩展时,我都会从Elasticsearch获取同义词,并在网站上以标签的形式显示。每个标签都可以取消选中。在这种情况下,我们如何在查询Elasticsearch时指定使用另一组同义词,而不是synonym.txt文件中的同义词?
示例:如果用户搜索的是ipod,那么我会展示这两个标签:i-pod、i pod。但是,如果用户取消选中"i-pod",我希望能够在查询时指定只有"i pod"是"ipod"的同义词。
我的索引设置为:
{
  "settings": {
    "analysis": {
      "filter": {
        "elision": {
          "type": "elision",
          "articles": ["l", "m", "t", "qu", "n", "s", "j", "d", "c", "jusqu", "quoiqu", "lorsqu", "puisqu"]
        },
        "french_stop": {
          "type": "stop",
          "stopwords": "_french_"
        },
        "french_stemmer": {
          "type": "stemmer",
          "language": "light_french"
        },
        "synonymsFilter": {
          "type": "synonym",
          "synonyms_path": "analysis/synonym.txt"
        },
        "autocompleteFilter": {
          "type": "shingle",
          "min_shingle_size": 2,
          "max_shingle_size": 5
        }
      },
      "analyzer": {
        "default": {
          "tokenizer": "letter",
          "filter": ["lowercase", "elision", "french_stop", "asciifolding", "french_stemmer"]
        },
        "auto-complete-suggester": {
          "type": "custom",
          "char_filter": ["html_strip"],
          "tokenizer": "standard",
          "filter": ["lowercase", "autocompleteFilter"]
        },
        "did-you-mean-suggester": {
          "tokenizer": "standard",
          "filter": ["asciifolding", "lowercase"]
        },
        "synonym_analyzer": {
          "tokenizer": "whitespace",
          "filter": ["synonymsFilter"]
        },
        "synonym_analyzer2": {
          "tokenizer": "standard",
          "filter": ["lowercase", "french_stop", "asciifolding", "autocompleteFilter"]
        }
      }
    }
  },
  "mappings": {
    "companies": {
      "date_detection": false,
      "properties": {
        "auto_complete": {
          "type": "string",
          "analyzer": "auto-complete-suggester",
          "term_vector": "yes"
        },
        "did_you_mean": {
          "type": "string",
          "analyzer": "did-you-mean-suggester",
          "term_vector": "yes"
        },
        "synonyms": {
          "type": "string",
          "analyzer": "synonym_analyzer",
          "term_vector": "yes"
        },
        "company_name": {
          "type": "string",
          "fields": {
            "raw": { "type": "string", "index": "not_analyzed" },
            "suggester": { "type": "string", "analyzer": "did-you-mean-suggester" }
          },
          "term_vector": "yes",
          "copy_to": ["did_you_mean", "auto_complete", "synonyms"]
        },
        "siren": {
          "type": "string",
          "fields": {
            "raw": { "type": "string", "index": "not_analyzed" },
            "suggester": { "type": "string", "analyzer": "did-you-mean-suggester" }
          }
        },
        "CPposteEntreprise": {
          "type": "string",
          "fields": {
            "raw": { "type": "string", "index": "not_analyzed" },
            "suggester": { "type": "string", "analyzer": "did-you-mean-suggester" }
          }
        },
        "commercial_company_name": {
          "type": "string",
          "fields": {
            "raw": { "type": "string", "index": "not_analyzed" },
            "suggester": { "type": "string", "analyzer": "did-you-mean-suggester" }
          },
          "term_vector": "yes",
          "copy_to": ["did_you_mean", "auto_complete", "synonyms"]
        },
        "year_creation_company": {
          "type": "long"
        },
        "month_creation_company": {
          "type": "long"
        },
        "month_year_creation_company": {
          "type": "date",
          "format": "yyyyMM",
          "fields": {
            "raw": { "type": "string", "index": "not_analyzed" }
          }
        },
        "city_company": {
          "type": "string",
          "fields": {
            "suggester": { "type": "string", "analyzer": "did-you-mean-suggester" }
          },
          "term_vector": "yes",
          "copy_to": ["did_you_mean", "auto_complete"]
        },
        "departement_company": {
          "type": "string",
          "fields": {
            "raw": { "type": "string", "index": "not_analyzed" },
            "suggester": { "type": "string", "analyzer": "did-you-mean-suggester" }
          },
          "term_vector": "yes",
          "copy_to": ["did_you_mean", "auto_complete"]
        },
        "region_company": {
          "type": "string",
          "fields": {
            "raw": { "type": "string", "index": "not_analyzed" },
            "suggester": { "type": "string", "analyzer": "did-you-mean-suggester" }
          },
          "term_vector": "yes",
          "copy_to": ["did_you_mean", "auto_complete"]
        },
        "is_excellence": {
          "type": "string",
          "fields": {
            "raw": { "type": "string", "index": "not_analyzed" },
            "suggester": { "type": "string", "analyzer": "did-you-mean-suggester" }
          }
        },
        "interlocuteurs": {
          "type": "string",
          "fields": {
            "raw": { "type": "string", "index": "not_analyzed" },
            "suggester": { "type": "string", "analyzer": "did-you-mean-suggester" }
          },
          "term_vector": "yes",
          "copy_to": ["did_you_mean", "auto_complete"]
        },
        "flag_entreprise_finance": {
          "type": "string",
          "fields": {
            "raw": { "type": "string", "index": "not_analyzed" }
          }
        },
        "flag_indirect": {
          "type": "string",
          "fields": {
            "raw": { "type": "string", "index": "not_analyzed" }
          }
        },
        "flag_direct": {
          "type": "string",
          "fields": {
            "raw": { "type": "string", "index": "not_analyzed" }
          }
        },
        "flag_investissement": {
          "type": "string",
          "fields": {
            "raw": { "type": "string", "index": "not_analyzed" }
          }
        },
        "montant_total_investissement": {
          "type": "double",
          "fields": {
            "raw": { "type": "double", "index": "not_analyzed" }
          }
        },
        "motant_total_finance": {
          "type": "double",
          "fields": {
            "raw": { "type": "double", "index": "not_analyzed" }
          }
        },
        "nombre_investissement": {
          "type": "long",
          "fields": {
            "raw": { "type": "long", "index": "not_analyzed" }
          }
        },
        "nombre_financement_accorde": {
          "type": "long",
          "fields": {
            "raw": { "type": "long", "index": "not_analyzed" }
          }
        },
        "caInterne": {
          "type": "double",
          "fields": {
            "raw": { "type": "double", "index": "not_analyzed" }
          }
        },
        "caExterne": {
          "type": "double",
          "fields": {
            "raw": { "type": "double", "index": "not_analyzed" }
          }
        },
        "caFiltre": {
          "type": "double",
          "fields": {
            "raw": { "type": "double", "index": "not_analyzed" }
          }
        },
        "effectif": {
          "type": "double",
          "fields": {
            "raw": { "type": "double", "index": "not_analyzed" }
          }
        },
        "textRank": {
          "type": "string",
          "fields": {
            "raw": { "type": "string", "index": "not_analyzed" }
          },
          "term_vector": "yes",
          "copy_to": ["synonyms"]
        },
        "masterKeywords": {
          "type": "nested",
          "properties": {
            "keyword": {
              "type": "string",
              "fields": {
                "raw": { "type": "string", "index": "not_analyzed" }
              },
              "term_vector": "yes",
              "copy_to": ["did_you_mean", "auto_complete"]
            }
          }
        },
        "dossiers": {
          "type": "nested",
          "properties": {
            "dossierCommercial": {
              "type": "long"
            },
            "sousDossierCommercial": {
              "type": "long"
            },
            "historiqueProduitBPI": {
              "type": "string"
            },
            "statutSousDossier": {
              "type": "string"
            },
            "dateDecision": {
              "type": "date",
              "fields": {
                "suggester": { "type": "string", "analyzer": "did-you-mean-suggester" },
                "raw": { "type": "string", "index": "not_analyzed" }
              }
            },
            "nomChargesAffaires": {
              "type": "string",
              "fields": {
                "suggester": { "type": "string", "analyzer": "did-you-mean-suggester" }
              }
            },
            "contactChargesAffaires": {
              "type": "string",
              "fields": {
                "suggester": { "type": "string", "analyzer": "did-you-mean-suggester" }
              }
            },
            "montantAide": {
              "type": "double",
              "fields": {
                "suggester": { "type": "string", "analyzer": "did-you-mean-suggester" },
                "raw": { "type": "string", "index": "not_analyzed" }
              }
            },
            "contentValidation": {
              "type": "string",
              "fields": {
                "suggester": { "type": "string", "analyzer": "did-you-mean-suggester" }
              },
              "term_vector": "yes",
              "copy_to": ["did_you_mean", "auto_complete", "synonyms"]
            },
            "contentDecision": {
              "type": "string",
              "fields": {
                "suggester": { "type": "string", "analyzer": "did-you-mean-suggester" }
              },
              "term_vector": "yes",
              "copy_to": ["did_you_mean", "auto_complete", "synonyms"]
            },
            "contentDirectionEngagements": {
              "type": "string",
              "fields": {
                "suggester": { "type": "string", "analyzer": "did-you-mean-suggester" }
              },
              "term_vector": "yes",
              "copy_to": ["did_you_mean", "auto_complete", "synonyms"]
            },
            "metaDomain": {
              "type": "string",
              "fields": {
                "suggester": { "type": "string", "analyzer": "did-you-mean-suggester" }
              }
            },
            "sousSecteur": {
              "type": "string",
              "fields": {
                "suggester": { "type": "string", "analyzer": "did-you-mean-suggester" }
              }
            },
            "keywords": {
              "type": "string",
              "fields": {
                "suggester": { "type": "string", "analyzer": "did-you-mean-suggester" }
              },
              "term_vector": "yes",
              "copy_to": ["did_you_mean", "auto_complete"]
            },
            "descriptionProjet": {
              "type": "string",
              "fields": {
                "suggester": { "type": "string", "analyzer": "did-you-mean-suggester" }
              },
              "term_vector": "yes",
              "copy_to": ["did_you_mean", "auto_complete", "synonyms"]
            }
          }
        },
        "investissements": {
          "type": "nested",
          "properties": {
            "flag_indirect": {
              "type": "string",
              "fields": {
                "raw": { "type": "string", "index": "not_analyzed" }
              }
            },
            "nom_societe_gestion_svi": {
              "type": "string"
            },
            "date_entree_investissement": {
              "type": "date",
              "fields": {
                "raw": { "type": "string", "index": "not_analyzed" }
              }
            },
            "montant_investissement_df": {
              "type": "double"
            },
            "description_projet_investissement": {
              "type": "string",
              "term_vector": "yes",
              "copy_to": ["did_you_mean", "auto_complete", "synonyms"]
            }
          }
        },
        "bilans": {
          "type": "nested",
          "properties": {
            "bilanAnneeN": {
              "properties": {
                "effectif": {
                  "type": "long",
                  "fields": {
                    "raw": { "type": "long", "index": "not_analyzed" }
                  }
                },
                "capital": {
                  "type": "double"
                },
                "resultatNet": {
                  "type": "double"
                },
                "clotureDate": {
                  "type": "date"
                },
                "annee": {
                  "type": "long"
                },
                "ebeMoyen": {
                  "type": "double"
                },
                "caInterne": {
                  "type": "double",
                  "fields": {
                    "raw": { "type": "double", "index": "not_analyzed" }
                  }
                },
                "caExterne": {
                  "type": "double",
                  "fields": {
                    "raw": { "type": "double", "index": "not_analyzed" }
                  }
                }
              }
            },
            "bilanAnneeN1": {
              "properties": {
                "effectif": {
                  "type": "long",
                  "fields": {
                    "raw": { "type": "long", "index": "not_analyzed" }
                  }
                },
                "capital": {
                  "type": "double"
                },
                "resultatNet": {
                  "type": "double"
                },
                "clotureDate": {
                  "type": "date"
                },
                "annee": {
                  "type": "long"
                },
                "ebeMoyen": {
                  "type": "double"
                },
                "caInterne": {
                  "type": "double",
                  "fields": {
                    "raw": { "type": "double", "index": "not_analyzed" }
                  }
                },
                "caExterne": {
                  "type": "double",
                  "fields": {
                    "raw": { "type": "double", "index": "not_analyzed" }
                  }
                }
              }
            },
            "bilanAnneeN2": {
              "properties": {
                "effectif": {
                  "type": "long",
                  "fields": {
                    "raw": { "type": "long", "index": "not_analyzed" }
                  }
                },
                "capital": {
                  "type": "double"
                },
                "resultatNet": {
                  "type": "double"
                },
                "clotureDate": {
                  "type": "date"
                },
                "annee": {
                  "type": "long"
                },
                "ebeMoyen": {
                  "type": "double"
                },
                "caInterne": {
                  "type": "double",
                  "fields": {
                    "raw": { "type": "double", "index": "not_analyzed" }
                  }
                },
                "caExterne": {
                  "type": "double",
                  "fields": {
                    "raw": { "type": "double", "index": "not_analyzed" }
                  }
                }
              }
            }
          }
        },
        "news": {
          "type": "nested",
          "properties": {
            "date": {
              "type": "date",
              "fields": {
                "suggester": { "type": "string", "analyzer": "did-you-mean-suggester" },
                "raw": { "type": "string", "index": "not_analyzed" }
              }
            },
            "description": {
              "type": "string",
              "term_vector": "yes",
              "copy_to": ["did_you_mean", "auto_complete", "synonyms"]
            },
            "title": {
              "type": "string"
            },
            "content": {
              "type": "string",
              "term_vector": "yes",
              "copy_to": ["did_you_mean", "auto_complete", "synonyms"]
            },
            "url": {
              "type": "string"
            },
            "tags": {
              "type": "string",
              "term_vector": "yes",
              "copy_to": ["did_you_mean", "auto_complete"]
            },
            "links": {
              "type": "string"
            },
            "external_source": {
              "type": "string"
            }
          }
        }
      }
    }
  }
}
目前,我使用一个名为"synonyms"的主字段(各字段的内容通过copy_to汇总到该字段)。这是个好主意吗?
提前感谢您的帮助。