我正在使用 Elasticsearch 按出演的演员来搜索电影。例如,当我搜索“莱昂纳多·迪卡普里奥”时,会返回大约 10 部电影,但它们的得分各不相同。由于这些电影都包含同一位演员,我希望它们的得分相同。有人能解释一下为什么会出现这种情况,以及该如何避免吗?
Elasticsearch版本1.7.2
映射:
{
"programs": {
"mappings": {
"program_doc_type": {
"properties": {
"cast": {
"type": "string",
"analyzer": "keyword_analyzer",
"fields": {
"name": {
"type": "string",
"analyzer": "name_analyzer"
}
}
},
"django_id": {
"type": "integer"
},
"has_poster": {
"type": "boolean"
},
"imdb_id": {
"type": "string",
"index": "not_analyzed"
},
"kind": {
"type": "string",
"index": "not_analyzed"
},
"record_url_count": {
"type": "integer"
},
"release_date": {
"type": "date",
"format": "dateOptionalTime"
},
"release_year": {
"type": "integer"
},
"title": {
"type": "string",
"analyzer": "pattern"
},
"tms_id": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
}
}
分析器:
"analysis": {
"analyzer": {
"keyword_analyzer": {
"type": "custom",
"filter": [
"lowercase"
],
"tokenizer": "keyword"
},
"name_analyzer": {
"type": "custom",
"filter": [
"lowercase"
],
"tokenizer": "whitespace"
}
}
}
查询:
{
"query": {
"match": {"cast.name": "leonardo dicaprio"}
}
}
排名靠前的结果:
{
"took": 12,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 62,
"max_score": 12.046804,
"hits": [
{
"_index": "programs",
"_type": "program_doc_type",
"_id": "1077511",
"_score": 12.046804,
"_source": {
"imdb_id": "tt4007278",
"tms_id": "",
"record_url_count": 0,
"release_date": "2014-08-20",
"title": "Carbon",
"has_poster": false,
"release_year": 2014,
"django_id": 1077511,
"kind": "movie",
"cast": [
"Leonardo DiCaprio"
]
}
},
{
"_index": "programs",
"_type": "program_doc_type",
"_id": "812919",
"_score": 11.906615,
"_source": {
"imdb_id": "tt2076929",
"tms_id": "",
"record_url_count": 0,
"title": "Satori",
"has_poster": false,
"release_year": 2014,
"django_id": 812919,
"kind": "N/A",
"cast": [
"Leonardo DiCaprio"
]
}
},
{
"_index": "programs",
"_type": "program_doc_type",
"_id": "376792",
"_score": 11.886408,
"_source": {
"imdb_id": "tt0402538",
"tms_id": "",
"record_url_count": 0,
"title": "Titanic: The Premiere",
"has_poster": true,
"release_year": 2000,
"django_id": 376792,
"kind": "movie",
"cast": [
"Leonardo DiCaprio"
]
}
},
{
"_index": "programs",
"_type": "program_doc_type",
"_id": "306106",
"_score": 11.69776,
"_source": {
"imdb_id": "tt0325727",
"tms_id": "",
"record_url_count": 0,
"release_date": "1998-08-16",
"title": "Leo Mania",
"has_poster": true,
"release_year": 1998,
"django_id": 306106,
"kind": "movie",
"cast": [
"Leonardo DiCaprio"
]
}
},
{
"_index": "programs",
"_type": "program_doc_type",
"_id": "269743",
"_score": 9.637444,
"_source": {
"imdb_id": "tt0286234",
"tms_id": "",
"record_url_count": 0,
"title": "Total Eclipse",
"has_poster": false,
"release_year": 1995,
"django_id": 269743,
"kind": "movie",
"cast": [
"Leonardo DiCaprio",
"Agnieszka Holland"
]
}
},
{
"_index": "programs",
"_type": "program_doc_type",
"_id": "840945",
"_score": 9.358208,
"_source": {
"imdb_id": "tt2195237",
"tms_id": "",
"record_url_count": 0,
"release_date": "2004-12-01",
"title": "MovieReal: The Aviator",
"has_poster": false,
"release_year": 2004,
"django_id": 840945,
"kind": "series",
"cast": [
"Leonardo DiCaprio",
"Martin Scorsese"
]
}
},
{
"_index": "programs",
"_type": "program_doc_type",
"_id": "382168",
"_score": 9.358208,
"_source": {
"imdb_id": "tt0408269",
"tms_id": "",
"record_url_count": 0,
"release_date": "1998-09-29",
"title": "To Leo with Love",
"has_poster": true,
"release_year": 1998,
"django_id": 382168,
"kind": "movie",
"cast": [
"Jo Wyatt",
"Leonardo DiCaprio"
]
}
},
{
"_index": "programs",
"_type": "program_doc_type",
"_id": "846212",
"_score": 7.2280827,
"_source": {
"imdb_id": "tt2218442",
"tms_id": "",
"record_url_count": 0,
"title": "Legacy of Secrecy",
"has_poster": false,
"release_year": 1947,
"django_id": 846212,
"kind": "N/A",
"cast": [
"Leonardo DiCaprio",
"Robert De Niro",
"D'Anthony Palms"
]
}
},
{
"_index": "programs",
"_type": "program_doc_type",
"_id": "595027",
"_score": 7.1439695,
"_source": {
"imdb_id": "tt1294988",
"tms_id": "",
"record_url_count": 0,
"release_date": "2006-09-27",
"title": "Emporio Armani 'Red' One Night Only",
"has_poster": false,
"release_year": 2006,
"django_id": 595027,
"kind": "movie",
"cast": [
"Kim Cattrall",
"Leonardo DiCaprio",
"Beyoncé Knowles"
]
}
},
{
"_index": "programs",
"_type": "program_doc_type",
"_id": "752646",
"_score": 7.1439695,
"_source": {
"imdb_id": "tt1826731",
"tms_id": "",
"record_url_count": 0,
"release_date": "2009-06-02",
"title": "Lives of Quiet Desperation: The Making of Revolutionary Road",
"has_poster": false,
"release_year": 2009,
"django_id": 752646,
"kind": "movie",
"cast": [
"Kathy Bates",
"Leonardo DiCaprio",
"Kate Winslet"
]
}
}
]
}
}
更新:
我禁用了字段长度归一值(field-length norm),结果似乎改善了很多,但得分仍然不完全相同,我还是很困惑。根据我所读到的,相关性由三个因素决定:词频(TF)、逆文档频率(IDF)和字段长度归一值。
由于 Leonardo DiCaprio 在每个节目的演员表中都只出现一次,在我看来它们应该有相同的分数,但事实并非如此。也许是我理解有误。以下是禁用字段长度归一值后的更新设置:
映射:
{
"programs": {
"mappings": {
"program_doc_type": {
"properties": {
"cast": {
"type": "string",
"norms": {
"enabled": false
},
"analyzer": "keyword_analyzer",
"fields": {
"name": {
"type": "string",
"norms": {
"enabled": false
},
"analyzer": "name_analyzer"
}
}
},
"django_id": {
"type": "integer"
},
"has_poster": {
"type": "boolean"
},
"imdb_id": {
"type": "string",
"index": "not_analyzed"
},
"kind": {
"type": "string",
"index": "not_analyzed"
},
"record_url_count": {
"type": "integer"
},
"release_date": {
"type": "date",
"format": "dateOptionalTime"
},
"release_year": {
"type": "integer"
},
"title": {
"type": "string",
"analyzer": "pattern"
},
"tms_id": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
}
}
排名靠前的结果:
{
"took": 20,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 836,
"max_score": 13.778852,
"hits": [
{
"_index": "programs",
"_type": "program_doc_type",
"_id": "421026",
"_score": 13.778852,
"_source": {
"tms_id": "",
"django_id": 421026,
"imdb_id": "tt0449557",
"has_poster": false,
"release_date": "2005-05-24",
"kind": "movie",
"cast": [
"Leonardo DiCaprio",
"Jeffrey M. Schwartz",
"Donald L. Barlett",
"James B. Steele"
],
"release_year": 2005,
"record_url_count": 0,
"title": "The Affliction of Howard Hughes: Obsessive-Compulsive Disorder"
}
},
{
"_index": "programs",
"_type": "program_doc_type",
"_id": "555015",
"_score": 13.778852,
"_source": {
"tms_id": "MV002510340000",
"django_id": 555015,
"imdb_id": "tt1130884",
"has_poster": true,
"release_date": "2010-02-19",
"kind": "movie",
"cast": [
"Leonardo DiCaprio",
"Mark Ruffalo",
"Ben Kingsley",
"Max von Sydow"
],
"release_year": 2010,
"record_url_count": 2,
"title": "Shutter Island"
}
},
{
"_index": "programs",
"_type": "program_doc_type",
"_id": "104669",
"_score": 13.778852,
"_source": {
"tms_id": "",
"django_id": 104669,
"imdb_id": "tt0108330",
"has_poster": true,
"release_date": "1993-04-23",
"kind": "movie",
"cast": [
"Robert De Niro",
"Ellen Barkin",
"Leonardo DiCaprio",
"Jonah Blechman"
],
"release_year": 1993,
"record_url_count": 1,
"title": "This Boy's Life"
}
},
{
"_index": "programs",
"_type": "program_doc_type",
"_id": "846212",
"_score": 13.778852,
"_source": {
"django_id": 846212,
"title": "Legacy of Secrecy",
"imdb_id": "tt2218442",
"has_poster": false,
"kind": "N/A",
"cast": [
"Leonardo DiCaprio",
"Robert De Niro",
"D'Anthony Palms"
],
"release_year": 1947,
"record_url_count": 0,
"tms_id": ""
}
},
{
"_index": "programs",
"_type": "program_doc_type",
"_id": "256632",
"_score": 13.778852,
"_source": {
"django_id": 256632,
"title": "The Movie Show",
"imdb_id": "tt0271918",
"has_poster": false,
"kind": "series",
"cast": [
"Ray Brady",
"Russell Crowe",
"Larry Day",
"Leonardo DiCaprio"
],
"release_year": 1986,
"record_url_count": 0,
"tms_id": ""
}
},
{
"_index": "programs",
"_type": "program_doc_type",
"_id": "269743",
"_score": 13.778852,
"_source": {
"django_id": 269743,
"title": "Total Eclipse",
"imdb_id": "tt0286234",
"has_poster": false,
"kind": "movie",
"cast": [
"Leonardo DiCaprio",
"Agnieszka Holland"
],
"release_year": 1995,
"record_url_count": 0,
"tms_id": ""
}
},
{
"_index": "programs",
"_type": "program_doc_type",
"_id": "1007190",
"_score": 13.778852,
"_source": {
"tms_id": "",
"django_id": 1007190,
"imdb_id": "tt3391950",
"has_poster": false,
"release_date": "2013-12-29",
"kind": "series",
"cast": [
"Leonardo DiCaprio",
"Jonah Hill",
"Martin Scorsese",
"Terence Winter"
],
"release_year": 2013,
"record_url_count": 0,
"title": "The Hollywood Reporter in Focus"
}
},
{
"_index": "programs",
"_type": "program_doc_type",
"_id": "1077511",
"_score": 13.778852,
"_source": {
"tms_id": "",
"django_id": 1077511,
"imdb_id": "tt4007278",
"has_poster": false,
"release_date": "2014-08-20",
"kind": "movie",
"cast": [
"Leonardo DiCaprio"
],
"release_year": 2014,
"record_url_count": 0,
"title": "Carbon"
}
},
{
"_index": "programs",
"_type": "program_doc_type",
"_id": "302615",
"_score": 13.57246,
"_source": {
"django_id": 302615,
"title": "Directors: James Cameron",
"imdb_id": "tt0322031",
"has_poster": true,
"kind": "movie",
"cast": [
"Michael Biehn",
"James Cameron",
"Jamie Lee Curtis",
"Leonardo DiCaprio"
],
"release_year": 1997,
"record_url_count": 0,
"tms_id": ""
}
},
{
"_index": "programs",
"_type": "program_doc_type",
"_id": "509785",
"_score": 13.57246,
"_source": {
"tms_id": "",
"django_id": 509785,
"imdb_id": "tt0923573",
"has_poster": false,
"release_date": "2003-05-06",
"kind": "movie",
"cast": [
"Frank Abagnale Jr.",
"Amy Adams",
"Nathalie Baye",
"Leonardo DiCaprio"
],
"release_year": 2003,
"record_url_count": 0,
"title": "'Catch Me If You Can': The Casting of the Film"
}
}
]
}
}
结果得到了很大改善,但最后2个得分与其他结果不同。
答案 0 :(得分:0)
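Elasticsearch 默认的相关性模型称为 TF/IDF,更多信息可参阅 Elasticsearch 官方指南中关于相关性评分原理的章节。
您在搜索结果中看到的 _score 就是由该模型计算得出的。
基本上,分数由以下三个因素计算得出(详见官方指南):
词频(TF):词条在该文档字段中出现的次数越多,得分越高
逆文档频率(IDF):词条在整个索引中出现得越频繁,权重越低
字段长度归一值(field-length norm):字段越短,权重越高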
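正如您可以从上面推断的那样,由于每个包含 leonardo dicaprio 的文档中匹配词条的数量、字段长度以及匹配词条在整个索引中的出现次数各不相同,它们的相关性得分也就不同。
另外,IDF 默认是按分片分别计算的,因此即使禁用了字段长度归一值,位于不同分片上的文档得分仍可能略有差异;可以在查询时使用 search_type=dfs_query_then_fetch,让所有分片使用全局的词条统计值。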
尽管如此,对于包含leonardo dicaprio
的文档,您获得的分数高于不包含// define the function
/**
 * Shows exactly one of the two forms depending on the current value of the
 * `#formtype` control: `#form1` when the value equals "A" (loose comparison),
 * `#form2` otherwise.
 */
var toggleElements = function () {
  // Loose equality is kept on purpose so the comparison behaves exactly as before.
  var showFirstForm = $('#formtype').val() == "A";

  // .toggle(true) is equivalent to .show(), .toggle(false) to .hide().
  $('#form1').toggle(showFirstForm);
  $('#form2').toggle(!showFirstForm);
};

// Re-evaluate which form is visible every time the selection changes.
$('#formtype').on('change', toggleElements);

// Apply the initial visibility once the DOM is ready.
$(document).ready(toggleElements);
的文档。
希望它有所帮助。