我正在尝试对 raw 子字段(venue.raw)做聚合。我的索引定义如下:
# Create the 'events' index: a default analyzer (standard tokenizer, HTML
# stripping, lowercase/stop/asciifolding filters) plus the field definitions
# for the 'event' document type.
#
# NOTE(review): the 'event' definition is a direct child of body (a sibling
# of 'analysis'); it is not nested under a 'mappings' key. The index dump
# shown after this snippet lists it under "settings", while "mappings" only
# contains dynamically detected types -- confirm against the create-index API.
my $result = $es->indices->create(
    index => 'events',
    body  => {
        #number_of_shards => 3,
        #number_of_replicas => 2,
        analysis => {
            analyzer => {
                default => {
                    tokenizer   => 'standard',
                    char_filter => ['html_strip'],
                    filter      => [ 'standard', 'lowercase', 'stop', 'asciifolding' ],
                },
            },
        },
        event => {
            properties => {
                # Identifiers and exact-match strings.
                event_id      => { type => 'integer' },
                escaped_name  => { type => 'string', index => 'not_analyzed' },
                escaped_venue => { type => 'string', index => 'not_analyzed' },

                # Free-text fields using the default analyzer.
                name        => { type => 'string' },
                description => { type => 'string' },
                address     => { type => 'string' },
                postcode    => { type => 'string' },
                type        => { type => 'string' },
                tags        => { type => 'string' },

                # Analyzed venue with a not_analyzed 'raw' sub-field.
                venue => {
                    type   => 'string',
                    fields => {
                        raw => { type => 'string', index => 'not_analyzed' },
                    },
                },

                start_date => { type => 'date' },
                end_date   => { type => 'date' },

                # One entry per course offered at the event.
                courses => {
                    type              => 'nested',
                    include_in_parent => 1,
                    properties        => {
                        name     => { type => 'string' },
                        distance => { type => 'integer' },
                        price    => { type => 'integer' },
                    },
                },
            },
        },
    },
);
当然,这里用的是 Perl API,但希望意思是清楚的。
如果我查看索引,对我来说似乎没问题:
"events" : {
"settings" : {
"index" : {
"creation_date" : "1417040345427",
"uuid" : "Ep-7XxfTQIO8pN7CG67_JA",
"number_of_replicas" : "1",
"analysis" : {
"analyzer" : {
"default" : {
"char_filter" : [ "html_strip" ],
"filter" : [ "standard", "lowercase", "stop", "asciifolding" ],
"tokenizer" : "standard"
}
}
},
"number_of_shards" : "5",
"event" : {
"properties" : {
"tags" : {
"type" : "string"
},
"end_date" : {
"type" : "date"
},
"escaped_name" : {
"type" : "string",
"index" : "not_analyzed"
},
"escaped_venue" : {
"type" : "string",
"index" : "not_analyzed"
},
"courses" : {
"type" : "nested",
"include_in_parent" : "1",
"properties" : {
"distance" : {
"type" : "integer"
},
"name" : {
"type" : "string"
},
"price" : {
"type" : "integer"
}
}
},
"address" : {
"type" : "string"
},
"description" : {
"type" : "string"
},
"name" : {
"type" : "string"
},
"event_id" : {
"type" : "integer"
},
"type" : {
"type" : "string"
},
"postcode" : {
"type" : "string"
},
"venue" : {
"type" : "string",
"fields" : {
"raw" : {
"type" : "string",
"index" : "not_analyzed"
}
}
},
"start_date" : {
"type" : "date"
}
}
},
"version" : {
"created" : "1040099"
}
}
},
"mappings" : {
"event" : {
"properties" : {
"address" : {
"type" : "string"
},
"courses" : {
"properties" : {
"distance" : {
"type" : "long"
},
"name" : {
"type" : "string"
},
"price" : {
"type" : "long"
}
}
},
"description" : {
"type" : "string"
},
"end_date" : {
"type" : "date",
"format" : "dateOptionalTime"
},
"escaped_name" : {
"type" : "string"
},
"escaped_venue" : {
"type" : "string"
},
"event_id" : {
"type" : "long"
},
"name" : {
"type" : "string"
},
"postcode" : {
"type" : "string"
},
"start_date" : {
"type" : "date",
"format" : "dateOptionalTime"
},
"tags" : {
"type" : "string"
},
"type" : {
"type" : "string"
},
"venue" : {
"type" : "string"
}
}
}
}
}
}
我已经索引了两个文档,它们都包含 venue 字段。我是否应该能够在搜索中通过 _source 选择 venue.raw?如果我这样做,它似乎不会返回该字段:
# Match every document in the 'events' index and restrict the returned
# _source to the venue-related fields.
# The URL is single-quoted so the shell never treats '?' as a glob character.
curl -X POST 'http://localhost:9200/events/_search?pretty' -d ' {
"query": { "match_all": { } },
"_source": ["venue.raw", "venue", "escaped_venue"]
}'
{
"took" : 6,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"failed" : 0
},
"hits" : {
"total" : 2,
"max_score" : 1.0,
"hits" : [ {
"_index" : "events",
"_type" : "event",
"_id" : "AUnuL7oEm0fJj8toLjY5",
"_score" : 1.0,
"_source":{"escaped_venue":"Herne Hill Velodrome","venue":"Herne Hill Velodrome"}
}, {
"_index" : "events",
"_type" : "event",
"_id" : "AUnuL7nYm0fJj8toLjY4",
"_score" : 1.0,
"_source":{"escaped_venue":"Queen Elizabeth Olympic Park","venue":"Queen Elizabeth Olympic Park"}
} ]
}
}
我也尝试在“venue.raw”字段(以及“escaped_venue”作为测试)聚合:
# Search request for the 'events' index / 'event' type carrying only
# aggregations: four terms aggregations (type, tags, escaped_venue, venue)
# and two range aggregations (distance, price).
my $search_args = do {
    # Aggregation name => field that the terms aggregation buckets on.
    my %terms_field_for = (
        type          => 'type',
        tags          => 'tags',
        escaped_venue => 'escaped_venue',
        venue         => 'venue.raw',
    );

    # Turn ascending edges into adjacent { from, to } ranges followed by a
    # final open-ended { from } range.
    my $ranges_for = sub {
        my @edges  = @_;
        my @ranges = map { +{ from => $edges[$_], to => $edges[ $_ + 1 ] } }
            0 .. $#edges - 1;
        push @ranges, { from => $edges[-1] };
        return \@ranges;
    };

    my %aggs = map { ( $_ => { terms => { field => $terms_field_for{$_} } } ) }
        keys %terms_field_for;

    $aggs{distance} = {
        range => {
            field  => 'distance',
            ranges => $ranges_for->( 0, 30, 50, 100, 150, 200 ),
        },
    };
    $aggs{price} = {
        range => {
            field  => 'price',
            ranges => $ranges_for->( 0, 10, 20, 30, 50 ),
        },
    };

    +{
        index => 'events',
        type  => 'event',
        body  => { aggs => \%aggs },
    };
};
对“venue.raw”的聚合没有返回任何结果,而“escaped_venue”的聚合返回的却是经过分析(分词)后的词条:
{
_shards => { failed => 0, successful => 5, total => 5 },
aggregations => {
distance => {
buckets => [
{
doc_count => 0,
from => 0,
from_as_string => "0.0",
key => "0.0-30.0",
to => 30,
to_as_string => "30.0",
},
{
doc_count => 1,
from => 30,
from_as_string => "30.0",
key => "30.0-50.0",
to => 50,
to_as_string => "50.0",
},
{
doc_count => 0,
from => 50,
from_as_string => "50.0",
key => "50.0-100.0",
to => 100,
to_as_string => "100.0",
},
{
doc_count => 1,
from => 100,
from_as_string => "100.0",
key => "100.0-150.0",
to => 150,
to_as_string => "150.0",
},
{
doc_count => 2,
from => 150,
from_as_string => "150.0",
key => "150.0-200.0",
to => 200,
to_as_string => "200.0",
},
{ doc_count => 0, from => 200, from_as_string => "200.0", key => "200.0-*" },
],
},
escaped_venue => {
buckets => [
{ doc_count => 1, key => "elizabeth" },
{ doc_count => 1, key => "herne" },
{ doc_count => 1, key => "hill" },
{ doc_count => 1, key => "olympic" },
{ doc_count => 1, key => "park" },
{ doc_count => 1, key => "queen" },
{ doc_count => 1, key => "velodrome" },
],
doc_count_error_upper_bound => 0,
sum_other_doc_count => 0,
},
price => {
buckets => [
{
doc_count => 0,
from => 0,
from_as_string => "0.0",
key => "0.0-10.0",
to => 10,
to_as_string => "10.0",
},
{
doc_count => 0,
from => 10,
from_as_string => "10.0",
key => "10.0-20.0",
to => 20,
to_as_string => "20.0",
},
{
doc_count => 0,
from => 20,
from_as_string => "20.0",
key => "20.0-30.0",
to => 30,
to_as_string => "30.0",
},
{
doc_count => 2,
from => 30,
from_as_string => "30.0",
key => "30.0-50.0",
to => 50,
to_as_string => "50.0",
},
{ doc_count => 0, from => 50, from_as_string => "50.0", key => "50.0-*" },
],
},
tags => {
buckets => [
{ doc_count => 2, key => "charity" },
{ doc_count => 1, key => "closed" },
{ doc_count => 1, key => "road" },
],
doc_count_error_upper_bound => 0,
sum_other_doc_count => 0,
},
type => {
buckets => [{ doc_count => 2, key => "sportive" }],
doc_count_error_upper_bound => 0,
sum_other_doc_count => 0,
},
venue => {
buckets => [],
doc_count_error_upper_bound => 0,
sum_other_doc_count => 0,
},
},
hits => {
hits => [
{
_id => "AUnuL7oEm0fJj8toLjY5",
_index => "events",
_score => 0.067124054,
_source => {
address => "104 Burbage Road, London",
courses => [
{ distance => 48, name => "The Short Route", price => 40 },
{ distance => 104, name => "The Medium Route", price => 46 },
{ distance => 166, name => "The Long Route", price => 48 },
],
description => "Formerly known as Ride Around London, the Financial Times London Cycle Sportive, supporting Access Sport will take place on Sunday 30 June and the route takes in many important landmarks from London\xE2\x80\x99s Olympic history including a unique finish on the 1948 Olympic Velodrome at Herne Hill.",
end_date => "2013-06-30",
escaped_name => "financial_times_london_cycle_sportive",
escaped_venue => "Herne Hill Velodrome",
event_id => 2,
name => "Financial Times London Cycle Sportive",
postcode => "SE24 9HE",
start_date => "2013-06-30",
tags => ["charity"],
type => "sportive",
venue => "Herne Hill Velodrome",
},
_type => "event",
},
{
_id => "AUnuL7nYm0fJj8toLjY4",
_index => "events",
_score => 0.04746387,
_source => {
address => "London",
courses => [{ distance => 161, price => 48 }],
description => "Taking a cue from the London Marathon, amateur cyclists will also participate in the Prudential RideLondon-Surrey 100 \xE2\x80\x93 a 100-mile challenge on the same closed roads as the professionals, with the added incentive of raising money for good causes. To capture it all, the best action will be broadcast live on TV in the UK and internationally, to be seen by an audience of millions.",
end_date => "2013-08-04",
escaped_name => "prudential_ridelondon_surrey_100",
escaped_venue => "Queen Elizabeth Olympic Park",
event_id => 1,
name => "Prudential RideLondon-Surrey 100",
postcode => "E20 2ST",
start_date => "2013-08-04",
tags => ["charity", "closed road"],
type => "sportive",
venue => "Queen Elizabeth Olympic Park",
},
_type => "event",
},
],
max_score => 0.067124054,
total => 2,
},
timed_out => bless(do{\(my $o = 0)}, "JSON::XS::Boolean"),
took => 12,
}
如果我通过cURL执行此操作,我还会看到“venue.raw”聚合的空桶。
这里发生了什么?我是否需要显式地填充 raw 字段?
答案 0 :(得分:0)
实际上,在尝试迁移到最新版本的 Elasticsearch 时,我没有正确创建索引:类型映射必须放在 mappings 键之下。
应该是:
# Create the 'events' index with its analysis settings and the explicit
# mapping for the 'event' document type.
#
#   settings.analysis : default analyzer (standard tokenizer, HTML stripping,
#                       lowercase/stop/asciifolding filters)
#   mappings.event    : field definitions, including the not_analyzed
#                       'venue.raw' sub-field used for exact-value aggregations
#
# Index-level settings are wrapped in 'settings'. The documented request body
# has 'settings' and 'mappings' as its top-level sections; a bare top-level
# 'analysis' key next to 'mappings' is not one of them.
my $result = $es->indices->create(
    index => 'events',
    body  => {
        settings => {
            #number_of_shards => 3,
            #number_of_replicas => 2,
            analysis => {
                analyzer => {
                    default => {
                        tokenizer   => 'standard',
                        char_filter => ['html_strip'],
                        filter      => [qw(standard lowercase stop asciifolding)],
                    },
                },
            },
        },
        mappings => {
            event => {
                properties => {
                    event_id     => { type => 'integer' },
                    escaped_name => { type => 'string', index => 'not_analyzed' },
                    name         => { type => 'string' },
                    description  => { type => 'string' },

                    # Analyzed for full-text search; 'raw' keeps the exact
                    # value so terms aggregations bucket on whole venue names.
                    venue => {
                        type   => 'string',
                        fields => {
                            raw => { type => 'string', index => 'not_analyzed' },
                        },
                    },
                    escaped_venue => { type => 'string', index => 'not_analyzed' },
                    address       => { type => 'string' },
                    postcode      => { type => 'string' },
                    type          => { type => 'string' },
                    tags          => { type => 'string' },
                    start_date    => { type => 'date' },
                    end_date      => { type => 'date' },

                    # One entry per course offered at the event.
                    courses => {
                        type              => 'nested',
                        include_in_parent => 1,
                        properties        => {
                            name     => { type => 'string' },
                            distance => { type => 'integer' },
                            price    => { type => 'integer' },
                        },
                    },
                },
            },
        },
    },
);
现在聚合按预期工作。