环境
DB:Sybase Logstash:带有JDBC插件的2.2.0,Elasticsearch输出插件
SQL查询:
select res.id as 'res.id', res.name as 'res.name', tag.name as 'tag.name'
from Res res, ResTags rt, Tags tag
where res.id *= rt.resrow and rt.tagid *= tag.id
SQL结果:
res.id | res.name | tag.name
0 | result0 | null
0 | result0 | tagA
1 | result1 | tagA
1 | result1 | tagB
2 | result2 | tagA
2 | result2 | tagC
索引映射:
{
"mappings": {
"res": {
"properties": {
"id": { "type": "long"},
"name": { "type": "string" },
"tags": {
"type": "nested",
"properties": { "tagname": { "type": "string" }}
}
}
}
}
配置文件:
input {
jdbc {
jdbc_driver_library => "jtds-1.3.1.jar"
jdbc_driver_class => "Java::net.sourceforge.jtds.jdbc.Driver"
jdbc_connection_string => "jdbc:jtds:sybase://hostname.com:1234/schema"
jdbc_user => "george"
jdbc_password => "monkey"
jdbc_fetch_size => 100
statement_filepath => "/home/george/sql"
}
}
output {
elasticsearch {
action => "update"
index => "myres"
document_type => "res"
document_id => "%{res.id}"
script_lang => "groovy"
hosts => [ "my.other.host.com:5921" ]
upsert => ' {
"id" : %{res.id},
"name" : "%{res.name}",
"tags" :[{ "tagname": "%{tag.name}" }]
}'
script => '
if (ctx._source.res.tags.containsValue(null)) {
// if null has been added replace it with actual value
cts._source.res.tags = [{"tagname": "%{tag.name}" }];
else {
// if you find the tag, then do nothing
if (ctx._source.res.tags.containsValue("%{tag.name}")) {}
else {
// if the value you try to add is not null
if (%{tag.name} != null)
// add it as a new object into the tag array
ctx._source.res.tags += {"tagname": "%{tag.name}"};
}
}
'
}
}
GOAL是将从数据库返回的多行添加到ES中,将标记连接为新对象(这是简化示例,因此add_tag和filters不能完成工作,因为我的json结构深于2级(嵌套嵌套,等)))
批量上传到ES后的预期结果是:
{
"hits": {
"total": 3,
"max_score": 1,
"hits": [ {
"_index": "myres",
"_type": "res",
"_id": 0,
"_score": 1,
"_source": {
"res": {
"id":0,
"name": "result0",
"tags": [{"tagname": "tagA"}],
"@version": "2",
"@timestamp": "2016-xx-yy..."
}
},{
"_index": "myres",
"_type": "res",
"_id": 1,
"_score": 1,
"_source": {
"res": {
"id":1,
"name": "result1",
"tags": [{"tagname": "tagA"},{"tagname": "tagB"}],
"@version": "2",
"@timestamp": "2016-xx-yy..."
}
}{
"_index": "myres",
"_type": "res",
"_id": 2,
"_score": 1,
"_source": {
"res": {
"id":2,
"name": "result2",
"tags": [{"tagname": "tagA"},{"tagname": "tagC"],
"@version": "2",
"@timestamp": "2016-xx-yy..."
}
}
}
...
ISSUE:如果在conf,输出部分中脚本未被注释掉,则会弹出以下错误。如果未包含脚本,则仅导入初始标记(按预期方式),而不导入第二个标记。
看起来脚本在elasticsearch输出中不起作用。
错误消息:
[400] {"error":"ActionRequestValidationException[Validation Failed:
1: script or doc is missing;
2: script or doc is missing;
3: script or doc is missing;],"status":400]} {:class=> ... bla bla ...}
备注