我正在尝试将 elasticsearch 查询转换为 rxjs 流,并因此通过一次获取 1 个数据点的方式使用 Scroll API。以下是 node.js 代码:
// elasticClient initialized only with { host: 'hostname.domain:9200' }
import elasticClient from '../elasticHelper';
import * as Rx from 'rxjs';
import * as _ from 'lodash';
/**
 * Fetches the next page of a scrolling search and re-emits it in the same
 * `{scrollId, data}` shape as searchElastic, so `expand` can recurse on it.
 *
 * @param {string} scroll   - scroll keep-alive duration (e.g. '30s').
 * @param {string} scrollId - scroll id returned by the previous page.
 * @returns {Rx.Observable<{scrollId: string, data: *}>} `data` is the
 *   `_source` of the first (only, since size is 1) hit, or `undefined`
 *   once the scroll is exhausted — downstream `takeWhile` relies on that
 *   to stop the recursion.
 */
function scrollElastic(scroll, scrollId) {
  return Rx.Observable
    .fromPromise(elasticClient.scroll({scroll, scrollId}))
    .map(({_scroll_id, hits: {hits}}) => ({
      scrollId: _scroll_id,
      // Take the first hit directly instead of mapping the whole hits
      // array just to index [0]; still undefined for an empty page.
      data: hits.length ? hits[0]._source : undefined
    }));
}
/**
 * Runs a scrolling search over the 'data' index, one hit per page, and
 * returns a stream that emits each hit's `_source` until exhaustion.
 *
 * NOTE(review): the scroll context is never explicitly cleared
 * (clearScroll); it is only left to expire after the 30s keep-alive.
 * With size 1, every emitted data point costs one HTTP round-trip —
 * confirm this is intended, as it is very chatty against the cluster.
 *
 * @param {{query: Object, sort: Array}} param0 - elasticsearch query/sort.
 * @returns {Rx.Observable<*>} stream of `_source` objects.
 */
function searchElastic({query, sort}) {
  const body = {
    size: 1, // one data point per round-trip
    query,
    _source: { excludes: ['editable'] },
    sort
  };
  // keep the search results "scrollable" for 30 secs between fetches
  const scroll = '30s';
  return Rx.Observable
    .fromPromise(elasticClient.search({ index: 'data', body, scroll }))
    .map(({_scroll_id, hits: {hits}}) => ({
      scrollId: _scroll_id,
      // First hit's _source directly (no need to map the whole array);
      // undefined when the page is empty.
      data: hits.length ? hits[0]._source : undefined
    }))
    // invokes scrollElastic recursively on each emitted page
    .expand(({scrollId}) => scrollElastic(scroll, scrollId))
    // stop recursion when there are no more data
    .takeWhile(res => res.data != null)
    .map(x => x.data);
}
/**
 * Replaces the matching ancestor id (a string) in `ancestors` with a
 * `{type, name}` summary of the fetched entry `e`, in place, but only for
 * output formats that render it ('csv' or 'json'). No-op otherwise, or
 * when `e.id` is not present in `ancestors`.
 *
 * @param {Array} ancestors - mutated in place (ids and/or summary objects).
 * @param {Object} e        - entry with `id` and optionally `type`/`name`.
 * @param {string} format   - requested output format.
 */
function updateAncestors(ancestors, e, format) {
  if (format === 'csv' || format === 'json') {
    const idx = ancestors.indexOf(e.id);
    if (idx > -1) {
      // Stdlib equivalent of _.pick(e, ['type', 'name']): copy only the
      // keys that actually exist on (or are inherited by) `e`, so a key
      // missing from `e` is also missing from the summary.
      const summary = {};
      ['type', 'name'].forEach(key => {
        if (key in e) {
          summary[key] = e[key];
        }
      });
      ancestors[idx] = summary;
    }
  }
}
// Streams each entry matched by `entriesQuery` whose ancestors also match
// the parent filter from `query`; as a side effect, replaces matched
// ancestor ids in `entry.ancestors` with {type, name} summaries (see
// updateAncestors) and fetches the remaining ones via fetchAncestors.
// NOTE(review): `getElasticQuery` and `fetchAncestors` are not defined in
// this excerpt — their contracts are assumed from usage here.
function getEntryQueryStream(entriesQuery, query) {
const {parentSearchFilter, filter, format} = query;
return searchElastic(entriesQuery)
.concatMap(entry => {
const ancestors = entry.ancestors || [];
// if no parents => doesn't match
if(!ancestors.length) {
return Rx.Observable.empty();
}
// Restrict the parent query to exactly this entry's ancestor ids.
const parentsQuery = getElasticQuery(parentSearchFilter, filter);
parentsQuery.query.filtered.filter.bool.must.push({
terms: {
id: ancestors
}
});
// fetch parent entries
return searchElastic(parentsQuery)
// Side effect: swaps each returned parent's id inside `ancestors`
// for a {type, name} summary (mutates the array in place).
.do(e => updateAncestors(ancestors, e, format))
.count()
.concatMap(count => {
// no parents match query
if(!count) {
return Rx.Observable.empty();
}
// fetch all other ancestors that weren't part of the query results
// and are still a string (id) — summaries are objects by now.
const restAncestorsToFetch = ancestors.filter(x => _.isString(x));
// Emit the (mutated) entry only after all ancestors are resolved.
return fetchAncestors(restAncestorsToFetch, ancestors, format)
.concatMap(() => Rx.Observable.just(entry));
});
});
}
/**
 * Builds the date/type-filtered entries query, streams the matching
 * entries through getEntryQueryStream, and writes them to the HTTP
 * response, ending it on completion and sending a 500 on failure.
 *
 * Fixes over the posted version:
 *  - the `// ...stuff)` line comments swallowed the closing parens of
 *    `concatMap`/`map` (syntax error) — placeholders are block comments now;
 *  - the subscribe() observer callbacks were separated by `;` instead of
 *    `,`, which is invalid inside an argument list.
 *
 * @param {Object} query - parsed request query parameters.
 * @param {Object} res   - HTTP response object (write/status/json/end).
 */
function executeQuery(query, res) {
  try {
    // NOTE(review): `startdate`/`enddate` are not defined in this excerpt
    // ("some dates passed here") — confirm where they come from.
    const entriesQuery = {
      query: {
        filtered: {
          filter: {
            bool: {
              must: [{
                range: {
                  creationTimestamp: {
                    gte: startdate.toISOString(),
                    lte: enddate.toISOString()
                  }
                }
              }, {
                query: {
                  query_string: {
                    query: "+type:*bla* +name:*blabla*"
                  }
                }
              }]
            }
          }
        }
      },
      sort: [{creationTimestamp: {order: 'asc'}, id: {order: 'asc'}}]
    };
    getEntryQueryStream(entriesQuery, query)
      .concatMap(e => Rx.Observable.just(e) /* do non elastic stuff */)
      .map(e => e /* do stuff */)
      .subscribe(
        x => res.write(x),
        err => {
          console.error(err);
          res.status(500).json(err);
        },
        () => res.end()
      );
    // NOTE(review): runs synchronously, before the stream above completes,
    // and `enrichedEntries` is not defined in this excerpt — verify intent.
    HandlerFactory.buildHandler(query, enrichedEntries, res);
  } catch (e) {
    logger.error(e);
    res.status(500).json(e);
  }
}
订阅该流并处理了一些条目之后,elasticClient 报出以下错误并停止工作:
Elasticsearch ERROR: 2018-06-20T12:01:13Z
Error: Request error, retrying -- connect EADDRNOTAVAIL X.X.X.X:9200 - Local (X.X.X.X:0)
at Log.error (/home/xenia.siskaki/sandbox/webapi-server/node_modules/elasticsearch/src/lib/log.js:218:56)
at checkRespForFailure (/home/xenia.siskaki/sandbox/webapi-server/node_modules/elasticsearch/src/lib/transport.js:211:18)
at HttpConnector.<anonymous> (/home/xenia.siskaki/sandbox/webapi-server/node_modules/elasticsearch/src/lib/connectors/http.js:153:7)
at ClientRequest.wrapper (/home/xenia.siskaki/sandbox/webapi-server/node_modules/lodash/index.js:3095:19)
at emitOne (events.js:96:13)
at ClientRequest.emit (events.js:188:7)
at Socket.socketErrorListener (_http_client.js:309:9)
at emitOne (events.js:96:13)
at Socket.emit (events.js:188:7)
at emitErrorNT (net.js:1277:8)
at _combinedTickCallback (internal/process/next_tick.js:80:11)
at process._tickDomainCallback (internal/process/next_tick.js:128:9)
Elasticsearch WARNING: 2018-06-20T12:01:13Z
Unable to revive connection: http://.....:9200/
Elasticsearch WARNING: 2018-06-20T12:01:13Z
No living connections
elasticsearch
服务器日志中没有错误
这条 GitHub issue 评论(This GitHub ticket comment)表示问题在于打开的文件描述符过多。但是,当我尝试手动清除滚动上下文(scroll context)时,它提示找不到该 scrollId。我猜这可能不是问题所在,因为只有在前一次调用返回结果后才会发起下一次 scrollElastic 调用,因此滚动上下文在耗尽后会自动关闭。
elasticsearch
v1.7和驱动程序v8
env:OpenSUSE 12.3
node
:v6.11.1
编辑:不使用 rxjs 时似乎可以正常工作。