elasticsearch:使用 Scroll API 流式获取结果时出现 “No living connections”(无存活连接)错误

时间:2018-06-20 12:09:45

标签: elasticsearch rxjs

我正在尝试将 elasticsearch 查询转换为 rxjs 流,因此使用 Scroll API 每次只获取 1 个数据点。以下是 node.js 中的代码:
// elasticClient initialized only with { host: 'hostname.domain:9200' } 
import elasticClient from '../elasticHelper';
import * as Rx from 'rxjs';
import * as _ from 'lodash';

/**
 * Requests the next page of an open scroll and exposes it as an Observable.
 *
 * @param {string} scroll - Forwarded as the `scroll` option of `elasticClient.scroll`.
 * @param {string} scrollId - Forwarded as the `scrollId` option of `elasticClient.scroll`.
 * @returns {Rx.Observable} Emits one `{scrollId, data}` object, where `scrollId`
 *     is the response's `_scroll_id` and `data` is the `_source` of the first
 *     hit (`undefined` when the response contains no hits).
 */
function scrollElastic(scroll, scrollId) {
    const pendingResponse = elasticClient.scroll({scroll, scrollId});

    // Reduce the raw response to the scroll id plus the first hit's document.
    const toPage = response => {
        const sources = response.hits.hits.map(hit => hit._source);
        return {scrollId: response._scroll_id, data: sources[0]};
    };

    return Rx.Observable.fromPromise(pendingResponse).map(toPage);
}

/**
 * Runs a search against the `data` index and streams the matching documents
 * one at a time by following the scroll returned with each response.
 *
 * @param {Object} params
 * @param {Object} params.query - Placed in the request body as `query`.
 * @param {*} params.sort - Placed in the request body as `sort`.
 * @returns {Rx.Observable} Emits the `_source` of each hit (with the
 *     `editable` field excluded) and completes at the first page without data.
 */
function searchElastic({query, sort}) {
    // keep the search results "scrollable" for 30 secs
    const scroll = '30s';

    // size: 1 => every request (initial search and each scroll) yields one hit
    const body = {size: 1, query, _source: {excludes: ['editable']}, sort};

    const firstPage = Rx.Observable
        .fromPromise(elasticClient.search({index: 'data', body, scroll}))
        .map(response => ({
            scrollId: response._scroll_id,
            data: response.hits.hits.map(hit => hit._source)[0]
        }));

    return firstPage
        // each emitted page triggers the fetch of the following page
        .expand(page => scrollElastic(scroll, page.scrollId))
        // stop the recursion at the first page that carries no data
        .takeWhile(page => page.data != null)
        .map(page => page.data);
}

/**
 * Replaces, in place, the id of `e` inside `ancestors` with an object holding
 * only the `type` and `name` properties of `e`.
 *
 * Nothing happens unless `format` is `'csv'` or `'json'` and `e.id` is present
 * in `ancestors`; only the first occurrence is replaced.
 *
 * @param {Array} ancestors - List that is mutated in place.
 * @param {Object} e - Entry whose `id` is looked up in `ancestors`.
 * @param {string} format - Requested output format.
 * @returns {undefined}
 */
function updateAncestors(ancestors, e, format) {
    const isSupportedFormat = format === 'csv' || format === 'json';
    if (!isSupportedFormat) {
        return;
    }

    const position = ancestors.indexOf(e.id);
    if (position === -1) {
        return;
    }

    ancestors[position] = _.pick(e, ['type', 'name']);
}

/**
 * Streams the entries matching `entriesQuery`, keeping only those that have
 * at least one ancestor matching the parent query.
 *
 * For every entry the parent query is built with `getElasticQuery` and
 * restricted to the entry's ancestor ids. Matching parents are written into
 * the entry's `ancestors` array via `updateAncestors`; the ids that are still
 * strings afterwards are handed to `fetchAncestors`.
 *
 * NOTE(review): `getElasticQuery` and `fetchAncestors` are not visible here;
 * the entry is emitted once per value that `fetchAncestors` emits.
 *
 * @param {Object} entriesQuery - Query object passed to `searchElastic`.
 * @param {Object} query - Supplies `parentSearchFilter`, `filter` and `format`.
 * @returns {Rx.Observable} Emits the entries that passed the parent check.
 */
function getEntryQueryStream(entriesQuery, query) {
    const {parentSearchFilter, filter, format} = query;

    // Resolves a single entry to a stream that emits it only when its parents match.
    const keepIfParentsMatch = entry => {
        const ancestors = entry.ancestors || [];

        // an entry without parents can never match
        if (!ancestors.length) {
            return Rx.Observable.empty();
        }

        const parentsQuery = getElasticQuery(parentSearchFilter, filter);
        const ancestorIdRestriction = {terms: {id: ancestors}};
        parentsQuery.query.filtered.filter.bool.must.push(ancestorIdRestriction);

        // load the matching parents, patching `ancestors` as they arrive
        const matchedParentCount = searchElastic(parentsQuery)
            .do(parent => updateAncestors(ancestors, parent, format))
            .count();

        return matchedParentCount.concatMap(count => {
            // none of the parents satisfied the query
            if (!count) {
                return Rx.Observable.empty();
            }

            // ancestors that were not part of the query results are still
            // plain string ids and have to be fetched separately
            const unresolvedIds = ancestors.filter(x => _.isString(x));
            return fetchAncestors(unresolvedIds, ancestors, format)
                .concatMap(() => Rx.Observable.just(entry));
        });
    };

    return searchElastic(entriesQuery).concatMap(keepIfParentsMatch);
}

function executeQuery(query, res) {
    try {
        const entriesQuery = {
            query: {
                filtered: {
                    filter: {
                        bool: {
                            must: [{
                                range: {
                                    creationTimestamp: {
                                        // some dates passed here
                                        gte: startdate.toISOString(),
                                        lte: enddate.toISOString()
                                    }
                                }
                            }, {
                                query: {
                                    query_string: {
                                        query: "+type:*bla* +name:*blabla*"
                                    }
                                }
                            }]
                        }
                    }
                }
            },
            sort: [{creationTimestamp: {order: 'asc'}, id: {order: 'asc'}}]
        };

        getEntryQueryStream(entriesQuery, query)
            .concatMap(e => // do non elastic stuff)
            .map(e => // do stuff)
            .subscribe(
                x => res.write(x);
                err => {
                    console.error(err);
                    res.status(500).json(err);
                },
                () => res.end()
            );

        HandlerFactory.buildHandler(query, enrichedEntries, res);
    } catch(e) {
        logger.error(e);
        res.status(500).json(e);
    }
}

订阅流并处理了一些条目之后,elasticClient死于以下错误:

Elasticsearch ERROR: 2018-06-20T12:01:13Z
  Error: Request error, retrying -- connect EADDRNOTAVAIL X.X.X.X:9200 - Local (X.X.X.X:0)
      at Log.error (/home/xenia.siskaki/sandbox/webapi-server/node_modules/elasticsearch/src/lib/log.js:218:56)
      at checkRespForFailure (/home/xenia.siskaki/sandbox/webapi-server/node_modules/elasticsearch/src/lib/transport.js:211:18)
      at HttpConnector.<anonymous> (/home/xenia.siskaki/sandbox/webapi-server/node_modules/elasticsearch/src/lib/connectors/http.js:153:7)
      at ClientRequest.wrapper (/home/xenia.siskaki/sandbox/webapi-server/node_modules/lodash/index.js:3095:19)
      at emitOne (events.js:96:13)
      at ClientRequest.emit (events.js:188:7)
      at Socket.socketErrorListener (_http_client.js:309:9)
      at emitOne (events.js:96:13)
      at Socket.emit (events.js:188:7)
      at emitErrorNT (net.js:1277:8)
      at _combinedTickCallback (internal/process/next_tick.js:80:11)
      at process._tickDomainCallback (internal/process/next_tick.js:128:9)

Elasticsearch WARNING: 2018-06-20T12:01:13Z
  Unable to revive connection: http://.....:9200/

Elasticsearch WARNING: 2018-06-20T12:01:13Z
  No living connections

elasticsearch服务器日志中没有错误

这条 GitHub 问题(ticket)下的评论指出,问题在于打开的文件描述符过多。但是,当我尝试手动清除滚动(scroll)上下文时,它提示未找到 scrollId。我猜这可能不是问题所在,因为只有在前一次调用返回结果之后才会再次调用 scrollElastic,所以滚动上下文在结果耗尽后会自动关闭。

elasticsearch v1.7和驱动程序v8

env:OpenSUSE 12.3

node:v6.11.1

编辑

不使用 rxjs 时似乎可以正常工作

0 个答案:

没有答案