如何处理大量的ElasticSearch Index操作?

时间:2016-04-07 17:50:20

标签: node.js elasticsearch firebase ubuntu-14.04

我使用Node.js为照片共享应用开发了一个后端,并使用Firebase作为数据库。
这是一段截图。 'Posts' section in Firebase
我将把'Posts'部分索引到ElasticSearch(https://www.firebase.com/blog/2014-01-02-queries-part-two.html)。 这是索引代码:

// initialize our ElasticSearch API
var client = new ElasticClient({ host: 'localhost', port: 9200 });

// listen for changes to Firebase data
// (the handlers are function declarations below; hoisting makes them
// available here even though they are defined later in the file)
var fb = new Firebase('https://mydb.firebaseio.com/Posts');
fb.on('child_added',   createOrUpdateIndex);
fb.on('child_changed', createOrUpdateIndex);
fb.on('child_removed', removeIndex);
// target ElasticSearch index name and document type used by all operations
var index = 'firebase';
var type = 'post';
// Index (create or overwrite) the snapshot's value in ElasticSearch,
// keyed by the Firebase child key, for both child_added and child_changed.
function createOrUpdateIndex(snap) {
   var key = snap.key();
   var request = client.index(index, type, snap.val(), key);
   request.on('data', function(data) {
      console.log('indexed ', key);
   });
   request.on('error', function(err) { console.log(err); });
   request.exec();
}

// Delete the mirrored ElasticSearch document when a Firebase child is removed.
function removeIndex(snap) {
   var key = snap.key();
   client.deleteDocument(index, type, key, function(error, data) {
      if (error) {
         console.error('failed to delete', key, error);
      } else {
         console.log('deleted', key);
      }
   });
}

它与数百个帖子配合得很好。 但是有超过10K的帖子,它会在ElasticSearch日志窗口中产生错误,如下所示:

[2016-04-07 16:15:32,851][WARN ][indices.cluster          ] [Caretaker] [[firebase][1]] marking and sending shard failed due to [engine failure, reason [index]]
java.nio.file.FileSystemException: /Users/user/Downloads/elasticsearch-2.3.1/data/elasticsearch/nodes/0/indices/firebase/1/index/_a.fdt: Too many open files in system
	at sun.nio.fs.UnixException.translateToIOException(UnixException.java:91)
	at sun.nio.fs.UnixException.rethrowAsIOException(UnixException.java:102)
	at sun.nio.fs.UnixException.rethrowAsIOException(UnixException.java:107)
	at sun.nio.fs.UnixFileSystemProvider.newByteChannel(UnixFileSystemProvider.java:214)
	at java.nio.file.spi.FileSystemProvider.newOutputStream(FileSystemProvider.java:430)
	at java.nio.file.Files.newOutputStream(Files.java:172)
	at org.apache.lucene.store.FSDirectory$FSIndexOutput.<init>(FSDirectory.java:271)
	at org.apache.lucene.store.FSDirectory.createOutput(FSDirectory.java:224)
	at org.apache.lucene.store.FileSwitchDirectory.createOutput(FileSwitchDirectory.java:155)
	at org.apache.lucene.store.RateLimitedFSDirectory.createOutput(RateLimitedFSDirectory.java:40)
	at org.apache.lucene.store.FilterDirectory.createOutput(FilterDirectory.java:73)
	at org.apache.lucene.store.LockValidatingDirectoryWrapper.createOutput(LockValidatingDirectoryWrapper.java:44)
	at org.apache.lucene.store.TrackingDirectoryWrapper.createOutput(TrackingDirectoryWrapper.java:43)
	at org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.<init>(CompressingStoredFieldsWriter.java:111)
	at org.apache.lucene.codecs.compressing.CompressingStoredFieldsFormat.fieldsWriter(CompressingStoredFieldsFormat.java:128)
	at org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat.fieldsWriter(Lucene50StoredFieldsFormat.java:183)
	at org.apache.lucene.index.DefaultIndexingChain.initStoredFieldsWriter(DefaultIndexingChain.java:81)
	at org.apache.lucene.index.DefaultIndexingChain.startStoredFields(DefaultIndexingChain.java:279)
	at org.apache.lucene.index.DefaultIndexingChain.processDocument(DefaultIndexingChain.java:316)
	at org.apache.lucene.index.DocumentsWriterPerThread.updateDocument(DocumentsWriterPerThread.java:234)
	at org.apache.lucene.index.DocumentsWriter.updateDocument(DocumentsWriter.java:450)
	at org.apache.lucene.index.IndexWriter.updateDocument(IndexWriter.java:1477)
	at org.elasticsearch.index.engine.InternalEngine.innerIndex(InternalEngine.java:541)
	at org.elasticsearch.index.engine.InternalEngine.index(InternalEngine.java:457)
	at org.elasticsearch.index.shard.IndexShard.index(IndexShard.java:601)
	at org.elasticsearch.index.engine.Engine$Index.execute(Engine.java:836)
	at org.elasticsearch.action.index.TransportIndexAction.executeIndexRequestOnPrimary(TransportIndexAction.java:237)
	at org.elasticsearch.action.index.TransportIndexAction.shardOperationOnPrimary(TransportIndexAction.java:158)
	at org.elasticsearch.action.index.TransportIndexAction.shardOperationOnPrimary(TransportIndexAction.java:66)
	at org.elasticsearch.action.support.replication.TransportReplicationAction$PrimaryPhase.doRun(TransportReplicationAction.java:639)
	at org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:37)
	at org.elasticsearch.action.support.replication.TransportReplicationAction$PrimaryOperationTransportHandler.messageReceived(TransportReplicationAction.java:279)
	at org.elasticsearch.action.support.replication.TransportReplicationAction$PrimaryOperationTransportHandler.messageReceived(TransportReplicationAction.java:271)
	at org.elasticsearch.transport.RequestHandlerRegistry.processMessageReceived(RequestHandlerRegistry.java:75)
	at org.elasticsearch.transport.TransportService$4.doRun(TransportService.java:376)
	at org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:37)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
	at java.lang.Thread.run(Thread.java:745)


我一直在努力解决这个错误,现在我发现,这是因为在短时间内触发大量'child_added'事件时,ElasticSearch打开的文件数超过了系统限制。
我想我需要缓存索引操作以避免此错误,将打开文件的限制保持为默认值。 我怎么能这样做?

1 个答案:

答案 0(得分:0)

您可以使用信号量将运行索引操作的数量限制为指定的容量。看看我的帖子:https://stackoverflow.com/a/37456691/2733216

代码未经过测试,但应该有效。

// initialize our ElasticSearch API
var client = new ElasticClient({ host: 'localhost', port: 9200 });

// listen for changes to Firebase data
var fb = new Firebase('https://mydb.firebaseio.com/Posts');
fb.on('child_added',   createOrUpdateIndex);
fb.on('child_changed', createOrUpdateIndex);
fb.on('child_removed', removeIndex);
var index = 'firebase';
var type = 'post';

// Create a semaphore of capacity 1.
// BUG FIX: require('semaphore') returns a factory function, not a
// semaphore — it must be called with the desired capacity. Without the
// (1) call, semaphore.take() below would fail at runtime.
var semaphore = require('semaphore')(1);

// Index the snapshot, but only after acquiring the semaphore so at most
// `capacity` index requests are in flight at once — this keeps the number
// of files ElasticSearch holds open under the OS limit.
function createOrUpdateIndex(snap) {
   semaphore.take(function () {
      var released = false;
      // Release the semaphore exactly once, even if both the 'data' and
      // 'error' events fire for the same request. Calling leave() twice
      // would inflate the semaphore's capacity and defeat the throttle.
      function release() {
         if (!released) {
            released = true;
            semaphore.leave();
         }
      }
      client.index(index, type, snap.val(), snap.key())
        .on('data', function(data) {
           release();
           console.log('indexed ', snap.key());
        })
        .on('error', function(err) {
           release();
           console.log(err);
        })
        .exec();
   });
}

// Remove the mirrored document from ElasticSearch when the Firebase child
// is deleted. Deletes are not throttled by the semaphore.
function removeIndex(snap) {
   client.deleteDocument(index, type, snap.key(), function(error, data) {
      if (error) {
         console.error('failed to delete', snap.key(), error);
         return;
      }
      console.log('deleted', snap.key());
   });
}

然后,根据系统的并发处理能力,您可以将信号量的容量从 1 调整为 x。