My Akka consumer that reads from Kafka only pulls a few thousand records at a time. I have to read 100 million records from Kafka and write them to Elasticsearch, and with the current configuration that takes 2 days.
Is there any way to make this faster? My guess is that throttling is happening somewhere while Akka reads from Kafka.
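For context, here is a minimal sketch of how such a pipeline is typically wired with Alpakka Kafka, assuming a committable source feeding the BulkProcessor defined below (the bootstrap servers, group id, topic name, and String deserializers are placeholders, not the real values):

import akka.actor.ActorSystem
import akka.kafka.scaladsl.{Committer, Consumer}
import akka.kafka.{CommitterSettings, ConsumerSettings, Subscriptions}
import org.apache.kafka.common.serialization.StringDeserializer
import org.elasticsearch.action.index.IndexRequest

implicit val system: ActorSystem = ActorSystem("my-es-indexer")

val consumerSettings =
  ConsumerSettings(system, new StringDeserializer, new StringDeserializer)
    .withBootstrapServers("localhost:9092") // placeholder broker address
    .withGroupId("my-es-indexer")           // placeholder group id

// Hand every record to the (asynchronous) BulkProcessor and pass the
// offset downstream so it gets committed in batches.
Consumer
  .committableSource(consumerSettings, Subscriptions.topics("mytopic")) // placeholder topic
  .map { msg =>
    bulkProcessor.add(
      new IndexRequest("myindex", "doc", msg.record.key).source(msg.record.value))
    msg.committableOffset
  }
  .runWith(Committer.sink(CommitterSettings(system)))

This is my configuration: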
My-es-indexer {
  recordbuffer {
    size: 1638400
  }
  akka.kafka {
    consumer {
      # Periodically re-send the latest committed offsets so they
      # don't expire on the broker while partitions are idle.
      commit-refresh-interval: "12 hours"
    }
    committer {
      # Maximum number of offsets batched into a single commit.
      max-batch: 1000000
      # Maximum number of commit batches in flight at once.
      parallelism: 10000
    }
  }
}
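These settings sit under My-es-indexer rather than at the root akka.kafka path, so Alpakka does not pick them up automatically; they are loaded explicitly, roughly like this (a sketch, overlaying the custom block on the reference defaults for any keys the block doesn't set):

import akka.kafka.{CommitterSettings, ConsumerSettings}
import org.apache.kafka.common.serialization.StringDeserializer

val rootConfig = system.settings.config

// ConsumerSettings/CommitterSettings expect a complete config section,
// so fall back to the akka.kafka reference defaults for missing keys.
val consumerSettings = ConsumerSettings(
  rootConfig.getConfig("My-es-indexer.akka.kafka.consumer")
    .withFallback(rootConfig.getConfig("akka.kafka.consumer")),
  new StringDeserializer, new StringDeserializer)

val committerSettings = CommitterSettings(
  rootConfig.getConfig("My-es-indexer.akka.kafka.committer")
    .withFallback(rootConfig.getConfig("akka.kafka.committer")))

And this is how the Elasticsearch BulkProcessor is set up: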
import org.elasticsearch.action.bulk.{BackoffPolicy, BulkProcessor, BulkRequest, BulkResponse}
import org.elasticsearch.action.index.IndexRequest
import org.elasticsearch.client.RequestOptions
import org.elasticsearch.common.unit.{ByteSizeUnit, ByteSizeValue, TimeValue}

val bulkProcessor = BulkProcessor.builder(
  (request, bulkListener) => esClient.bulkAsync(request, RequestOptions.DEFAULT, bulkListener),
  new BulkProcessor.Listener() {
    override def beforeBulk(executionId: Long, request: BulkRequest): Unit = {}
    override def afterBulk(executionId: Long, request: BulkRequest, response: BulkResponse): Unit = {}
    override def afterBulk(executionId: Long, request: BulkRequest, failure: Throwable): Unit =
      logger.warn("error while executing bulk", failure)
  })
  .setBulkActions(100000)                              // flush after 100k buffered actions...
  .setBulkSize(new ByteSizeValue(10, ByteSizeUnit.MB)) // ...or after 10 MB of data...
  .setFlushInterval(TimeValue.timeValueSeconds(5))     // ...or every 5 seconds
  .setConcurrentRequests(1000)                         // allow up to 1000 bulk requests in flight
  .setBackoffPolicy(
    BackoffPolicy.exponentialBackoff(TimeValue.timeValueMillis(100), 3))
  .build()

bulkProcessor.add(new IndexRequest("myindex", "doc", "myid").source(jsondata))
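One note on the BulkProcessor API: it buffers requests internally, so on shutdown the buffer is drained and in-flight bulks are awaited before the ES client is closed, roughly like this (the timeout value is an arbitrary placeholder):

import java.util.concurrent.TimeUnit

// Flush anything still buffered and wait for in-flight bulk
// requests to finish before closing the ES client.
bulkProcessor.flush()
bulkProcessor.awaitClose(1, TimeUnit.MINUTES)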