如果CSV文件中的行大约为10或2万(1或2 MB),则此脚本按预期工作。如果我尝试推送10万行(10 MB),我会收到错误。
import elasticsearch
es = elasticsearch.Elasticsearch('https://some_site.com/')
import csv
import urllib2
response=open('/var/lib/mysql_orig/test/head.csv', 'r')
csv_file_object = csv.reader(response)
header=['s_id', 'col1', 'col2', 'col3', 'col4', 'col5', 'col6', 'col7', 'col8', 'col9', 'col10', 'col11']
bulk_data = []
op_dict=dict()
for row in csv_file_object:
#row = unicode(row, errors='ignore')
data_dict = {}
op_dict={}
for i in range(len(row)):
data_dict[header[i]] = unicode(row[i], errors='ignore')
op_dict = {
"index": {
"_index": "student",
"_type": "marks",
"_id": data_dict['s_id']
}
}
bulk_data.append(op_dict)
bulk_data.append(data_dict)
es.indices.delete(index = 'student')
es.indices.create(index = 'student', body = request_body)
# bulk index the data
res = es.bulk(index = 'student', body = bulk_data, refresh = True)
ConnectionError: ConnectionError(('Connection aborted.', error(32, 'Broken pipe'))) caused by: ProtocolError(('Connection aborted.', error(32, 'Broken pipe')))
是否有任何设置允许我通过TCP发送大数据包(默认弹性搜索端口9200)?