我正在开发一个将数据发送到 Flask 后端的应用程序。Flask 随后把接收到的内容写入 Elasticsearch。写入之前,它会先检查该 ID 是否已存在:如果存在,则更新对应文档;否则将其作为新文档插入索引。
示例代码:
from flask import Flask
from flask import jsonify, request
import jwt
from elasticsearch import Elasticsearch
app = Flask(__name__)
@app.route('/test',methods=['POST'])
def hello():
    """Insert or update the document whose ``id`` field matches the posted id.

    Reads ``id`` from the POSTed form, searches the ``sample`` index for a
    document whose ``id`` field matches, and then either indexes a new
    document (no hit) or issues a partial update against the first hit.

    NOTE: the search and the following write are two separate calls, so this
    check-then-write sequence is not atomic; two requests carrying the same
    id can both take the "not found" path and both insert.

    Returns:
        A JSON response ``{'status': 'success'}``, or the "Invalid Token"
        payload when ``jwt.InvalidTokenError`` is raised.
    """
    try:
        doc_id = request.form['id']
        database = "sample"
        es = Elasticsearch("localhost", port=9200)
        cols = es.search(index=database, body={"query": {"match": {"id": doc_id}}})
        hits = cols['hits']['hits']
        if not hits:
            # Nothing matched: create a fresh document (Elasticsearch picks the _id).
            newvalues = {"url": "hello", 'id': doc_id}
            es.index(index=database, doc_type="logs", body=newvalues)
        else:
            # Already there: apply a partial update to the first matching document.
            newvalues = {}
            es.update(index=database, doc_type='logs', id=hits[0]['_id'],
                      body={"doc": newvalues})
        return jsonify({'status': 'success'})
    except jwt.InvalidTokenError as e:
        print(e)
        return jsonify({'success': 'false', 'message': 'Invalid Token!!!'})
if __name__ == "__main__":
    # Run the Flask app on localhost:5005 with debug mode enabled; any
    # exception escaping app.run() is printed rather than propagated.
    server_options = dict(host="localhost", port=5005, debug=True, processes=1)
    try:
        app.run(**server_options)
    except Exception as exc:
        print("exception in test", exc)
问题在于:前端每 5 秒发送一次请求,因此有时会发生冲突——某个 ID 的插入尚未完成时,又收到了带有相同 ID 的请求。第二个请求查询时认为该 ID 在数据库中不存在,于是也执行了插入,结果索引中出现了 2 条 ID 相同的数据。我该怎样做才能一次只执行一个插入,让另一个请求等待?
python-3.6
已编辑: 尝试使用信号量:
from flask import Flask
from flask import jsonify, request
import jwt
from elasticsearch import Elasticsearch
import threading
sLock = threading.Semaphore()
app = Flask(__name__)
@app.route('/test',methods=['POST'])
def hello():
    """Insert or update the document for the posted id, serialized by ``sLock``.

    The whole search-then-write sequence runs while holding the module-level
    semaphore ``sLock``. The semaphore is taken with a ``with`` statement so
    it is released on every exit path: normal return, the
    ``jwt.InvalidTokenError`` return, and any other exception. Releasing it
    only on the success path would leave it held after the first failure and
    block every later request.

    NOTE(review): Elasticsearch search is near-real-time, so a document
    indexed by the previous request may not be visible to this search until
    the index has refreshed; the lock alone may therefore not prevent
    duplicates. Consider passing ``refresh='wait_for'`` to ``es.index``.

    Returns:
        A JSON response ``{'status': 'success'}``, or the "Invalid Token"
        payload when ``jwt.InvalidTokenError`` is raised.
    """
    with sLock:
        try:
            doc_id = request.form['id']
            database = "sample"
            es = Elasticsearch("localhost", port=9200)
            cols = es.search(index=database, body={"query": {"match": {"id": doc_id}}})
            hits = cols['hits']['hits']
            if not hits:
                # Nothing matched: create a fresh document.
                newvalues = {"url": "hello", 'id': doc_id}
                es.index(index=database, doc_type="logs", body=newvalues)
            else:
                # Already there: apply a partial update to the first matching document.
                newvalues = {}
                es.update(index=database, doc_type='logs', id=hits[0]['_id'],
                          body={"doc": newvalues})
            return jsonify({'status': 'success'})
        except jwt.InvalidTokenError as e:
            print(e)
            return jsonify({'success': 'false', 'message': 'Invalid Token!!!'})
if __name__ == "__main__":
    # Run the Flask app on localhost:5005 with debug mode enabled; any
    # exception escaping app.run() is printed rather than propagated.
    server_options = dict(host="localhost", port=5005, debug=True, processes=1)
    try:
        app.run(**server_options)
    except Exception as exc:
        print("exception in test", exc)
谢谢!
答案 0 :(得分:1)
您可以使用 mget 方法并设置一个时间阈值。这样,您就不必为每个 ID 单独发送一次请求,而是发送一个包含 ID 列表的请求(文档见此处)。
from datetime import datetime, timedelta
from elasticsearch import Elasticsearch
from elasticsearch import helpers
# Ids collected across requests and waiting to be flushed; must be created
# before the Flask route is declared.
idL = []
# Batch-size trigger: hello() flushes once len(idL) exceeds this value.
threshold = 5
# Reference timestamp that hello() compares against for the time-based flush.
now = datetime.now()
# Time window used together with `now`; the value set here is 30 seconds.
delta = timedelta(seconds=30)
def update(result):
    """Send a bulk partial update for every id in ``result``.

    Builds the actions with ``createUpdateElem`` and streams them through
    ``helpers.parallel_bulk``, printing the info of any action reported as
    unsuccessful.

    Relies on a module-level ``es`` client, which is not defined in this
    snippet and must exist before this is called.

    Args:
        result: Iterable of document ids to update.
    """
    actions = createUpdateElem(result)
    # parallel_bulk is a lazy generator: it must be iterated for the requests to be sent.
    for success, info in helpers.parallel_bulk(client=es, actions=actions):
        if not success:
            print(info)
def index(result):
    """Bulk-index one document for every id in ``result``.

    Builds the actions with ``createIndexElem`` and streams them through
    ``helpers.parallel_bulk``, printing the info of any action reported as
    unsuccessful.

    Relies on a module-level ``es`` client, which is not defined in this
    snippet and must exist before this is called.

    Args:
        result: Iterable of document ids to index.
    """
    actions = createIndexElem(result)
    # parallel_bulk is a lazy generator: it must be iterated for the requests to be sent.
    for success, info in helpers.parallel_bulk(client=es, actions=actions):
        if not success:
            print(info)
def createIndexElem(result, index_name='database', source=None):
    """Yield one bulk ``index`` action per id in ``result``.

    Args:
        result: Iterable of document ids; each becomes the action's ``_id``.
        index_name: Target index name. Defaults to ``'database'``, the value
            that was previously hard-coded.
        source: Document body to store under ``_source``. Defaults to the
            placeholder payload that was previously hard-coded.

    Yields:
        A dict with ``_op_type``, ``_index``, ``_id`` and ``_source`` keys.
    """
    if source is None:
        source = {'question': 'The life, universe and everything.'}
    for elem in result:
        yield {
            '_op_type': 'index',
            '_index': index_name,
            '_id': elem,
            # Copy so every action owns its own payload dict.
            '_source': dict(source),
        }
def createUpdateElem(result, index_name='database', doc=None):
    """Yield one bulk ``update`` action per id in ``result``.

    Args:
        result: Iterable of document ids; each becomes the action's ``_id``.
        index_name: Target index name. Defaults to ``'database'``, the value
            that was previously hard-coded.
        doc: Partial document to place under ``doc``. Defaults to the
            placeholder payload that was previously hard-coded.

    Yields:
        A dict with ``_op_type``, ``_index``, ``_id`` and ``doc`` keys.
    """
    if doc is None:
        doc = {'question': 'The life, universe and everything.'}
    for elem in result:
        yield {
            '_op_type': 'update',
            '_index': index_name,
            '_id': elem,
            # Copy so every action owns its own payload dict.
            'doc': dict(doc),
        }
def checkResponse(response, idL):
    """Split the requested ids into "to index" and "to update" lists.

    An mget response carries one entry per requested id, including ids that
    do not exist; those entries are marked ``found: false``. Membership of
    the returned ``_id`` in ``idL`` therefore says nothing about existence,
    so the ``found`` flag is what decides whether an id is updated.

    Args:
        response: mget response dict with a ``docs`` list.
        idL: Ids that were requested.

    Returns:
        A ``(toBeIndexed, updateL)`` tuple of lists. ``updateL`` holds the
        requested ids reported as found, in response order; ``toBeIndexed``
        holds the remaining requested ids, in request order. Both lists are
        free of duplicates.
    """
    requested = set(idL)
    seen = set()
    updateL = []
    for elem in response['docs']:
        doc_id = elem['_id']
        # Entries without a truthy `found` (missing docs, per-doc errors) are not updatable.
        if elem.get('found') and doc_id in requested and doc_id not in seen:
            seen.add(doc_id)
            updateL.append(doc_id)
    toBeIndexed = []
    for doc_id in idL:
        if doc_id not in seen:
            seen.add(doc_id)
            toBeIndexed.append(doc_id)
    return toBeIndexed, updateL
def multiget(idL):
    """Look up a batch of ids with one mget call and classify them.

    After the lookup the batch is reset: the module-level ``now`` timestamp
    is refreshed and ``idL`` is emptied in place. Plain assignments to
    ``now`` and ``idL`` inside the function would only rebind local names and
    leave the caller's list and the module timestamp untouched.

    Relies on a module-level ``es`` client, which is not defined in this
    snippet.

    Args:
        idL: List of pending ids. It is cleared in place before returning.

    Returns:
        The ``(toBeIndexed, updateL)`` tuple produced by ``checkResponse``;
        ``([], [])`` when ``idL`` is empty (no request is sent).
    """
    global now
    if not idL:
        return [], []
    response = es.mget(index='database', body={'ids': idL})
    # checkResponse builds new lists, so clearing idL below does not affect the result.
    doc2BeIndicized = checkResponse(response, idL)
    now = datetime.now()
    del idL[:]
    return doc2BeIndicized
@app.route('/test',methods=['POST'])
def hello():
    """Queue the posted id and flush the queue in one batch when due.

    The id from the POSTed form is appended to the module-level ``idL``. The
    queue is flushed when it holds more than ``threshold`` ids, or when more
    than ``delta`` has passed since ``now`` (the time of the last flush). A
    flush resolves the batch with ``multiget`` and then bulk-updates the ids
    that already exist and bulk-indexes the rest.

    The batch is snapshotted and the queue and timer are reset here, before
    ``multiget`` is called, so the reset does not depend on ``multiget``
    doing it.

    NOTE(review): ``idL`` and ``now`` are shared module state with no
    locking; if the server handles requests on several threads, guard this
    function with a lock.

    Returns:
        A JSON response ``{'status': 'success'}``.
    """
    global now
    idL.append(request.form['id'])
    # The window has elapsed once the current time is past `now + delta`.
    window_elapsed = datetime.now() > now + delta
    if len(idL) > threshold or window_elapsed:
        batch = list(idL)
        del idL[:]
        now = datetime.now()
        indexed, updated = multiget(batch)
        if updated:
            update(updated)
        if indexed:
            index(indexed)
    return jsonify({'status': 'success'})
同样,您可以使用 bulk(或 parallel_bulk)来批量索引或更新文档列表;parallel_bulk 更适合在服务中使用,因为它使用多线程(文档见此处)。请记住,您需要分析 mget 调用的响应,因为列表中可能只有一部分元素已存在于 ES 中,而其余元素尚不存在。