我正在尝试在 Python 中使用 multiprocessing(多进程)来执行 Cassandra 查询。我已经使用以下语句建立了一个队列:
futures = multiprocessing.Queue(maxsize=5)
我试图将此队列作为参数传递给目标函数:
Process(target=worker, args=(monthyear, daymonthyear, ts1, country, lat, lon, sma, dma, etype, version, ihl, tos_dscp, totallen, idnum, fragoff, ttl, proto, hdrchksm, sip, dip, opts, t_sp, t_dp, t_sqnum, t_acknum, t_dataoff, t_flags, t_winsz, t_chksm, t_urgptr, t_opts, process_n, futures)).start()
我的目标函数是
def worker(monthyear, daymonthyear, ts1, country, lat, lon, sma, dma, etype,
           version, ihl, tos_dscp, totallen, idnum, fragoff, ttl, proto,
           hdrchksm, sip, dip, opts, t_sp, t_dp, t_sqnum, t_acknum, t_dataoff,
           t_flags, t_winsz, t_chksm, t_urgptr, t_opts, p, futures):
    """Insert one record into ``test.day`` and ``test.month`` and report completion.

    Intended to run as a ``multiprocessing.Process`` target. The function
    opens its own Cluster/Session, issues both INSERTs asynchronously, waits
    for each of them to finish, and then puts one marker per completed INSERT
    on ``futures``.

    The driver's future objects are deliberately NOT put on the queue: a
    ``multiprocessing.Queue`` pickles every item, and putting the futures on
    it is what produced the pickling error. A plain string (the table name)
    is enqueued instead, so a consumer can still count and remove completed
    items with ``futures.get_nowait()``.

    Args:
        monthyear: Partition value for ``test.month`` (converted with ``str``).
        daymonthyear: Partition value for ``test.day`` (converted with ``str``).
        ts1: Timestamp (converted with ``int``).
        country, lat, lon, sma, dma, etype, version, ihl, tos_dscp, fragoff,
        hdrchksm, sip, dip, opts: Column values converted with ``str``.
        totallen, idnum, ttl, proto: Column values converted with ``int``.
        t_sp, t_dp, t_sqnum, t_acknum, t_dataoff, t_flags, t_winsz, t_chksm,
        t_urgptr, t_opts, p: Accepted for interface compatibility; not used
            by the statements below.
        futures: Queue that receives one table-name string per finished
            INSERT.

    Raises:
        Any exception raised by the driver while connecting or while waiting
        for an INSERT result, and the queue module's ``Full`` exception if
        ``futures`` has no free slot when ``put_nowait`` is called.
    """
    cluster = Cluster(['127.0.0.1'])
    try:
        session = cluster.connect()
        session.execute("USE test;")
        # Single-argument call form prints the same text on Python 2 and 3.
        print(current_process().name)

        # Column values shared by both INSERT statements.
        shared_values = (
            int(ts1), str(country), str(lat), str(lon), str(sma), str(dma),
            str(etype), str(version), str(ihl), str(tos_dscp), int(totallen),
            int(idnum), str(fragoff), int(ttl), int(proto), str(hdrchksm),
            str(sip), str(dip), str(opts),
        )

        # NOTE(review): s_sp, s_dp, s_vtag and s_chksm are not parameters of
        # this function; they must exist as module-level names or this line
        # raises NameError. Confirm whether the t_* parameters were intended.
        day_values = (
            (str(daymonthyear),)
            + shared_values
            + (int(s_sp), int(s_dp), int(s_vtag), str(s_chksm))
        )
        day_future = session.execute_async("INSERT INTO test.day (daymonthyear, ts, c_country, c_lat, c_lon, e_sma, e_dma, e_etype, ip_version, ip_ihl, ip_tos_dscp, ip_totallen, ip_idnum, ip_fragoff, ip_ttl, ip_proto, ip_hdrchksm, ip_sip, ip_dip, ip_opts, s_sp, s_dp, s_vtag, s_chksm) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s);", day_values)

        # NOTE(review): u_sp, u_dp, u_len and u_chksm are likewise not
        # parameters of this function; verify where they are defined.
        month_values = (
            (str(monthyear),)
            + shared_values
            + (int(u_sp), int(u_dp), int(u_len), str(u_chksm))
        )
        month_future = session.execute_async("INSERT INTO test.month (monthyear, ts, c_country, c_lat, c_lon, e_sma, e_dma, e_etype, ip_version, ip_ihl, ip_tos_dscp, ip_totallen, ip_idnum, ip_fragoff, ip_ttl, ip_proto, ip_hdrchksm, ip_sip, ip_dip, ip_opts, u_sp, u_dp, u_len, u_chksm) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s);", month_values)

        # Both requests are in flight; wait for each one before the cluster
        # is shut down, otherwise the connection may be closed while the
        # INSERTs are still pending.
        for table_name, pending in (("test.day", day_future),
                                    ("test.month", month_future)):
            pending.result()
            futures.put_nowait(table_name)
    finally:
        # Shutting down the cluster also closes the sessions created from it,
        # so no separate session.shutdown() call is needed.
        cluster.shutdown()
然后我检查队列中是否已有一定数量的 future 对象,并使用 futures.get_nowait() 将对象从队列中取出,但是我得到了一个 pickling(序列化)错误。队列应该是可以被 pickle 的,但我不知道为什么会出现这个错误。
答案 0 :(得分:1)
cassandra-driver 的对象(即 cluster 和 session)不能被 pickle。
我建议在 worker 函数的作用域之外创建驱动程序对象,以避免它们被 pickle。无论如何,为每个查询都创建一个 session 和 cluster 实在是大材小用(开销过大)。
# Initialise the global cluster connection once, at module level, so that
# worker() can use `session` without creating a connection per call.
cluster = Cluster(['127.0.0.1'])
metadata = cluster.metadata
session = cluster.connect()
# Bounded queue (at most 5 items) onto which worker() puts its results.
futures = multiprocessing.Queue(maxsize=5)
# NOTE(review): `args` is not defined anywhere in this snippet, and `(args)`
# is just `args` (parentheses alone do not build a tuple). `worker` is defined
# further down, so run top to bottom this line would not find it yet.
Process( target=worker, args=(args) ).start()
# NOTE(review): the process is started but not joined before the shutdown
# calls below run -- confirm the worker is meant to outlive this connection.
session.cluster.shutdown()
session.shutdown()
# Workers now share global connection.
def worker(*args):
    """Run two INSERT statements on the shared session and enqueue their futures.

    Uses the module-level ``session`` and ``futures`` objects instead of
    opening a Cluster/Session per call.

    Args:
        *args: Accepted so the function can be used as a ``Process`` target
            with arbitrary positional arguments; not used by the body shown.
    """
    session.execute("USE test;")
    # Single-argument call form prints the same text on Python 2 and 3.
    print(current_process().name)
    # NOTE(review): the objects returned by execute_async() are still put on a
    # multiprocessing.Queue here, which pickles its items -- confirm that this
    # no longer raises the pickling error described in the question.
    future = session.execute_async("INSERT INTO ...")
    futures.put_nowait(future)
    future = session.execute_async("INSERT INTO ...")
    futures.put_nowait(future)