我正在尝试用 DEALER 和 ROUTER 套接字来测试 ZeroMQ(通过 pyzmq)的失败恢复行为。下面是我的代码,代码之后是我观察到的行为:
import sys, zmq
import threading
import time, gc
import socket
def tprint(msg):
    """Write *msg* followed by a newline to stdout, then flush.

    Like ``print``, but the text and its newline go out in a single
    ``write`` call so that lines produced by several threads do not get
    their newlines mixed up.

    :param msg: the text to print (without a trailing newline).
    """
    sys.stdout.write(msg + '\n')
    sys.stdout.flush()
class ClientWorker(threading.Thread):
    """Client thread that sends numbered requests through one DEALER socket.

    The socket is connected to every port in ``ports`` on localhost, and
    100 requests numbered ``id*100`` .. ``id*100 + 99`` are sent, one per
    second.

    :param id: integer client id; also determines the request numbers.
    :param ports: iterable of TCP ports of the servers to connect to.
    """

    def __init__(self, id, ports):
        super(ClientWorker, self).__init__()
        self.id = id
        self.ports = ports

    def run(self):
        """Connect to all servers, send the 100 requests, then clean up."""
        context = zmq.Context()
        # Named `dealer` so the imported `socket` module is not shadowed.
        dealer = context.socket(zmq.DEALER)
        try:
            for port in self.ports:
                dealer.connect("tcp://localhost:%d" % port)
            tprint("client %d started" % (self.id))
            first = self.id * 100
            for ia in range(first, first + 100):
                dealer.send_string('request %d' % (ia))
                time.sleep(1)
        finally:
            # Release the socket and this thread's private context even if
            # connecting or sending raised.
            dealer.close()
            context.term()
class ServerWorker(threading.Thread):
    """Server thread that receives requests on a ROUTER socket.

    The socket is bound to ``tcp://127.0.0.1:<port>``. Every received
    two-frame message (identity, payload) is printed together with the
    port.

    :param port: TCP port to bind to.
    :param maxReq: if truthy, the thread exits after this many requests;
        ``None`` (the default) or 0 means it runs until interrupted.
    """

    def __init__(self, port, maxReq=None):
        super(ServerWorker, self).__init__()
        self.port = port
        self.maxReq = maxReq

    def run(self):
        """Bind, serve requests until ``maxReq`` is reached, then clean up."""
        endpoint = "tcp://127.0.0.1:%d" % (self.port)
        context = zmq.Context()
        # Named `router` so the imported `socket` module is not shadowed.
        router = context.socket(zmq.ROUTER)
        try:
            router.bind(endpoint)
            tprint("server started on port %d" % (self.port))
            numReq = 0
            while True:
                ident, msg = router.recv_multipart()
                # Use tprint rather than a bare print so that output lines
                # from several server threads are not interleaved.
                tprint("%s %s %s" % (self.port, ident, msg))
                numReq += 1
                if self.maxReq and numReq >= self.maxReq:
                    tprint("server on port %d exiting" % (self.port))
                    break
            router.unbind(endpoint)
        finally:
            # Release the socket and this thread's private context even if
            # bind() or recv raised (e.g. when the address is in use).
            router.close()
            context.term()
def main():
    """Run the failure-recovery experiment.

    Starts one server per port (the one on 5555 exits after 10 requests),
    starts five clients connected to all ports, waits for the 5555 server
    to exit, sleeps 30 seconds and then starts a replacement server.
    """
    ports = [5555,5556,5557]
    servers = [ServerWorker(port,10 if port==5555 else None) for port in ports]
    for s in servers:
        s.start()
    for ia in range(1,6):
        w = ClientWorker(ia, ports)
        w.start()
    servers[0].join()
    # Drop the reference to the finished server and force a collection
    # before starting the replacement.
    servers[0] = None
    gc.collect()
    time.sleep(30)
    tprint("restarting server")
    # NOTE(review): `port` is never assigned in this function. Under
    # Python 2 it is the name leaked by the list comprehension above, so it
    # holds the last element of `ports`, not the 5555 port of the server
    # that just exited. Left unchanged on purpose: this is the behaviour
    # the listing is meant to reproduce.
    s = ServerWorker(port)
    s.start()


if __name__ == "__main__":
    main()
端口 5555 处的服务器会打印出它收到的 10 个项目,然后退出。
当我尝试把新的服务器线程 bind() 到端口 5555 时,我得到“正在使用的地址”(Address in use)错误,尽管我已经尝试了 context.term()、对服务器对象进行 gc 等等。
我是否正确期望 DEALER 套接字应该能够检测到其中一个服务器的故障,并将工作重新分配给其余服务器?我怀疑它无法检测到故障的原因,也许与端口 5555 上的套接字保持打开的原因相同?
我是否正确期望当我将服务器重新连接到端口 5555 时,客户端能够检测到重新连接,并以考虑新服务器的循环(round-robin)方式继续向服务器发送消息?
答案 0 :(得分:2)
我是否正确期望DEALER套接字能够检测到其中一台服务器的故障并将工作重新分配给其余服务器?
不,这不是 DEALER 的工作方式。DEALER 会在它的所有连接之间做负载均衡,无论对端是否在线。这意味着消息仍然会排队发往 5555 上的工作者,即使它已经关闭。当 5555 上的工作者恢复时,这些消息会立即被投递。
关于“正在使用的地址”(Address in use)错误,有什么想法吗?
这是因为在启动恢复的工作者时,你使用的 port 是 ports[-1] 而不是 ports[0],因此它绑定到了一个仍被你的某个工作者使用的端口,而不是那个已停止工作的工作者的端口。
我是否正确期望当我将服务器重新连接到端口5555时,客户端将能够检测重新连接并以考虑新服务器的循环方式继续向服务器发送消息?
是的,当 5555 恢复后,消息会继续被投递到 5555,但我认为你对“哪些消息会被投递到那里”的理解并不正确。
通过对脚本进行一些小调整,我得到了输出:
server started on port 5555
server started on port 5556
server started on port 5557
client 1 started
client 2 started
client 3 started
client 4 started
client 5 started
5555 00800041a7 request 100
5555 00800041a8 request 200
5555 00800041a9 request 300
5555 00800041aa request 400
5555 00800041ab request 500
5556 0060b7acd9 request 101
5556 0060b7acdb request 301
5556 0060b7acdc request 401
5556 0060b7acdd request 501
5556 0060b7acda request 201
5557 004431b782 request 102
5557 004431b784 request 302
5557 004431b783 request 202
5557 004431b785 request 402
5557 004431b786 request 502
5555 00800041a7 request 103
5555 00800041a9 request 303
5555 00800041ab request 503
5555 00800041a8 request 203
5555 00800041aa request 403
server on port 5555 exiting
5556 0060b7acd9 request 104
5556 0060b7acda request 204
5556 0060b7acdd request 504
5556 0060b7acdb request 304
5556 0060b7acdc request 404
5557 004431b782 request 105
5557 004431b786 request 505
5557 004431b783 request 205
5557 004431b784 request 305
5557 004431b785 request 405
5556 0060b7acd9 request 107 <- note jump from 405 to 107
5556 0060b7acdc request 407
5556 0060b7acdd request 507
5556 0060b7acda request 207
5556 0060b7acdb request 307
restarting server on 5555
server started on port 5555
5557 004431b786 request 508
5557 004431b782 request 108
5557 004431b785 request 408
5557 004431b783 request 208
5557 004431b784 request 308
5555 0041c8aac3 request 506 <- here are the X06 messages on the new 5555 worker
5555 0041c8aac4 request 306
5555 0041c8aac5 request 406
5555 0041c8aac6 request 106
5555 0041c8aac7 request 206
5555 0041c8aac7 request 209
5555 0041c8aac4 request 309
5555 0041c8aac3 request 509
5555 0041c8aac5 request 409
5555 0041c8aac6 request 109
5556 0060b7acdd request 510
5556 0060b7acdb request 310
5556 0060b7acda request 210
5556 0060b7acdc request 410
5556 0060b7acd9 request 110
5557 004431b784 request 311
5557 004431b786 request 511
...
消息 106–506 被发往 5555,并在稍后才被投递。当 5555 不在线、无法接收它们时,它们并没有被重新路由到其他工作者。
您可以使用 client_socket.hwm = N 来限制某个工作者在被客户端排除出轮询之前最多可以有多少条待处理消息,但无法把这个上限设为零。
我使用的脚本版本:
from binascii import hexlify
import threading
import socket
import sys
import time
import zmq
def tprint(msg):
    """Print *msg* on its own line and flush stdout immediately.

    Behaves like ``print``, except that the message and the trailing
    newline are emitted with one ``write`` call, so newlines are not mixed
    up when several threads print at once.

    :param msg: the text to print (without a trailing newline).
    """
    sys.stdout.write(msg + '\n')
    sys.stdout.flush()
class ClientWorker(threading.Thread):
    """Client thread that sends numbered requests through one DEALER socket.

    The socket is created on the shared ``zmq.Context.instance()``, has its
    high-water mark set to 1, and is connected to every port in ``ports``
    on localhost. It then sends 100 requests numbered ``id*100`` ..
    ``id*100 + 99``, one per second.

    :param id: integer client id; also determines the request numbers.
    :param ports: iterable of TCP ports of the servers to connect to.
    """

    def __init__(self, id, ports):
        super(ClientWorker, self).__init__()
        self.id = id
        self.ports = ports

    def run(self):
        """Connect to all servers, send the 100 requests, then clean up."""
        context = zmq.Context.instance()
        # Named `dealer` so the imported `socket` module is not shadowed.
        dealer = context.socket(zmq.DEALER)
        try:
            dealer.hwm = 1  # limit messages sent to dead workers
            for port in self.ports:
                dealer.connect("tcp://localhost:%d" % port)
            tprint("client %d started" % (self.id))
            first = self.id * 100
            for ia in range(first, first + 100):
                dealer.send_string('request %d' % (ia))
                time.sleep(1)
        finally:
            # Always release the socket, even if connecting or sending raised.
            dealer.close()
        # NOTE(review): `context` is the process-wide Context.instance(), so
        # this term() presumably affects every socket sharing it -- confirm
        # that terminating it from a worker thread is intended.
        context.term()
class ServerWorker(threading.Thread):
    """Server thread that receives requests on a ROUTER socket.

    The socket is created on the shared ``zmq.Context.instance()`` and
    bound to ``tcp://127.0.0.1:<port>``. Every received two-frame message
    is printed as ``<port> <hex identity> <payload>``.

    :param port: TCP port to bind to.
    :param maxReq: if truthy, the thread exits after this many requests;
        ``None`` (the default) or 0 means it runs until interrupted.
    """

    def __init__(self, port, maxReq=None):
        super(ServerWorker, self).__init__()
        self.port = port
        self.maxReq = maxReq

    def run(self):
        """Bind, serve requests until ``maxReq`` is reached, then clean up."""
        context = zmq.Context.instance()
        # Named `router` so the imported `socket` module is not shadowed.
        router = context.socket(zmq.ROUTER)
        try:
            tprint("server started on port %d" % (self.port))
            router.bind("tcp://127.0.0.1:%d" % (self.port))
            numReq = 0
            while True:
                ident, msg = router.recv_multipart()
                # Identities are raw bytes, so show them hex-encoded. Use
                # tprint rather than a bare print so that output lines from
                # several server threads are not interleaved.
                tprint("%s %s %s" % (self.port, hexlify(ident), msg))
                numReq += 1
                if self.maxReq and numReq >= self.maxReq:
                    tprint("server on port %d exiting" % (self.port))
                    break
        finally:
            # Always release the socket, even if bind() or recv raised.
            router.close()
        # NOTE(review): `context` is the process-wide Context.instance(), so
        # this term() presumably affects every socket sharing it -- confirm
        # that terminating it from a worker thread is intended.
        context.term()
def main():
    """Run the failure-recovery experiment.

    Starts one server per port (the one on 5555 exits after 10 requests),
    starts five clients connected to all ports, waits for the 5555 server
    to exit, sleeps 10 seconds and then starts a replacement server on the
    same port (``ports[0]``).
    """
    ports = [5555,5556,5557]
    servers = [ServerWorker(port,10 if port==5555 else None) for port in ports]
    for s in servers:
        s.start()
    for ia in range(1,6):
        w = ClientWorker(ia, ports)
        w.start()
    servers[0].join()
    time.sleep(10)
    # Rebind explicitly to the port of the server that exited.
    port = ports[0]
    tprint("restarting server on %i" % port)
    s = ServerWorker(port)
    s.start()


if __name__ == "__main__":
    # Grab the shared context up front so it is terminated when main()
    # returns or raises.
    ctx = zmq.Context.instance()
    try:
        main()
    finally:
        ctx.term()