我正在测试 Storm(0.9.3)的消息处理保证(guaranteed message processing)。在我的拓扑中,有一个 spout 从 Kafka 读取消息并传递给一个 bolt;该 bolt 把消息写入文件,然后故意将其 fail。
这是代码
Kafka Spout
import random
import storm
from kafka import KafkaConsumer
import logging
log = logging.getLogger('kafka_spout')
logging.basicConfig(level=logging.ERROR,filename='/tmp/kafka_spout.log',format="%(message)s",filemode='a')
import csv
class KafkaSpout(storm.Spout):
    """Storm multilang spout that reads messages from the Kafka topic
    "test", logs each one to /tmp/kafka_spout.csv, and emits it anchored
    with a random tuple id so Storm can report ack/fail back to us.
    ack()/fail() append the reported ids to files under /tmp for inspection.
    """

    def initialize(self, conf, context):
        # auto_commit_enable=False: offsets are committed manually in
        # nextTuple() after each message is processed.
        self.consumer = KafkaConsumer(
            "test",
            group_id="test_group",
            metadata_broker_list=["broker:9092"],
            auto_commit_enable=False,
            auto_offset_reset='largest',
        )
        self.conf = conf
        self.context = context
        self.log_test = "yes"

    def nextTuple(self):
        # NOTE(review): iterating the consumer here never returns control
        # to Storm while messages keep arriving; a multilang spout should
        # emit at most a few tuples per nextTuple() call so Storm can
        # deliver ack/fail callbacks.
        for message in self.consumer:
            tup_id = random.randrange(999999999)
            # Context manager guarantees the CSV handle is closed even if
            # writerow raises (the original leaked the handle on error).
            with open("/tmp/kafka_spout.csv", "a") as file_handle:
                writer = csv.writer(file_handle, delimiter=",")
                writer.writerow([tup_id, message.offset, message.value, message.partition])
            # NOTE(review): committing the offset BEFORE the tuple is acked
            # defeats at-least-once delivery -- a failed tuple can no longer
            # be replayed from Kafka.
            self.consumer.commit()
            self.consumer.task_done(message)
            # Emitting with id=... makes the tuple "reliable": Storm will
            # later call ack(id) or fail(id) with this same value.
            storm.emit([tup_id, message.offset, message.value, message.partition], id=tup_id)

    def fail(self, id):
        # Invoked by Storm when a tuple emitted with this id failed downstream.
        with open("/tmp/failed_tuples.txt", "a") as f:
            f.write(str(id) + "\n")

    def ack(self, id):
        # Invoked by Storm when a tuple emitted with this id was fully processed.
        with open("/tmp/acked_tuples.txt", "a") as f:
            f.write(str(id) + "\n")


KafkaSpout().run()
测试 Bolt(TestBolt)
import time
import storm
import csv
import random
class TestBolt(storm.BasicBolt):
    """Multilang bolt that appends every incoming tuple to
    /tmp/test_bolt.csv and then deliberately fails it, so the spout's
    fail() callback should fire for each tuple.
    """

    def initialize(self, conf, context):
        self.context = context
        self.conf = conf

    def process(self, tup):
        tup_id1, offset, message, message_partition = tup.values
        # Storm-assigned tuple id; this differs from the spout-chosen
        # tup_id1 carried inside the tuple's values.
        tup_id2 = tup.id
        # Context manager guarantees the CSV handle is closed even if
        # writerow raises (the original leaked the handle on error).
        with open("/tmp/test_bolt.csv", "a") as file_handle:
            writer = csv.writer(file_handle, delimiter=",")
            writer.writerow([tup_id1, offset, message, message_partition, tup_id2])
        # time.sleep(random.randrange(29, 38))  # test for replay on timeout, but did not work.
        storm.fail(tup)


TestBolt().run()
拓扑定义
// Wire the topology: one Kafka spout feeding four TestBolt executors.
TopologyBuilder builder = new TopologyBuilder();
builder.setSpout("KafkaSpout", new KafkaSpout(), 1);
builder.setBolt("TestBolt", new TestBolt(), 4).shuffleGrouping("KafkaSpout");

Config conf = new Config();
conf.setNumWorkers(2);
// Cap on in-flight (not yet acked/failed) spout tuples.
conf.setMaxSpoutPending(1000);

// Fix: the original line was missing its terminating semicolon.
StormSubmitter.submitTopologyWithProgressBar(args[0], conf, builder.createTopology());
按照 Vor 的评论,我修改了我的 Spout,fail 方法随之生效。下面是新的 spout 定义。
新Kafka Spout
import csv
import logging
import random

import kafka.common
import storm
from kafka import KafkaConsumer

log = logging.getLogger('kafka_spout')
logging.basicConfig(level=logging.ERROR,filename='/tmp/kafka_spout.log',format="%(message)s",filemode='a')
class KafkaSpoutNew(storm.Spout):
    """Revised multilang spout: emits at most ONE Kafka message per
    nextTuple() call instead of looping forever, which returns control to
    Storm between emits and lets the ack()/fail() callbacks be delivered.
    """

    def initialize(self, conf, context):
        self.conf = conf
        self.context = context
        self.log_test = "yes"

    def nextTuple(self):
        # NOTE(review): constructing a KafkaConsumer on every call is
        # expensive (group join + offset fetch each time); consider
        # creating it once in initialize() instead.
        consumer = KafkaConsumer(
            "test3",
            group_id="test_group",
            metadata_broker_list=["broker:9092"],
            auto_commit_enable=False,
            auto_commit_interval_ms=1 * 1000,
            auto_offset_reset='largest',
            # Makes consumer.next() raise ConsumerTimeout after 15 s so
            # nextTuple() cannot block Storm indefinitely.
            consumer_timeout_ms=15000,
        )
        try:
            message = consumer.next()
            tup_id = random.randrange(999999999)
            # Context manager guarantees the CSV handle is closed even if
            # writerow raises (the original leaked the handle on error).
            with open("/tmp/kafka_spout.csv", "a") as file_handle:
                writer = csv.writer(file_handle, delimiter=",")
                writer.writerow([tup_id, message.offset, message.value, message.partition])
            consumer.task_done(message)
            consumer.commit()
            # Emitting with id=... makes the tuple "reliable": Storm will
            # later call ack(id) or fail(id) with this same value.
            storm.emit([tup_id, message.offset, message.value, message.partition], id=tup_id)
        except kafka.common.ConsumerTimeout:
            # No message arrived within consumer_timeout_ms; yield to Storm.
            pass

    def fail(self, id):
        # Invoked by Storm when a tuple emitted with this id failed downstream.
        with open("/tmp/failed_tuples.txt", "a") as f:
            f.write(str(id) + "\n")

    def ack(self, id):
        # Invoked by Storm when a tuple emitted with this id was fully processed.
        with open("/tmp/acked_tuples.txt", "a") as f:
            f.write(str(id) + "\n")


KafkaSpoutNew().run()
问题
我的 fail 和 ack 方法似乎都没有生效,因为文件
/tmp/failed_tuples.txt
和 /tmp/acked_tuples.txt
都没有被
创建。为什么?
为什么在 spout 中通过 id=tup_id
提供的 ID 与 bolt 中
tup.id
得到的 ID 不同?Storm 在 ack 时
究竟使用哪个 ID?
在这种情况下如何实现重播逻辑? (需要建议。)