I am trying to run kafka-storm-cassandra. In my case tail2kafka itself is the producer, and when I start a consumer to consume the topic it throws the error mentioned below. Please help me.
Thanks.
[2015-05-13 15:28:51,784] ERROR Closing socket for /127.0.0.1 because of error (kafka.network.Processor)
java.lang.OutOfMemoryError: Java heap space
    at kafka.api.ProducerRequest$$anonfun$1$$anonfun$apply$1.apply(ProducerRequest.scala:45)
    at kafka.api.ProducerRequest$$anonfun$1$$anonfun$apply$1.apply(ProducerRequest.scala:42)
    at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:206)
    at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:206)
    at scala.collection.immutable.Range$ByOne$class.foreach(Range.scala:282)
    at scala.collection.immutable.Range$$anon$1.foreach(Range.scala:274)
    at scala.collection.TraversableLike$class.map(TraversableLike.scala:206)
    at scala.collection.immutable.Range.map(Range.scala:39)
    at kafka.api.ProducerRequest$$anonfun$1.apply(ProducerRequest.scala:42)
    at kafka.api.ProducerRequest$$anonfun$1.apply(ProducerRequest.scala:38)
    at scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:227)
    at scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:227)
    at scala.collection.immutable.Range$ByOne$class.foreach(Range.scala:282)
    at scala.collection.immutable.Range$$anon$1.foreach(Range.scala:274)
    at scala.collection.TraversableLike$class.flatMap(TraversableLike.scala:227)
    at scala.collection.immutable.Range.flatMap(Range.scala:39)
    at kafka.api.ProducerRequest$.readFrom(ProducerRequest.scala:38)
    at kafka.api.RequestKeys$$anonfun$1.apply(RequestKeys.scala:36)
    at kafka.api.RequestKeys$$anonfun$1.apply(RequestKeys.scala:36)
    at kafka.network.RequestChannel$Request.<init>(RequestChannel.scala:53)
    at kafka.network.Processor.read(SocketServer.scala:353)
    at kafka.network.Processor.run(SocketServer.scala:245)
    at java.lang.Thread.run(Thread.java:745)
My consumer code is:
import struct
import time

import kafka.io
import kafka.message
import kafka.request_type
class Consumer(kafka.io.IO):
    CONSUME_REQUEST_TYPE = kafka.request_type.FETCH

    MAX_SIZE = 1024 * 1024

    #: In seconds.
    DEFAULT_POLLING_INTERVAL = 2

    def __init__(self, topic, partition=0, host='localhost', port=9092):
        kafka.io.IO.__init__(self, host, port)

        #: The topic queue to consume.
        self.topic = topic
        #: The partition the topic queue is on.
        self.partition = partition
        #: Offset in the Kafka queue, in bytes.
        self.offset = 1
        #: Maximum message size to consume.
        self.max_size = self.MAX_SIZE

        self.request_type = self.CONSUME_REQUEST_TYPE
        self.polling = self.DEFAULT_POLLING_INTERVAL

        self.connect()
    def consume(self):
        """Fetch data from the topic queue."""
        self.send_consume_request()
        return self.parse_message_set_from(self.read_data_response())
    def loop(self):
        """Loop over incoming messages from the queue in a blocking fashion.

        The check interval is ``polling`` seconds.
        """
        while True:
            messages = self.consume()

            if messages and isinstance(messages, list) and len(messages) > 0:
                for message in messages:
                    yield message

            time.sleep(self.polling)
    # REQUEST TYPE ID + TOPIC LENGTH + TOPIC + PARTITION + OFFSET + MAX SIZE
    def request_size(self):
        return 2 + 2 + len(self.topic) + 4 + 8 + 4

    def encode_request_size(self):
        return struct.pack('>i', self.request_size())

    def encode_request(self):
        length = len(self.topic)
        return struct.pack('>HH%dsiQi' % length, self.request_type, length, self.topic, self.partition, self.offset, self.max_size)
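    # Worked example: for a topic such as 'test' (4 characters), the format
    # string expands to '>HH4siQi', i.e. request_type (2 bytes) + topic length
    # (2) + topic (4) + partition (4) + offset (8) + max_size (4) = 24 bytes,
    # which matches request_size() above.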
    def send_consume_request(self):
        self.write(self.encode_request_size())
        self.write(self.encode_request())

    def read_data_response(self):
        buf_length = struct.unpack('>i', self.read(4))[0]
        # Start with a 2 byte offset
        return self.read(buf_length)[2:]
    def parse_message_set_from(self, data):
        messages = []
        processed = 0
        length = len(data) - 4

        while processed <= length:
            message_size = struct.unpack('>i', data[processed:processed + 4])[0]
            messages.append(kafka.message.parse_from(data[processed:processed + message_size + 4]))
            processed += 4 + message_size

        self.offset += processed
        return messages
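
For reference, this is roughly how I drive the consumer (the topic name 'test' below is only a placeholder, not my real topic):

if __name__ == '__main__':
    # Placeholder topic/host/port; the real values match my broker setup.
    consumer = Consumer('test', partition=0, host='localhost', port=9092)

    # loop() is a generator: it polls every DEFAULT_POLLING_INTERVAL seconds
    # and yields messages as they arrive.
    for message in consumer.loop():
        print(message)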