我是 Kafka 的新手。目前我正在尝试使用一个现有程序(Kafka 消费者)从 Kafka 生产者获取数据。我已经能够在一次拉取中取到数据。
但我担心的是:一旦我的消费者的这次拉取结束,生产者可能又会发送新的数据。我该如何确保我的消费者也能获取到生产者在我上一次拉取完成之后发送的那些数据?
请找到以下代码。
import kafka.consumer.ConsumerConfig;
import kafka.consumer.KafkaStream;
import kafka.javaapi.consumer.ConsumerConnector;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
public class ConsumerGroupExample {
private final ConsumerConnector consumer;
private final String topic;
private ExecutorService executor;
public ConsumerGroupExample(String a_zookeeper, String a_groupId, String a_topic) {
consumer = kafka.consumer.Consumer.createJavaConsumerConnector(
createConsumerConfig(a_zookeeper, a_groupId));
this.topic = a_topic;
}
public void shutdown() {
if (consumer != null) consumer.shutdown();
if (executor != null) executor.shutdown();
try {
if (!executor.awaitTermination(5000, TimeUnit.MILLISECONDS)) {
System.out.println("Timed out waiting for consumer threads to shut down, exiting uncleanly");
}
} catch (InterruptedException e) {
System.out.println("Interrupted during shutdown, exiting uncleanly");
}
}
public void run(int a_numThreads) throws InterruptedException {
Map<String, Integer> topicCountMap = new HashMap<String, Integer>();
topicCountMap.put(topic, new Integer(a_numThreads));
Map<String, List<KafkaStream<byte[], byte[]>>> consumerMap = consumer.createMessageStreams(topicCountMap);
//List<KafkaStream<byte[], byte[]>> streams = consumerMap.get(topic);
// System.out.println(streams.size());
// now launch all the threads
//
executor = Executors.newFixedThreadPool(a_numThreads);
List<KafkaStream<byte[], byte[]>> streams = null;
// now create an object to consume the messages
//
int threadNumber = 0;
boolean keepRunningThread = false;
for (;;) {
streams = consumerMap.get(topic);
for (final KafkaStream stream : streams) {
keepRunningThread =true;
executor.submit(new ConsumerTest(stream, threadNumber,keepRunningThread ));
//threadNumber++;
}
//TimeUnit.MILLISECONDS.sleep(100);
//System.out.println("Going to sleep ");
}
private static ConsumerConfig createConsumerConfig(String a_zookeeper, String a_groupId) {
Properties props = new Properties();
props.put("zookeeper.connect", a_zookeeper);
props.put("group.id", a_groupId);
props.put("zookeeper.session.timeout.ms", "1600");
props.put("zookeeper.sync.time.ms", "200");
props.put("consumer.timeout.ms","10");
props.put("auto.offset.reset", "smallest");
props.put("auto.commit.interval.ms", "1000");
//props.put("key.deserializer",
// "org.apache.kafka.common.serialization.StringDeserializer");
// props.put("value.deserializer",
// "org.apache.kafka.common.serialization.StringDeserializer");
return new ConsumerConfig(props);
}
public static void main(String[] args) throws InterruptedException {
String zooKeeper = args[0];
String groupId = args[1];
String topic = args[2];
int threads = Integer.parseInt(args[3]);
ConsumerGroupExample example = new ConsumerGroupExample(zooKeeper, groupId, topic);
example.run(threads);
try {
Thread.sleep(10000);
} catch (InterruptedException ie) {
}
example.shutdown();
}
}
import kafka.consumer.ConsumerIterator;
import kafka.consumer.KafkaStream;
public class ConsumerTest implements Runnable {
private KafkaStream m_stream;
private int m_threadNumber;
private boolean keepRunningThread
public ConsumerTest(KafkaStream a_stream, int a_threadNumber,boolean keepRunningThread) {
m_threadNumber = a_threadNumber;
m_stream = a_stream;
keepRunningThread = keepRunningThread;
}
public void run() {
ConsumerIterator<byte[], byte[]> it = m_stream.iterator();
while(keepRunningThread)
{
try
{
if(it.hasNext())
{
System.out.println(new String(it.next().message()));
}
}
catch(ConsumerTimeoutException ex)
{
// Nothing serious timeout exception waiting for kafka message
}
}
// System.out.println("Shutting down Thread: " + m_threadNumber);
}
}
答案 0 :(得分:0)
修改代码,使其持续读取消息,并在没有消息时处理超时。下面的代码会持续读取消息,而不会无限期阻塞您的消费者。由于使用了 while(it.hasNext()),您当前的代码同样会持续读取,但在没有消息时会阻塞消费者。
从main()中删除以下行,因为它会在10秒后关闭您的消费者
// try
// {
// Thread.sleep(10000);
// } catch (InterruptedException ie) {
// }
// example.shutdown();
另外,请在消费者配置中添加 consumer.timeout.ms,否则代码会一直阻塞。使用 keepRunningThread 标志来控制何时退出消费者循环:
// Keep polling until keepRunningThread is cleared or the thread is interrupted.
while (keepRunningThread) {
    try {
        if (it.hasNext()) {
            System.out.println(new String(it.next().message()));
        }
    } catch (kafka.consumer.ConsumerTimeoutException ex) {
        // Nothing serious: timed out waiting for a Kafka message.
        // Wait for 5 seconds before polling again.
        try {
            Thread.sleep(5000);
        } catch (InterruptedException ie) {
            // Thread.sleep throws a checked exception that Runnable.run() cannot
            // propagate; restore the interrupt flag and leave the loop.
            Thread.currentThread().interrupt();
            break;
        }
    }
}
有关详细信息,请参阅https://kafka.apache.org/07/documentation.html
(摘自 Kafka 文档)consumer.timeout.ms:默认情况下,此值为 -1,如果没有新消息可供消费,消费者将无限期阻塞。将该值设置为正整数后,如果在指定的超时时间内没有消息可供消费,则会向消费者抛出超时异常。