我一直在研究Kafka twitter流式传输数据。
我正在关注以下链接中的示例: http://www.hahaskills.com/tutorials/kafka/Twitter_doc.html
我可以使用Producer代码并且工作正常。能够获得推特信息并发送给Kafka Producer。
我无法使用消费者代码,因为它已被抛弃为许多API的弃用错误。
以下是消费者代码:
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import kafka.consumer.Consumer;
//import kafka.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import kafka.consumer.ConsumerIterator;
//import kafka.consumer.KafkaStream;
//import kafka.consumer.KafkaStream;
import kafka.javaapi.consumer.ConsumerConnector;
//import org.apache.kafka.clients.producer.KafkaProducer;
public class KafkaConsumer {
private final ConsumerConnector consumer;
private final String topic;
public KafkaConsumer(String zookeeper, String groupId, String topic) {
Properties props = new Properties();
props.put("zookeeper.connect", zookeeper);
props.put("group.id", groupId);
props.put("zookeeper.session.timeout.ms", "500");
props.put("zookeeper.sync.time.ms", "250");
props.put("auto.commit.interval.ms", "1000");
consumer = Consumer.createJavaConsumerConnector(new ConsumerConfig(props));
this.topic = topic;
}
public void testConsumer() {
System.out.println("Test Con called");
Map<String, Integer> topicCount = new HashMap<>();
topicCount.put(topic, 1);
Map<String, List<KafkaStream<byte[], byte[]>>> consumerStreams = consumer.createMessageStreams(topicCount);
List<KafkaStream<byte[], byte[]>> streams = consumerStreams.get(topic);
System.out.println("For");
for (final KafkaStream stream : streams) {
ConsumerIterator<byte[], byte[]> it = stream.iterator();
System.out.println("Size"+it.length());
while (it.hasNext()) {
System.out.println("Stream");
System.out.println("Message from Single Topic: " + new String(it.next().message()));
}
}
if (consumer != null) {
consumer.shutdown();
}
}
public static void main(String[] args) {
System.out.println("Started");
String topic="twittertopic";
KafkaConsumer simpleTWConsumer = new KafkaConsumer("localhost:XXXX", "testgroup", topic);
simpleTWConsumer.testConsumer();
System.out.println("End");
}
}
抛出错误:不推荐使用ConsumerConnector,ConsumerIterator,KafkaStream。
ConsumerConfig不可见。
是否有此示例代码的固定版本(推特的Kafka消费者)?
答案 0 :(得分:2)
您所关注的教程非常陈旧,并且使用了已弃用的旧Scala Kafka客户端,请参阅http://kafka.apache.org/documentation/#legacyapis
已弃用的类是:
kafka.consumer.*
和kafka.javaapi.consumer
使用org.apache.kafka.clients.consumer.*
下的较新Java消费者
kafka.producer.*
和kafka.javaapi.producer
使用org.apache.kafka.clients.producer.*
下的较新的Java Producer
除了使用已弃用的类之外,您的代码大多是正确的,我只需修复一些导入。见下面的固定版本。使用它我能够将我正在制作的消息用于名为twittertopic
的主题。
package example;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import kafka.consumer.Consumer;
import kafka.consumer.ConsumerConfig;
import kafka.consumer.ConsumerIterator;
import kafka.consumer.KafkaStream;
import kafka.javaapi.consumer.ConsumerConnector;
public class MyConsumer {
private final ConsumerConnector consumer;
private final String topic;
public MyConsumer(String zookeeper, String groupId, String topic) {
Properties props = new Properties();
props.put("zookeeper.connect", zookeeper);
props.put("group.id", groupId);
props.put("zookeeper.session.timeout.ms", "500");
props.put("zookeeper.sync.time.ms", "250");
props.put("auto.commit.interval.ms", "1000");
consumer = Consumer.createJavaConsumerConnector(new ConsumerConfig(props));
this.topic = topic;
}
public void testConsumer() {
Map<String, Integer> topicCount = new HashMap<>();
topicCount.put(topic, 1);
Map<String, List<KafkaStream<byte[], byte[]>>> consumerStreams = consumer.createMessageStreams(topicCount);
List<KafkaStream<byte[], byte[]>> streams = consumerStreams.get(topic);
for (final KafkaStream stream : streams) {
ConsumerIterator<byte[], byte[]> it = stream.iterator();
while (it.hasNext()) {
System.out.println("Message from Single Topic: " + new String(it.next().message()));
}
}
if (consumer != null) {
consumer.shutdown();
}
}
public static void main(String[] args) {
System.out.println("Started");
String topic = "twittertopic";
MyConsumer simpleTWConsumer = new MyConsumer("localhost:2181", "testgroup", topic);
simpleTWConsumer.testConsumer();
System.out.println("End");
}
}
虽然可以使用上面的代码,但下一个主要的Kafka版本可能会删除当前不推荐使用的类,因此您不应该使用这些编写新逻辑。
相反,您应该开始使用Java客户端,您可以使用Github上提供的示例:https://github.com/apache/kafka/tree/trunk/examples/src/main/java/kafka/examples
使用新的Java Consumer,您的逻辑将如下所示:
import java.util.Arrays;
import java.util.Properties;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
public class MyConsumer {
static final String TOPIC = "twittertopic";
static final String GROUP = "testgroup";
public static void main(String[] args) {
System.out.println("Started");
Properties props = new Properties();
props.put("bootstrap.servers", "localhost:9092");
props.put("group.id", GROUP);
props.put("auto.commit.interval.ms", "1000");
props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
try (KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);) {
consumer.subscribe(Arrays.asList(TOPIC));
for (int i = 0; i < 1000; i++) {
ConsumerRecords<String, String> records = consumer.poll(1000L);
System.out.println("Size: " + records.count());
for (ConsumerRecord<String, String> record : records) {
System.out.println("Received a message: " + record.key() + " " + record.value());
}
}
}
System.out.println("End");
}
}