具有多个线程的Kafka Producer-Java

时间:2019-05-09 09:26:59

标签: java multithreading apache-kafka

我想使用多个线程执行Kafka生产者。以下是我尝试过的代码。我不知道如何在Kafka生产者中实现线程,因为我不熟悉线程编程。 下面是我的生产者的代码。

import org.apache.kafka.clients.producer.Callback;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.clients.producer.RecordMetadata;
import org.apache.kafka.common.Metric;
import org.apache.kafka.common.MetricName;
import org.apache.kafka.common.serialization.StringSerializer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

public class KafkaProducerWithThread  {
    //init params
    final String bootstrapServer = "127.0.0.1:9092";
    final String topicName = "spark-data-topic";
    final String csvFileName = "unique_products.csv";
    final static int MAX_THREAD = 2;  //created number of threads

    //Logger
    final Logger logger = LoggerFactory.getLogger(KafkaProducerWithThread.class);

    public KafkaProducerWithThread() throws FileNotFoundException {
    }

    public static void main(String[] args) throws IOException {

        new KafkaProducerWithThread().runProducer();
    }

    public void runProducer() throws IOException {

        //Read the CSV file from Resources folder as BufferedReader
        ClassLoader classLoader = new KafkaProducerWithThread().getClass().getClassLoader();
        BufferedReader reader = new BufferedReader(new FileReader(classLoader.getResource(csvFileName).getFile()));

        //Create a Kafka Producer
        org.apache.kafka.clients.producer.KafkaProducer<String, String> producer = createKafkaProducer();

        //Kafka Producer Metrics
        Metric requestTotalMetric = null;
        for (Map.Entry<MetricName, ? extends Metric> entry : producer.metrics().entrySet()) {
            if ("request-total".equals(entry.getKey().name())) {
                requestTotalMetric = entry.getValue();
            }
        }

        //Thread
        ExecutorService executorService = Executors.newFixedThreadPool(MAX_THREAD);


        //Read the CSV file line by line
        String line = "";
        int i = 0;
        while ((line = reader.readLine()) != null) {
            i++;
            String key = "products_" + i;

            //Create a ProducerRecord
            ProducerRecord<String, String> csvProducerRecord = new ProducerRecord<>(topicName, key, line.trim());

            //Send the data - Asynchronously
            producer.send(csvProducerRecord, new Callback() {
                @Override
                public void onCompletion(RecordMetadata recordMetadata, Exception e) {
                    //executes every time a record is sent successfully or an exception is thrown
                    if (e == null) {
                        //the record was sent successfully
//                        logger.info("Received new metadata. \n" +
//                                "Topic: " + recordMetadata.topic() + "\n" +
//                                "Partition: " + recordMetadata.partition() + "\n" +
//                                "Offset: " + recordMetadata.offset() + "\n" +
//                                "Timestamp: " + recordMetadata.timestamp());
                    } else {
                        logger.error("Error while producing", e);
                    }
                }
            });

            if (i % 1000 == 0){
                logger.info("Record #: " + i + " Request rate: " + requestTotalMetric.metricValue());
            }

        }

        //Adding a shutdown hook
        Runtime.getRuntime().addShutdownHook(new Thread(() -> {
            logger.info("Stopping the Producer!");
            producer.flush();
            producer.close();
            logger.info("Stopped the Producer!");
        }));
    }

    public org.apache.kafka.clients.producer.KafkaProducer<String, String> createKafkaProducer() {
        //Create Producer Properties
        Properties properties = new Properties();
        properties.setProperty(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServer);
        properties.setProperty(ProducerConfig.ACKS_CONFIG, "all");
        properties.setProperty(ProducerConfig.RETRIES_CONFIG, "5");
        properties.setProperty(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
        properties.setProperty(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
        properties.setProperty(ProducerConfig.ENABLE_IDEMPOTENCE_CONFIG, "true"); // For an idempotent producer
        //kafka can detect whether it's a duplicate data based on the producer request id.

        //Create high throughput Producer at the expense of latency & CPU
        properties.setProperty(ProducerConfig.COMPRESSION_TYPE_CONFIG, "snappy");
        properties.setProperty(ProducerConfig.LINGER_MS_CONFIG, "60");
        properties.setProperty(ProducerConfig.BATCH_SIZE_CONFIG, Integer.toString(32 * 1024)); //32KB batch size

        //Create Kafka Producer
        org.apache.kafka.clients.producer.KafkaProducer<String, String> csvProducer = new org.apache.kafka.clients.producer.KafkaProducer<String, String>(properties);
        return csvProducer;
    }
}

有人可以帮助我在我的Kafka生产者程序中实现线程吗? 我的生产者将产生超过一百万条记录,因此我想为它实现线程。我知道ExecutorService用于线程编程,但是我不确定在这种情况下如何实现。 谢谢。

1 个答案:

答案 0 :(得分:3)

  • 创建如下所示的MessageSender类。
  • 在创建生产者类之后,创建一个新的MesssageSender对象,将生产者记录和生产者作为构造函数args。
  • 调用executorService.submit()来执行任务。

          class Producer {
             ExecutorService executorService = 
              Executors.newFixedThreadPool(MAX_THREAD);
             //Read the CSV file line by line
             String line = "";
             int i = 0;
              while ((line = reader.readLine()) != null) {
                 //create produver record
                 ProducerRecord<String, String> csvProducerRecord = new ProducerRecord<>(topicName, key, line.trim());
                 MessageSender sendMessage= new MessageSender(csvProducerRecord,producer);
                executorService.submit()...
              }
                }
    
            //Thread class          
            class MessageSender implements Runnable<>{
             MessageSender(Producerrecord,producer{
            //store in class level variable in thread class
            }
            public void run(){
                producer.send(csvProducerRecord...);
            }