如何创建Kafka压缩主题

时间:2018-11-20 09:44:36

标签: java apache-kafka

我有一个Kafka应用程序,其中有一个producer向某个主题生成消息。随后consumer从该主题中获取消息,对消息进行逻辑处理,再将结果生成到另一个主题。我使用的是ProducerRecord和ConsumerRecords。

我希望我的应用创建2个compacted topics,然后使用它们。如果compacted topics已经存在,只需显示一条消息并继续。

我的SimpleProducer类:

  package com.kafkatest.demo;

import java.util.*;

import org.apache.kafka.clients.producer.*;
/**
 * Minimal demo producer: publishes a fixed batch of six string records to
 * {@code nodesTopic} and then exits.
 *
 * <p>The batch is {@code Key1/Value-1}, {@code Key2/Value-2}, followed by four
 * more {@code Key1/Value-1} records, so the same key is written repeatedly.
 */
public class SimpleProducer extends Thread{

   /** Topic that receives the demo records. */
   private static final String TOPIC_NAME = "nodesTopic";

   /** Number of additional {@code Key1} records sent after the first two records. */
   private static final int REPEATED_SENDS = 4;

   /**
    * Builds the producer configuration, sends the demo batch and prints a
    * completion message.
    *
    * @param args ignored
    * @throws Exception if creating the producer or sending a record fails
    */
   public static void main(String[] args) throws Exception{

      String key = "Key1";
      String value = "Value-1";

      String key1 = "Key2";
      String value1 = "Value-2";

      Properties props = new Properties();
      props.put("bootstrap.servers", "localhost:9092,localhost:9093");
      props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
      props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");

      // try-with-resources guarantees close() runs even when a send throws,
      // so the producer's resources are never leaked.
      try (Producer<String, String> producer = new KafkaProducer<>(props)) {
         producer.send(new ProducerRecord<>(TOPIC_NAME, key, value));
         producer.send(new ProducerRecord<>(TOPIC_NAME, key1, value1));

         // Re-send the first key/value pair; same order and count as the original batch.
         for (int i = 0; i < REPEATED_SENDS; i++) {
            producer.send(new ProducerRecord<>(TOPIC_NAME, key, value));
         }
      }

      System.out.println("SimpleProducer Completed.");
   }
}

我的SimpleConsumer类:

   package com.kafkatest.demo;

import java.time.Duration;
import java.time.temporal.ChronoUnit;
import java.util.Arrays;
import java.util.Properties;

import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.clients.producer.ProducerRecord;

/**
 * Demo consume-transform-produce loop: reads string records from
 * {@code nodesTopic} and, for every record, writes two records to
 * {@code forecastTopic} (the lower-cased value and the size of the polled batch,
 * each prefixed with the source offset).
 */
public class SimpleConsumer extends Thread{

    /** Topic the transformed records are written to. */
    private static final String OUTPUT_TOPIC = "forecastTopic";

    /** Pause between two polls, in milliseconds. */
    private static final long PAUSE_BETWEEN_POLLS_MS = 5000;

    /**
     * Runs the poll loop until the thread is interrupted or a client call throws.
     *
     * @param args ignored
     */
    public static void main(String[] args) {

        Properties producerProps = new Properties();
        producerProps.put("bootstrap.servers", "localhost:9092,localhost:9093");
        producerProps.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        producerProps.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");

        Properties consumerProps = new Properties();
        consumerProps.put("bootstrap.servers", "localhost:9092");
        consumerProps.put("group.id", "consumer-tutorial");
        consumerProps.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        consumerProps.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");

        Duration pollTimeout = Duration.of(2, ChronoUnit.MINUTES);
        String topicName = "nodesTopic";

        // Remember an interrupt instead of re-flagging the thread immediately:
        // the flag is restored only after both clients have been closed, so the
        // close() calls below do not run on an already-interrupted thread.
        boolean interrupted = false;

        // Both clients are opened in one try-with-resources so that neither leaks
        // if constructing the other one (or subscribing) throws.
        try (Producer<String, String> producer = new KafkaProducer<>(producerProps);
             KafkaConsumer<String, String> consumer = new KafkaConsumer<>(consumerProps)) {

            consumer.subscribe(Arrays.asList(topicName));

            while (!interrupted) {
                try {
                    Thread.sleep(PAUSE_BETWEEN_POLLS_MS);
                } catch (InterruptedException e) {
                    // Treat interruption as a stop request rather than swallowing it.
                    interrupted = true;
                    continue;
                }

                ConsumerRecords<String, String> records = consumer.poll(pollTimeout);
                // The batch size is the same for every record of this poll; compute it once.
                String batchSize = "" + records.count();

                for (ConsumerRecord<String, String> record : records) {
                    String value = record.value();
                    System.out.println(record.offset() + ": " + value);

                    // A record may carry a null value (e.g. a delete marker on a
                    // compacted topic); there is nothing to lower-case, so skip it
                    // instead of failing with a NullPointerException.
                    if (value == null) {
                        continue;
                    }

                    String lowered = value.toLowerCase();
                    System.out.println("Record: " + lowered);

                    producer.send(new ProducerRecord<>(OUTPUT_TOPIC, "Key", record.offset() + ". " + lowered));
                    producer.send(new ProducerRecord<>(OUTPUT_TOPIC, "Key", record.offset() + ". " + batchSize));
                }
            }
        }

        if (interrupted) {
            // Restore the interrupt status for whoever observes this thread next.
            Thread.currentThread().interrupt();
        }
    }

}

当我运行bin/kafka-console-consumer.sh --bootstrap-server localhost:9092 --topic forecastTopic --from-beginning并运行生产者几次后,我得到

0. value-1
0. 6
1. value-2
1. 6
2. value-1
2. 6
3. value-1
3. 6
4. value-1
4. 6
5. value-1
5. 6
6. value-1
6. 6
7. value-2
7. 6
8. value-1
8. 6
9. value-1
9. 6
10. value-1
10. 6
11. value-1
11. 6
12. value-1
12. 6
13. value-2
13. 6
14. value-1
14. 6
15. value-1
15. 6
16. value-1
16. 6
17. value-1
17. 6
18. value-1
18. 6
19. value-2
19. 6
20. value-1
20. 6
21. value-1
21. 6
22. value-1
22. 6
23. value-1
23. 6
24. value-1
24. 6
25. value-2
25. 6
26. value-1
26. 6
27. value-1
27. 6
28. value-1
28. 6
29. value-1
29. 6
30. value-1
30. 6
31. value-2
31. 6
32. value-1
32. 6
33. value-1
33. 6
34. value-1
34. 6
35. value-1
35. 6
36. value-1
36. 6
37. value-2
37. 6
38. value-1
38. 6
39. value-1
39. 6
40. value-1
40. 6
41. value-1
41. 6
42. value-1
42. 6
43. value-2
43. 6
44. value-1
44. 6
45. value-1
45. 6
46. value-1
46. 6
47. value-1
47. 6
48. value-1
48. 12
49. value-2
49. 12
50. value-1
50. 12
51. value-1
51. 12
52. value-1
52. 12
53. value-1
53. 12
54. value-1
54. 12
55. value-2
55. 12
56. value-1
56. 12
57. value-1
57. 12
58. value-1
58. 12
59. value-1
59. 12
60. value-1
60. 6
61. value-2
61. 6
62. value-1
62. 6
63. value-1
63. 6
64. value-1
64. 6
65. value-1
65. 6
66. value-1
66. 6
67. value-2
67. 6
68. value-1
68. 6
69. value-1
69. 6
70. value-1
70. 6
71. value-1
71. 6
72. value-1
72. 6
73. value-2
73. 6
74. value-1
74. 6
75. value-1
75. 6
76. value-1
76. 6
77. value-1
77. 6
78. value-1
78. 6
79. value-2
79. 6
80. value-1
80. 6
81. value-1
81. 6
82. value-1
82. 6
83. value-1
83. 6

我将log.cleanup.policy=compact放在了server.properties文件中,但是似乎没有用,因为我在该主题中拥有所有83个偏移量。

谢谢。

3 个答案:

答案 0 :(得分:4)

以防有人觉得有用:如果您只想压缩某个特定主题,而不想在服务器级别把所有主题都设置为压缩,可以使用下面这条CLI命令。

bin/kafka-topics --alter --topic my_topic_name --zookeeper my_zookeeper:2181 --config cleanup.policy=compact

以上命令假设是在Confluent发行版的根目录下运行的。如果您使用的是常规的Apache Kafka发行版,我认为需要把命令改为调用bin/kafka-topics.sh。

答案 1 :(得分:2)

使用这个命令:

kafka-topics --topic topic_name --bootstrap-server localhost:9092 --config "cleanup.policy=compact"

答案 2 :(得分:1)

在server.properties中设置log.cleanup.policy=compact时,它将是创建新主题时的默认策略。如果您在创建主题后更改server.properties,则主题配置将不会更改。

您可以更改主题配置以设置cleanup.policy=compact

由于压缩日志是由日志清理器进行的,因此您可能希望在主题上设置特定的delete.retention.ms,因为默认保留时间为24小时。

最后,活动分段不会发生压缩。 参见Kafka Log Compaction not starting