Kafka Consumer不提取所有消息

时间:2016-02-16 11:18:34

标签: python apache-kafka producer-consumer

我试图在 Kafka 中创建新主题时动态启动消费者,但动态启动的消费者总是丢失开头的(第一条)消息,只能消费此后的消息。我使用的是 kafka-python 模块,以及较新的 KafkaConsumer 和 KafkaProducer。

生产者代码

<html>
<head>
<title>Heat map</title>
</head>
<body>
<!-- Tooltip placeholder; the 'hidden' class is not defined in this file. -->
<div id='tooltip' class='hidden'>
 <p><span id='value'></span></p>
</div>
<script src='http://d3js.org/d3.v3.min.js'></script>
<div id='chart' style='overflow:auto; width:960px; height:700px;'></div>
<script type='text/javascript'>
// Heat map rendered with D3 v3: loads "data_heatmap.tsv" and draws one
// square <rect> per data row inside the #chart container.

// Layout configuration. Every name is declared with `var` so nothing leaks
// as an implicit global (the original statement chain was cut by a stray ';').
var margin = { top: 75, right: 10, bottom: 50, left: 100 };
var cellSize = 12;      // side length of one cell, in pixels
var col_number = 34;
var row_number = 34;
var width = cellSize * col_number;   // drawing width, margins excluded
var height = cellSize * row_number;  // drawing height, margins excluded
var gridSize = Math.floor(width / 24);
var legendElementWidth = cellSize * 2.5;
var colorBuckets = 11;
// One fill colour per quantile bucket, lightest to darkest.
var colors = ['#FFFFFF','#F1EEF6','#E6D3E1','#DBB9CD','#D19EB9','#C684A4','#BB6990','#B14F7C','#A63467','#9B1A53','#91003F'];
// Display order of rows/columns: a cell's position is the index of its
// row/col value in these arrays.
var hcrow = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34]; // change to gene name or probe id
var hccol = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34]; // change to gene name or probe id
var rowLabel = ['From 0-14', 'From 15-29', 'From 30-44', 'From 45-59', 'From 60-74', 'From 75-89', 'From 90-104', 'From 105-119', 'From 120-134', 'From 135-149', 'From 150-164', 'From 165-179', 'From 180-194', 'From 195-209', 'From 210-224', 'From 225-239', 'From 240-254', 'From 255-269', 'From 270-284', 'From 285-299', 'From 300-314', 'From 315-329', 'From 330-344', 'From 345-359', 'From 360-374', 'From 375-389', 'From 390-404', 'From 405-419', 'From 420-434', 'From 435-449', 'From 450-464', 'From 465-479', 'From 480-494', 'From 495-509']; // change to gene name or probe id
var colLabel = ['To 0-14', 'To 15-29', 'To 30-44', 'To 45-59', 'To 60-74', 'To 75-89', 'To 90-104', 'To 105-119', 'To 120-134', 'To 135-149', 'To 150-164', 'To 165-179', 'To 180-194', 'To 195-209', 'To 210-224', 'To 225-239', 'To 240-254', 'To 255-269', 'To 270-284', 'To 285-299', 'To 300-314', 'To 315-329', 'To 330-344', 'To 345-359', 'To 360-374', 'To 375-389', 'To 390-404', 'To 405-419', 'To 420-434', 'To 435-449', 'To 450-464', 'To 465-479', 'To 480-494', 'To 495-509']; // change to contrast name

d3.tsv("data_heatmap.tsv",

// Row accessor: coerce the TSV string fields to numbers.
function(d) {
  return {
    row:   +d.row_idx,
    col:   +d.col_idx,
    value: +d.repetitions
  };
},

// Load callback: draws the heat map once the TSV has been parsed.
function(error, data) {
  // Without this guard a failed load would crash below on an undefined `data`.
  if (error) {
    console.error("Failed to load data_heatmap.tsv", error);
    return;
  }

  // Maps a cell value to one of the `colors` buckets.
  var colorScale = d3.scale.quantile()
      .domain([0, 10])
      .range(colors);

  // Root <svg>, with an inner <g> shifted by the top/left margins.
  var svg = d3.select("#chart").append("svg")
      .attr("width", width + margin.left + margin.right)
      .attr("height", height + margin.top + margin.bottom)
      .append("g")
      .attr("transform", "translate(" + margin.left + "," + margin.top + ")")
  ;

  // One rect per data row, keyed by "row:col".
  var heatMap = svg.append("g")
      .attr("class","g3")
      .selectAll(".cellg")
      .data(data,function(d){return d.row+":"+d.col;})
      .enter()
      .append("rect")
      .attr("x", function(d) { return hccol.indexOf(d.col) * cellSize; })
      .attr("y", function(d) { return hcrow.indexOf(d.row) * cellSize; })
      // The class names use row-1 / col-1 (presumably the TSV indices are 1-based; confirm against the data file).
      .attr("class", function(d){return "cell cell-border cr"+(d.row-1)+" cc"+(d.col-1);})
      .attr("width", cellSize)
      .attr("height", cellSize)
      .style("fill", function(d) { return colorScale(d.value); })
  ;
});

</script>
</body>
</html>

和消费者的代码是

# Create a producer that bootstraps from the Kafka broker at localhost:9092.
producer = KafkaProducer(bootstrap_servers='localhost:9092')
# Send `data` to `topic`; both names are defined outside this snippet.
# NOTE(review): in kafka-python send() is presumably asynchronous and returns
# a future rather than the final record metadata; confirm against the docs.
record_metadata = producer.send(topic, data)

请建议如何解决此问题,或者我需要在生产者和消费者实例中加入哪些配置。

1 个答案:

答案 0 :(得分:2)

您可以将 auto_offset_reset 设置为 'earliest'。

创建新的消费者流时,它从最新的偏移量(auto_offset_reset的默认值)开始,您将错过在消费者未启动时发送的消息。

您可以在 kafka-python 文档中阅读相关内容。相关部分如下:

  

auto_offset_reset (str) - 发生 OffsetOutOfRange 错误时用于重置偏移量的策略:'earliest' 将移动到最早的可用消息,'latest' 将移动到最新的消息。任何其他值都会引发异常。默认值:'latest'。