从Kafka中删除空主题

时间:2018-05-10 19:59:56

标签: apache-kafka

我想定期(通过cron)删除所有空的Kafka主题。我在文档中一直没能找到可以直接完成此操作的命令,所以只能自己写脚本:

首先我设置了delete.topic.enable=true

问题是找到主题中当前的实际消息数。我可以用以下内容显示总消息数:

$KAFKA_DIR/bin/kafka-run-class.sh kafka.tools.GetOffsetShell \
  --broker-list $KAFKA_BOOTSTRAP --topic $TOPIC --time -1

然而,这个数字也包含了已过期的消息。如何在不使用消费者的情况下,得到主题中当前实际的消息数?

2 个答案:

答案 0 :(得分:0)

kafka.tools.GetOffsetShell返回的是偏移量(offset),而不是实际的消息数。

要获取某个主题当前存储的全部消息,您需要把消费者定位(seek)到各分区的起始位置,然后把每个分区的结果相加。

答案 1 :(得分:0)

如果您已将ELK堆栈与Kafka集群集成在一起,则此任务将变得非常容易。

第1步:在Kafka集群中找到所有主题。

/**
 * Fetches the names of all topics from the Kafka admin endpoint.
 *
 * Sends a GET to `${KAFKA_URL}/admin/topics` with the `auth` header set to
 * KAFKA_AUTH and collects the `name` of every entry in the JSON response.
 * Entries that are falsy or have no `name` are skipped.
 *
 * @returns {Promise<string[]>} Topic names; an empty array when the response
 *   carries no body.
 * @throws Rejects with the transport error reported by `request`, or with an
 *   Error when the response body is present but is not an array.
 */
async function getKafkaTopics() {
    return new Promise(function (resolve, reject) {
        request({
            headers: {
                'auth': KAFKA_AUTH
            },
            uri: `${KAFKA_URL}/admin/topics`,
            method: 'GET',
            json: true,
        }, function (err, res, body) {
            if (err) {
                console.log("Error encountered while getting Kafka Topics: ",err);
                reject(err);
                // Stop here: the promise is settled and `body` is not trustworthy.
                return;
            }
            if (!body) {
                resolve([]);
                return;
            }
            // A JSON error payload (object/string) would otherwise blow up on
            // `.forEach` inside this callback instead of rejecting the promise.
            if (!Array.isArray(body)) {
                reject(new Error('Unexpected response while getting Kafka Topics: body is not an array'));
                return;
            }
            const topicNames = body
                .filter(function (item) { return item && item.name; })
                .map(function (item) { return item.name; });
            resolve(topicNames);
        });
    });
}

第2步:针对每个主题,在Elasticsearch中查询最近7天内是否存在与该主题匹配的记录:

/**
 * Searches Elasticsearch for documents that mention `topic` and whose
 * `timestamp` falls within the last 7 days.
 *
 * The topic is sent as a `query_string` clause inside the request body rather
 * than as a `?q=` URL parameter. A `q` parameter overrides the body `query`
 * (which would drop the 7-day range filter), and an unencoded topic name
 * could corrupt the URL.
 *
 * @param {string} topic - Kafka topic name to look for.
 * @returns {Promise<{topic: string, body: *}>} The topic together with the
 *   parsed search response body.
 * @throws Rejects with the transport error reported by `request.get`.
 */
async function callElasticSearch(topic) {
    console.log("Calling elastic search for topic: ",topic);
    const url = ELASTIC_SEARCH_URL + `/${INDEX}/_search`;
    const searchBody = {
        "query": {
            "bool": {
                // Same Lucene query-string syntax that `?q=` would have used.
                "must": {
                    "query_string": {
                        "query": topic
                    }
                },
                "filter": {
                    "range": {
                        "timestamp": {
                            "gte": "now-7d/d",
                            "lt": "now/d"
                        }
                    }
                }
            }
        }
    };
    const options = {
        url: url,
        headers: {
            'Authorization': ELASTIC_AUTHORIZATION
        },
        body: searchBody,
        json: true
    };
    return new Promise(function (resolve, reject) {
        request.get(options, function (err, res, body) {
            if (err) {
                console.log(`Error encountered while calling elastic search for topic : ${topic}`,err);
                reject(err);
                return;
            }
            console.log("Response Recieved for: ",topic);
            resolve({
                topic: topic,
                body: body
            });
        });
    });
}

您可以像下面这样迭代地调用此函数:

/**
 * Finds Kafka topics that had no matching Elasticsearch documents in the
 * last 7 days and logs them.
 *
 * Topics are looked up through `callElasticSearch` with at most 5 lookups in
 * flight at once. A topic counts as empty when the response reports zero
 * total hits; both the plain-number form of `hits.total` and the
 * `{ value, relation }` object form are understood. Topics whose lookup
 * failed are reported separately and are never treated as empty.
 *
 * @returns {Promise<string[]>} Names of the topics considered empty.
 * @throws Rejects if `getKafkaTopics` rejects.
 */
async function start() {
    const empty_topic = [];
    console.log("Start Time: ", new Date());
    const total_kafka_topics = await getKafkaTopics();
    console.log("Total Kafka Topics: ", total_kafka_topics.length);
    const { errors } = await PromisePool
        .withConcurrency(5)
        .for(total_kafka_topics)
        .process(async topic => {
            const elasticResponse = await callElasticSearch(topic);
            const hits = elasticResponse.body && elasticResponse.body.hits;
            const total = hits ? hits.total : undefined;
            // Newer Elasticsearch versions wrap the count as { value, relation }.
            const hitCount = (total !== null && typeof total === 'object') ? total.value : total;
            if (hitCount === 0 && elasticResponse.topic) {
                empty_topic.push(elasticResponse.topic);
            }
        });
    // Surface failed lookups instead of silently dropping those topics.
    if (errors && errors.length > 0) {
        console.log("Number of topics whose lookup failed: ", errors.length);
        console.log("Failed Topics: %j", errors.map(error => error.item));
    }
    console.log("Number of empty topics from past 7 days: ", empty_topic.length);
    console.log("Empty Topics: %j", empty_topic);
    console.log("Time Ended: ", new Date());
    return empty_topic;
}