所以我正在尝试使用Kafka流进行交互式查询。我有Zookeeper和Kafka在本地运行(在Windows上)。我使用C:\ temp作为存储文件夹,对于Zookeeper和Kafka。
我已经设置了这样的主题
kafka-topics.bat --zookeeper localhost:2181 --create --replication-factor 1 --partitions 1 --topic rating-submit-topic
kafka-topics.bat --zookeeper localhost:2181 --create --replication-factor 1 --partitions 1 --topic rating-output-topic
阅读我已完成此问题
并且还阅读了这篇类似的帖子,该帖子最初听起来与我的问题相同:Cannot access KTable from a different app as StateStore
这就是我的设置。那么问题是什么?
正如我所说,我正在尝试创建自己的应用程序,它允许使用自定义Akka Http REST Api(推荐的RPC调用)进行交互式查询,以允许我查询KTable
。实际的流处理似乎按预期发生,我能够打印KTable
的结果,它们与主题产生的结果相匹配。
所以存储方面似乎正在发挥作用
尝试使用Streams.allMetadata()
方法时,似乎会出现问题,它会返回一个空列表。
我正在使用
制片人代码
这是我的制作人的代码
package Processing.Ratings {
import java.util.concurrent.TimeUnit
import Entities.Ranking
import Serialization.JSONSerde
import Topics.RatingsTopics
import scala.util.Random
import org.apache.kafka.clients.producer.ProducerRecord
import org.apache.kafka.clients.producer.KafkaProducer
import org.apache.kafka.common.serialization.Serdes
import Utils.Settings
import org.apache.kafka.clients.producer.ProducerConfig
object RatingsProducerApp extends App {
run()
private def run(): Unit = {
val jSONSerde = new JSONSerde[Ranking]
val random = new Random
val producerProps = Settings.createBasicProducerProperties
val rankingList = List(
Ranking("jarden@here.com","sacha@here.com", 1.5f),
Ranking("miro@here.com","mary@here.com", 1.5f),
Ranking("anne@here.com","margeret@here.com", 3.5f),
Ranking("frank@here.com","bert@here.com", 2.5f),
Ranking("morgan@here.com","ruth@here.com", 1.5f))
producerProps.put(ProducerConfig.ACKS_CONFIG, "all")
System.out.println("Connecting to Kafka cluster via bootstrap servers " +
s"${producerProps.getProperty(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG)}")
// send a random string from List event every 100 milliseconds
val rankingProducer = new KafkaProducer[String, Array[Byte]](
producerProps, Serdes.String.serializer, Serdes.ByteArray.serializer)
//while (true) {
for (i <- 0 to 10) {
val ranking = rankingList(random.nextInt(rankingList.size))
val rankingBytes = jSONSerde.serializer().serialize("", ranking)
System.out.println(s"Writing ranking ${ranking} to input topic ${RatingsTopics.RATING_SUBMIT_TOPIC}")
rankingProducer.send(new ProducerRecord[String, Array[Byte]](
RatingsTopics.RATING_SUBMIT_TOPIC, ranking.toEmail, rankingBytes))
Thread.sleep(100)
}
Runtime.getRuntime.addShutdownHook(new Thread(() => {
rankingProducer.close(10, TimeUnit.SECONDS)
}))
}
}
}
流媒体代码
这是流代码
def createRatingStreamsProperties() : Properties = {
val props = createBasicStreamProperties
props.put(StreamsConfig.APPLICATION_ID_CONFIG, "ratings-application")
props.put(StreamsConfig.CLIENT_ID_CONFIG, "ratings-application-client")
props.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass)
props.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass)
props
}
private def createBasicStreamProperties() : Properties = {
val props = new Properties()
props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, bootStrapServers)
props.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass)
props.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass)
// Records should be flushed every 10 seconds. This is less than the default
// in order to keep this example interactive.
props.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 10000.asInstanceOf[Object])
// For illustrative purposes we disable record caches
props.put(StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG, 0.asInstanceOf[Object])
props
}
和实际代码
import java.util.Properties
import java.util.concurrent.TimeUnit
import org.apache.kafka.common.serialization._
import org.apache.kafka.streams._
import org.apache.kafka.streams.kstream._
import Entities.Ranking
import Serialization.JSONSerde
import Topics.RatingsTopics
import Utils.Settings
package Processing.Ratings {
import Stores.StateStores
import org.apache.kafka.streams.state.HostInfo
class DummyRankingReducer extends Reducer[Ranking] {
override def apply(value1: Ranking, value2: Ranking): Ranking = {
value2
}
}
class RankingByEmailInitializer extends Initializer[List[Ranking]] {
override def apply(): List[Ranking] = List[Ranking]()
}
class RankingByEmailAggregator extends Aggregator[String, Ranking,List[Ranking]] {
override def apply(aggKey: String, value: Ranking, aggregate: List[Ranking]) = {
value :: aggregate
}
}
object RatingStreamProcessingApp extends App {
run()
private def run() : Unit = {
val stringSerde = Serdes.String
val rankingSerde = new JSONSerde[Ranking]
val listRankingSerde = new JSONSerde[List[Ranking]]
val builder: KStreamBuilder = new KStreamBuilder
val rankings = builder.stream(stringSerde, rankingSerde, RatingsTopics.RATING_SUBMIT_TOPIC)
val rankingTable = rankings.groupByKey(stringSerde,rankingSerde)
.aggregate(
new RankingByEmailInitializer(),
new RankingByEmailAggregator(),
listRankingSerde,
StateStores.RANKINGS_BY_EMAIL_STORE
)
rankingTable.toStream.print()
val streams: KafkaStreams = new KafkaStreams(builder, Settings.createRatingStreamsProperties)
val restEndpoint:HostInfo = new HostInfo(Settings.restApiDefaultHostName, Settings.restApiDefaultPort)
System.out.println(s"Connecting to Kafka cluster via bootstrap servers ${Settings.bootStrapServers}")
System.out.println(s"REST endpoint at http://${restEndpoint.host}:${restEndpoint.port}")
// Always (and unconditionally) clean local state prior to starting the processing topology.
// We opt for this unconditional call here because this will make it easier for you to play around with the example
// when resetting the application for doing a re-run (via the Application Reset Tool,
// http://docs.confluent.io/current/streams/developer-guide.html#application-reset-tool).
//
// The drawback of cleaning up local state prior is that your app must rebuilt its local state from scratch, which
// will take time and will require reading all the state-relevant data from the Kafka cluster over the network.
// Thus in a production scenario you typically do not want to clean up always as we do here but rather only when it
// is truly needed, i.e., only under certain conditions (e.g., the presence of a command line flag for your app).
// See `ApplicationResetExample.java` for a production-like example.
//streams.cleanUp();
streams.start()
val restService = new RatingRestService(streams, restEndpoint)
restService.start()
//****************************************************************
// WHY DOES METADATA NOT WORK WHEN THERE IS CLEARLY A STORE IN USE
// WHY DOES METADATA NOT WORK WHEN THERE IS CLEARLY A STORE IN USE
// WHY DOES METADATA NOT WORK WHEN THERE IS CLEARLY A STORE IN USE
// WHY DOES METADATA NOT WORK WHEN THERE IS CLEARLY A STORE IN USE
//****************************************************************
val SIZE = streams.allMetadata.size()
val SIZE2 = streams.allMetadataForStore(StateStores.RANKINGS_BY_EMAIL_STORE).size()
import org.apache.kafka.streams.state.KeyValueIterator
import org.apache.kafka.streams.state.QueryableStoreTypes
import org.apache.kafka.streams.state.ReadOnlyKeyValueStore
val keyValueStore = streams.store(StateStores.RANKINGS_BY_EMAIL_STORE, QueryableStoreTypes.keyValueStore)
val range = keyValueStore.all
val HASNEXT = range.hasNext
import org.apache.kafka.streams.KeyValue
while (range.hasNext ) {
val next = range.next
System.out.println(String.format("key: %s | value: %s", next.key, next.value))
}
Runtime.getRuntime.addShutdownHook(new Thread(() => {
streams.close(10, TimeUnit.SECONDS)
restService.stop
}))
//return unit
()
}
}
}
我有这个配置的地方
kafka {
bootStrapServers = "localhost:9092"
zooKeepers = "zookeeper:2181"
schemaRegistryUrl = "http://localhost:8081"
partition = 0,
restApiDefaultHostName = "localhost",
restApiDefaultPort = "8080"
}
REST服务
package Processing.Ratings
import org.apache.kafka.streams.KafkaStreams
import org.apache.kafka.streams.state.StreamsMetadata
import java.util.stream.Collectors
import Entities.HostStoreInfo
import org.apache.kafka.common.serialization.Serializer
import org.apache.kafka.connect.errors.NotFoundException
import scala.collection.JavaConverters._
/**
* Looks up StreamsMetadata from KafkaStreams
*/
class MetadataService(val streams: KafkaStreams) {
/**
* Get the metadata for all of the instances of this Kafka Streams application
*
* @return List of { @link HostStoreInfo}
*/
def streamsMetadata() : List[HostStoreInfo] = {
// Get metadata for all of the instances of this Kafka Streams application
val metadata = streams.allMetadata
return mapInstancesToHostStoreInfo(metadata)
}
/**
* Get the metadata for all instances of this Kafka Streams application that currently
* has the provided store.
*
* @param store The store to locate
* @return List of { @link HostStoreInfo}
*/
def streamsMetadataForStore(store: String) : List[HostStoreInfo] = {
// Get metadata for all of the instances of this Kafka Streams application hosting the store
val metadata = streams.allMetadataForStore(store)
return mapInstancesToHostStoreInfo(metadata)
}
/**
* Find the metadata for the instance of this Kafka Streams Application that has the given
* store and would have the given key if it exists.
*
* @param store Store to find
* @param key The key to find
* @return { @link HostStoreInfo}
*/
def streamsMetadataForStoreAndKey[T](store: String, key: T, serializer: Serializer[T]) : HostStoreInfo = {
// Get metadata for the instances of this Kafka Streams application hosting the store and
// potentially the value for key
val metadata = streams.metadataForKey(store, key, serializer)
if (metadata == null)
throw new NotFoundException(
s"No metadata could be found for store : ${store}, and key type : ${key.getClass.getName}")
return new HostStoreInfo(metadata.host, metadata.port, metadata.stateStoreNames.asScala.toList)
}
def mapInstancesToHostStoreInfo(metadatas : java.util.Collection[StreamsMetadata]) : List[HostStoreInfo] = {
metadatas.stream.map[HostStoreInfo](metadata =>
HostStoreInfo(
metadata.host(),
metadata.port,
metadata.stateStoreNames.asScala.toList))
.collect(Collectors.toList())
.asScala.toList
}
}
这是REST服务(我现在只尝试让“实例”路由工作)。
package Processing.Ratings
import org.apache.kafka.streams.KafkaStreams
import org.apache.kafka.streams.state.HostInfo
import akka.actor.ActorSystem
import akka.http.scaladsl.Http
import akka.http.scaladsl.model._
import akka.http.scaladsl.server.Directives._
import akka.stream.ActorMaterializer
import akka.http.scaladsl.marshallers.sprayjson.SprayJsonSupport._
import spray.json.DefaultJsonProtocol._
import Entities.AkkaHttpEntitiesJsonFormats._
import Entities._
import akka.http.scaladsl.marshalling.ToResponseMarshallable
import scala.concurrent.Future
object RestService {
val DEFAULT_REST_ENDPOINT_HOSTNAME = "localhost"
}
class RatingRestService(val streams: KafkaStreams, val hostInfo: HostInfo) {
val metadataService = new MetadataService(streams)
var bindingFuture: Future[Http.ServerBinding] = null
implicit val system = ActorSystem("rating-system")
implicit val materializer = ActorMaterializer()
implicit val executionContext = system.dispatcher
def start() : Unit = {
val emailRegexPattern = """\w+""".r
val route =
path("ratingByEmail" / emailRegexPattern) { email =>
get {
//TODO : This would come from Kafka store, either local or remote
complete(ToResponseMarshallable.apply(List[Ranking](
Ranking("fred@here.com", "sacha@there.com", 4.0f),
Ranking("sam@here.com", "sacha@there.com", 2.0f)))
)
}
} ~
path("instances") {
get {
val x = metadataService.streamsMetadata
complete(ToResponseMarshallable.apply(metadataService.streamsMetadata))
}
}
bindingFuture = Http().bindAndHandle(route, hostInfo.host, hostInfo.port)
println(s"Server online at http://${hostInfo.host}:${hostInfo.port}/\n")
Runtime.getRuntime.addShutdownHook(new Thread(() => {
bindingFuture
.flatMap(_.unbind()) // trigger unbinding from the port
.onComplete(_ => system.terminate()) // and shutdown when done
}))
}
def stop() : Unit = {
bindingFuture
.flatMap(_.unbind()) // trigger unbinding from the port
.onComplete(_ => system.terminate()) // and shutdown when done
}
def thisHost(hostStoreInfo: HostStoreInfo) : Boolean = {
hostStoreInfo.host.equals(hostInfo.host()) &&
hostStoreInfo.port == hostInfo.port
}
}
以下证明商店中有数据
这是我运行生产者1st,然后是流,然后是生产者(再次运行)。
看看如何显示KTable
的结果,然后我启动了制作人,并通过流选择的主题推送了更多的消息
但是当我查询我的REST端点以尝试使用localhost:8080/instances
获取元数据时,我只能得到一个空列表[]
我原本预计上面的流代码中的这些行会返回一些元数据,商店里有明显的东西,为什么没有元数据
val SIZE = streams.allMetadata.size()
val SIZE2 = streams.allMetadataForStore(StateStores.RANKINGS_BY_EMAIL_STORE).size()
这两个都返回0,同时使用此代码遍历商店中的项目
import org.apache.kafka.streams.state.KeyValueIterator
import org.apache.kafka.streams.state.QueryableStoreTypes
import org.apache.kafka.streams.state.ReadOnlyKeyValueStore
val keyValueStore = streams.store(StateStores.RANKINGS_BY_EMAIL_STORE, QueryableStoreTypes.keyValueStore)
val range = keyValueStore.all
val HASNEXT = range.hasNext
import org.apache.kafka.streams.KeyValue
while (range.hasNext ) {
val next = range.next
System.out.println(String.format("key: %s | value: %s", next.key, next.value))
}
从商店
生成数据我知道REST api工作正常,因为硬编码的测试路由工作正常
我做错了什么???
答案 0 :(得分:6)
所以我想出来了,结果是由于缺少配置值
props.put(StreamsConfig.APPLICATION_SERVER_CONFIG, "localhost:8080")
我添加了Akka Htpp REST API http://localhost:8080/instance
开始工作。但后来我开始得到这个奇怪的异常
org.apache.kafka.streams.errors.InvalidStateStoreException: the state store, my-key-value-store, may have migrated to another instance.
at org.apache.kafka.streams.state.internals.StreamThreadStateStoreProvider.stores(StreamThreadStateStoreProvider.java:49)
at org.apache.kafka.streams.state.internals.QueryableStoreProvider.getStore(QueryableStoreProvider.java:55)
at org.apache.kafka.streams.KafkaStreams.store(KafkaStreams.java:699)
我决定我需要做的是执行一些重试逻辑,我这样做了:
<强>重试强>
我从这里借来的:https://gist.github.com/Mortimerp9/5430595
package Utils
import scala.concurrent._
import scala.concurrent.duration._
object Retry {
/**
* exponential back off for retry
*/
def exponentialBackoff(r: Int): Duration = scala.math.pow(2, r).round * 500 milliseconds
def noIgnore(t: Throwable): Boolean = false
/**
* retry a particular block that can fail
*
* @param maxRetry how many times to retry before to giveup
* @param deadline how long to retry before giving up; default None
* @param backoff a back-off function that returns a Duration after which to retry. default is an exponential backoff at 100 milliseconds steps
* @param ignoreThrowable if you want to stop retrying on a particular exception
* @param block a block of code to retry
* @param ctx an execution context where to execute the block
* @returns an eventual Future succeeded with the value computed or failed with one of:
* `TooManyRetriesException` if there were too many retries without an exception being caught. Probably impossible if you pass decent parameters
* `DeadlineExceededException` if the retry didn't succeed before the provided deadline
* `TimeoutException` if you provide a deadline and the block takes too long to execute
* `Throwable` the last encountered exception
*/
def retry[T](maxRetry: Int,
deadline: Option[Deadline] = None,
backoff: (Int) => Duration = exponentialBackoff,
ignoreThrowable: Throwable => Boolean = noIgnore)(block: => T)(implicit ctx: ExecutionContext): Future[T] = {
class TooManyRetriesException extends Exception("too many retries without exception")
class DeadlineExceededException extends Exception("deadline exceded")
val p = Promise[T]
def recursiveRetry(retryCnt: Int, exception: Option[Throwable])(f: () => T): Option[T] = {
if (maxRetry == retryCnt
|| deadline.isDefined && deadline.get.isOverdue) {
exception match {
case Some(t) =>
p failure t
case None if deadline.isDefined && deadline.get.isOverdue =>
p failure (new DeadlineExceededException)
case None =>
p failure (new TooManyRetriesException)
}
None
} else {
val success = try {
val rez = if (deadline.isDefined) {
Await.result(future(f()), deadline.get.timeLeft)
} else {
f()
}
Some(rez)
} catch {
case t: Throwable if !ignoreThrowable(t) =>
blocking {
val interval = backoff(retryCnt).toMillis
Thread.sleep(interval)
}
recursiveRetry(retryCnt + 1, Some(t))(f)
case t: Throwable =>
p failure t
None
}
success match {
case Some(v) =>
p success v
Some(v)
case None => None
}
}
}
def doBlock() = block
Future {
recursiveRetry(0, None)(doBlock)
}
p.future
}
}
我称之为
def printStoreMetaData(streams:KafkaStreams) : Unit = {
import org.apache.kafka.streams.state.KeyValueIterator
import org.apache.kafka.streams.state.QueryableStoreTypes
import org.apache.kafka.streams.state.ReadOnlyKeyValueStore
val keyValueStoreTry = waitUntilStoreIsQueryable(
StateStores.RANKINGS_BY_EMAIL_STORE,
QueryableStoreTypes.keyValueStore[String,List[Ranking]](),
streams
) match {
case Success(keyValueStore) => {
val SIZE = streams.allMetadata.size()
val SIZE2 = streams.allMetadataForStore(StateStores.RANKINGS_BY_EMAIL_STORE).size()
val range = keyValueStore.all
val HASNEXT = range.hasNext
import org.apache.kafka.streams.KeyValue
while (range.hasNext ) {
val next = range.next
System.out.println(String.format("key: %s | value: %s", next.key, next.value))
}
}
case Failure(f) => println(f)
}
}
在为我做完幸福的日子之后。