我想使用 Flink 实现一个简单的窗口事件计数器。事件源是一个数据生成器类,它会按指定的吞吐量生成事件流。但是,我不确定如何正确编写聚合函数来统计每个窗口内的事件数。
重要的是,除了计数之外,我还想记录每个窗口的最后更新时间,以便用表格列出各窗口的延迟。
/**
 * Incremental aggregate that counts elements and remembers the latest timestamp seen.
 *
 * Input elements are `(String, String, Timestamp)` tuples, of which only the third
 * component is read. Accumulator and result are both `WindowedCount`; the first two
 * fields are always filled with the placeholders `null` and `""` here.
 *
 * NOTE(review): the input type parameter has to match the element type of the stream
 * this function is applied to -- confirm against the call site.
 */
class CountAggregateNewX
    extends AggregateFunction[(String, String, Timestamp), WindowedCount, WindowedCount] {

  /** Empty accumulator: zero count, last update at epoch 0. */
  override def createAccumulator(): WindowedCount =
    WindowedCount(null, "", 0, new java.sql.Timestamp(0L))

  /** Adds one element: bumps the count and keeps the later of the two timestamps. */
  override def add(value: (String, String, Timestamp), acc: WindowedCount): WindowedCount = {
    val (_, _, eventTime) = value
    // On equal timestamps the accumulator's value is kept.
    val newest = if (eventTime.getTime > acc.lastUpdate.getTime) eventTime else acc.lastUpdate
    WindowedCount(null, "", acc.count + 1, newest)
  }

  /** The accumulator is returned unchanged as the result. */
  override def getResult(accumulator: WindowedCount): WindowedCount = accumulator

  /** Combines two partial results: counts are summed, the later timestamp wins (`a` on a tie). */
  override def merge(a: WindowedCount, b: WindowedCount): WindowedCount = {
    val newest = if (b.lastUpdate.getTime > a.lastUpdate.getTime) b.lastUpdate else a.lastUpdate
    WindowedCount(null, "", a.count + b.count, newest)
  }
}
/**
 * Window function that turns an aggregated `WindowedCount` into the final record
 * by filling in the window start time and the key.
 *
 * Only the first element of `elements` is read.
 */
class WindowAggregateFunction extends ProcessWindowFunction[WindowedCount, WindowedCount, Tuple, TimeWindow]() {

  /**
   * Emits one `WindowedCount` built from the window start, field 0 of the key, and the
   * count / last-update values of the first aggregated element.
   */
  override def process(
      key: Tuple,
      context: Context,
      elements: Iterable[WindowedCount],
      out: Collector[WindowedCount]): Unit = {
    val aggregated = elements.iterator.next()
    val windowStart = new java.sql.Timestamp(context.window.getStart)
    // key.getField(0) is passed inline so its result type is taken from the
    // WindowedCount parameter it fills.
    out.collect(WindowedCount(windowStart, key.getField(0), aggregated.count, aggregated.lastUpdate))
  }
}
// Key the stream by each event's `event_time`, then assign tumbling event-time
// windows sized by `windowMillis`.
// NOTE(review): keying with a selector function gives the key the type of
// `event_time`, not `Tuple` -- confirm this matches the key type expected by the
// window function applied downstream.
val winevents = events
  .keyBy(event => event.event_time)
  .window(TumblingEventTimeWindows.of(windowMillis))
以下代码显示一些错误:
类型不匹配,预期:AggregateFunction [Event,NotInferedACC,NotInferedV],实际:YahooBenchmark.CountAggregateNewX
类型不匹配,预期:AggregateFunction [Event,NotInferedACC,NotInferedV],实际:YahooBenchmark.CountAggregateNew
// Aggregate each window incrementally, then post-process the single aggregated value.
// NOTE(review): the reported type mismatch says an AggregateFunction over `Event` is
// expected here -- the aggregate's input type must equal the stream's element type.
val wincnt = winevents.aggregate(
  new CountAggregateNewX,
  new WindowAggregateFunction
)
数据生成器代码:
class EventGenerator(
tuplesPerSecond: Int,
rampUpTimeSeconds: Int,
campaigns: Array[CampaignAd],
parallelismarg: Int) extends RichParallelSourceFunction[Event] {
// Mutable flag, initially true.
// NOTE(review): presumably polled by run() and cleared by cancel(); neither is visible here -- confirm.
var running = true
// One random UUID string, created once per generator instance and reused for every event.
private val uuid = UUID.randomUUID().toString // used as a dummy value for all events, based on ref code
// Cached array lengths; generateElement uses them as moduli to cycle through the arrays.
private val adTypeLength = Variables.AD_TYPES.length
private val eventTypeLength = Variables.EVENT_TYPES.length
private val campaignLength = campaigns.length
// Parallelism as supplied through the constructor argument; the commented-out line below
// is an alternative that would read it from the runtime context instead.
private lazy val parallelism = parallelismarg;
//getRuntimeContext().getNumberOfParallelSubtasks()
/**
 * Builds the event for index `i`, stamped with `currentTime`.
 *
 * The ad id, ad type and event type are chosen by cycling through `campaigns`,
 * `Variables.AD_TYPES` and `Variables.EVENT_TYPES` with `i` modulo each length.
 *
 * @param i           event index; expected to be non-negative, since a negative
 *                    remainder would be used as an array index
 * @param currentTime value passed to `java.sql.Timestamp(long)`, i.e. epoch milliseconds
 * @return the generated event
 */
private def generateElement(i: Long, currentTime: Long): Event = {
  // Method-call syntax instead of the postfix `x toInt` form, which requires
  // scala.language.postfixOps to be enabled.
  val ad_id = campaigns((i % campaignLength).toInt).ad_id // ad id for the current event index
  val ad_type = Variables.AD_TYPES((i % adTypeLength).toInt) // current ad type for event index
  val event_type = Variables.EVENT_TYPES((i % eventTypeLength).toInt) // current event type for event index
  Event(
    uuid, // random user, irrelevant
    uuid, // random page, irrelevant
    ad_id,
    ad_type,
    event_type,
    new java.sql.Timestamp(currentTime),
    "255.255.255.255") // generic ip address, irrelevant
}