我正在为Flink Kafka Producer编写一个keyerialization方案。这是我的源代码。
@SerialVersionUID(100L)
class KafkaSink private (dataStream: DataStream[Map[String, Any]], source: String) extends Logging with Serializable {
private[sink] lazy val kafkaSink = new FlinkKafkaProducer010[Map[String, Any]](
// broker list
ExecutionEnv.sinkTopic, // target topic
new KeyedSerializationSchema[Map[String, Any]] {
override def serializeKey(element: Map[String, Any]): Array[Byte] = KafkaSink.this
.keySerializer(element)
// {
// val stream: ByteArrayOutputStream = new ByteArrayOutputStream()
// val oos = new ObjectOutputStream(stream)
// oos.writeObject(element("CMLS_ACCT_NUM"))
// oos.close()
// stream.toByteArray
// }
override def serializeValue(element: Map[String, Any]): Array[Byte] = {
val avroRecord: GenericData.Record = new GenericData.Record(schema)
for ((k, v) <- element) avroRecord.put(k, v)
val stream: Array[Byte] = recordInjection.apply(avroRecord)
stream
}
override def getTargetTopic(element: Map[String, Any]): String = ExecutionEnv.sinkTopic
}, ExecutionEnv.kafkaSinkProperties)
def keySerializer(element: Map[String, Any]) = {
val stream: ByteArrayOutputStream = new ByteArrayOutputStream()
val oos = new ObjectOutputStream(stream)
oos.writeObject(element("CMLS_ACCT_NUM"))
oos.close()
stream.toByteArray
}
private[sink] def valueSerializer(element: Map[String, Any]) = {
val avroRecord: GenericData.Record = new GenericData.Record(schema)
for ((k, v) <- element) avroRecord.put(k, v)
val stream: Array[Byte] = recordInjection.apply(avroRecord)
stream
}
def sendToKafka[T]: Unit = dataStream.addSink(kafkaSink)
}
object KafkaSink {
def apply(dataStream: DataStream[Map[String, Any]], source: String): KafkaSink = new KafkaSink(dataStream, source)
}
为什么会出现错误
org.apache.flink.api.common.InvalidProgramException:对象KafkaSink $$ anon $ 1 @ 736caf7a无法序列化
即使我已将KafkaSink类设置为可序列化的。当我有匿名类KeyedSerializationSchema在代码块而不是外部类(KafkaSink)中的方法中重写keySerializer和valueSerializer时,此程序将起作用。