对于 Spark Streaming 和 Scala,我并不陌生,但需要一些帮助来消费来自 Kafka 的 Avro 消息并将其转换为 Spark 数据帧(DataFrame)。
请参考以下来自 Confluent Kafka Connect 的 Avro 事件,该事件同时包含 Schema 和数据载荷(payload)。
我需要消费该事件,并从中创建一个同时包含"数据行"和"架构(Schema)"的数据框。这听起来有点复杂,能否提供一些可以用来实现此功能的示例代码?
{
"schema": {
"type": "struct",
"fields": [{
"type": "string",
"optional": false,
"field": "id"
}, {
"type": "string",
"optional": false,
"field": "dataSourceName"
}, {
"type": "array",
"items": {
"type": "struct",
"fields": [{
"type": "string",
"optional": false,
"field": "dataEntityName"
}, {
"type": "array",
"items": {
"type": "string",
"optional": false
},
"optional": false,
"field": "keyFieldNames"
}, {
"type": "array",
"items": {
"type": "struct",
"fields": [{
"type": "string",
"optional": false,
"field": "name"
}, {
"type": "string",
"optional": false,
"field": "type"
}, {
"type": "string",
"optional": true,
"field": "value"
}],
"optional": false,
"name": "Field"
},
"optional": false,
"field": "fields"
}],
"optional": false,
"name": "Change"
},
"optional": false,
"field": "changes"
}, {
"type": "string",
"optional": false,
"field": "part"
}],
"optional": false,
"name": "AvroTestEvent"
},
"payload": {
"id": "D434000C",
"dataSourceName": "EmployeeDB",
"changes": [{
"dataEntityName": "dbo.employeeTable",
"keyFieldNames": ["id"],
"fields": [{
"name": "Employee_Id",
"type": "int",
"value": "6"
}, {
"name": "Employee_Name",
"type": "varchar",
"value": "test-employee"
}]
}, {
"dataEntityName": "dbo.departmentTable",
"keyFieldNames": ["Department_Id"],
"fields": [{
"name": "Department_Id",
"type": "smallint",
"value": "620"
}, {
"name": "Department_Name",
"type": "varchar",
"value": "ABCC"
}]
}],
"part": "FULL"
}
}