在 Spark Streaming 中消费 Avro 事件并创建 DataFrame

时间:2018-08-21 18:34:03

标签: scala apache-spark apache-kafka avro confluent

我刚接触 Spark Streaming 和 Scala，需要一些帮助来消费来自 Kafka 的 Avro 消息，并将其转换为 Spark DataFrame。

请参考以下来自 Confluent Kafka Connect 的 Avro 事件，该事件同时包含 schema（架构）和 payload（数据负载）两部分。

我需要消费该事件，然后据此创建一个包含“数据行”和“架构”的 DataFrame。这听起来有点复杂，能否请您提供一些可供参考的示例代码？

{
"schema": {
    "type": "struct",
    "fields": [{
        "type": "string",
        "optional": false,
        "field": "id"
    }, {
        "type": "string",
        "optional": false,
        "field": "dataSourceName"
    }, {
        "type": "array",
        "items": {
            "type": "struct",
            "fields": [{
                "type": "string",
                "optional": false,
                "field": "dataEntityName"
            }, {
                "type": "array",
                "items": {
                    "type": "string",
                    "optional": false
                },
                "optional": false,
                "field": "keyFieldNames"
            }, {
                "type": "array",
                "items": {
                    "type": "struct",
                    "fields": [{
                        "type": "string",
                        "optional": false,
                        "field": "name"
                    }, {
                        "type": "string",
                        "optional": false,
                        "field": "type"
                    }, {
                        "type": "string",
                        "optional": true,
                        "field": "value"
                    }],
                    "optional": false,
                    "name": "Field"
                },
                "optional": false,
                "field": "fields"
            }],
            "optional": false,
            "name": "Change"
        },
        "optional": false,
        "field": "changes"
    }, {
        "type": "string",
        "optional": false,
        "field": "part"
    }],
    "optional": false,
    "name": "AvroTestEvent"
},
"payload": {
    "id": "D434000C",
    "dataSourceName": "EmployeeDB",
    "changes": [{
        "dataEntityName": "dbo.employeeTable",
        "keyFieldNames": ["id"],
        "fields": [{
            "name": "Employee_Id",
            "type": "int",
            "value": "6"
        }, {
            "name": "Employee_Name",
            "type": "varchar",
            "value": "test-employee"
        }]
    }, {
        "dataEntityName": "dbo.departmentTable",
        "keyFieldNames": ["Department_Id"],
        "fields": [{
            "name": "Department_Id",
            "type": "smallint",
            "value": "620"
        }, {
            "name": "Department_Name",
            "type": "varchar",
            "value": "ABCC"
        }]
    }],
    "part": "FULL"
}

}

0 个答案:

没有答案