我想在 AWS Glue 中使用 PySpark 将输出写成嵌套的 JSON。我已完成以下步骤:
我在 AWS Glue 中使用了下面的 PySpark 代码:
# Declare one (source name, source type, target name, target type) tuple per
# column taken from dynJoin; every column keeps its source type.
# NOTE(review): "suffix" is the only column whose target name differs
# ("suffixe") -- confirm that this rename is intentional.
applymapping1 = ApplyMapping.apply(
    frame=dynJoin,
    mappings=[
        ("patientid", "decimal(19,0)", "patientid", "decimal(19,0)"),
        ("last_name", "string", "last_name", "string"),
        ("first_name", "string", "first_name", "string"),
        ("middle_name", "string", "middle_name", "string"),
        ("prefix", "string", "prefix", "string"),
        ("suffix", "string", "suffixe", "string"),
        ("street_address_1", "string", "street_address_1", "string"),
        ("street_address_2", "string", "street_address_2", "string"),
        ("zip", "string", "zip", "string"),
        ("city", "string", "city", "string"),
        ("country", "string", "country", "string"),
        ("group_name", "string", "group_name", "string"),
        ("group_id", "string", "group_id", "string"),
        ("current_member_id", "decimal(19,0)", "current_member_id", "decimal(19,0)"),
    ],
    transformation_ctx="applymapping1",
)
def MergeAddress(rec):
    """Nest the flat address columns of one record under an ``Address`` field.

    The five address columns (``street_address_1``, ``street_address_2``,
    ``zip``, ``city``, ``country``) are copied into a nested mapping stored
    under ``rec["Address"]`` and then removed from the top level, so the
    address values are kept instead of being discarded.

    This function sees a single record at a time. Collecting several
    addresses of the same patient into one list needs a separate
    grouping/aggregation step outside this function.

    Args:
        rec: A mutable, dict-like record containing all five address columns.

    Returns:
        The same ``rec`` object, modified in place.

    Raises:
        KeyError: If any of the five address columns is missing from ``rec``.
    """
    address_fields = (
        "street_address_1",
        "street_address_2",
        "zip",
        "city",
        "country",
    )
    # Copy the values before deleting the flat columns; deleting first would
    # lose the address data entirely.
    rec["Address"] = {field: rec[field] for field in address_fields}
    for field in address_fields:
        del rec[field]
    return rec
# Run MergeAddress over the records of the mapped frame.
mapped_dyF = Map.apply(frame=applymapping1, f=MergeAddress)
{"patientid":8002,"Address":{"Array":["18 Orchard Avenue",null,"19001","Abington",null]},"group_id":"OLRX","group_name":"OLR Executive","current_member_id":1000434787}
{"patientid":8001,"Address":{"Array":["333 Oak Street",null,"34801","Bradenton",null]},"group_id":"OLRX","group_name":"OLR Executive","current_member_id":1222333444}
{"patientid":8001,"Address":{"Array":["102 North Main Street","Suite 41","32801","Orlando",null]},"group_id":"OLRX","group_name":"OLR Executive","current_member_id":1222333444}
{"patientid":8003,"Address":{"Array":[null,null,null,null,null]},"group_id":"OLRX","group_name":"OLR Executive","current_member_id":12288889444}
以上是我目前得到的输出。但是,输出必须采用以下格式:
{"patientid":8001,
"Address":
[{"street_address_1":"333 Oak Street","street_address_2":null,"zip":"34801","city":"Bradenton","country":null},
{"street_address_1":"102 North Main Street","street_address_2":"Suite 41","zip":"32801","city":"Orlando","country":null}
]
,"group_id":"OLRX","group_name":"OLR Executive","current_member_id":1222333444
}