示例json文件
{"Response":{"MetaInfo":{"Timestamp":"2019-11-11T11:25:16.303+0000","NextPageInformation":"2"},"View":[{"_type":"SearchResultsViewType","ViewId":0,"Result":[{"Relevance":1.0,"Distance":4.8,"MatchLevel":"street","MatchQuality":{"Country":1.0,"State":1.0,"County":1.0,"City":1.0,"District":1.0,"Subdistrict":1.0,"PostalCode":1.0},"Location":{"LocationId":"NT_cu4rBChIN2x48NzUtd2.zB_l_715011352_R","LocationType":"point","DisplayPosition":{"Latitude":28.5579732,"Longitude":77.2870505},"MapView":{"TopLeft":{"Latitude":28.55798,"Longitude":77.28678},"BottomRight":{"Latitude":28.55797,"Longitude":77.28762}},"Address":{"Label":"Noor Nagar-Ajmal Bagh, Jamia Nagar, Delhi 110025, India","Country":"IND","State":"DL","County":"South","City":"Delhi","District":"Jamia Nagar","Subdistrict":"Noor Nagar-Ajmal Bagh","PostalCode":"110025","AdditionalData":[{"value":"India","key":"CountryName"},{"value":"Delhi","key":"StateName"},{"value":"South","key":"CountyName"}]},"MapReference":{"ReferenceId":"715011352","Spot":0.32,"SideOfStreet":"right","CountryId":"22806254","StateId":"22803332","CountyId":"22803085","DistrictId":"22803433"}}}]}]}}
我要从中获取地址（Address）字段。
我能够提取到位置（Location），但无法进一步提取其中的地址（Address）。
# Flatten the Response -> View -> Result records into rows, then expand the
# nested "Location" dict into top-level columns joined back onto the frame.
# Fixes vs. original:
#   - pd.io.json.json_normalize was deprecated in pandas 1.0 (and later
#     removed); use pd.json_normalize instead.
#   - drop('Location', 1) used the positional `axis` argument, removed in
#     pandas 2.0; use the explicit `columns=` keyword.
# NOTE(review): `merge` is assumed to come from toolz/cytoolz (merging a list
# of dicts into one) — confirm the import in the surrounding script.
pd.json_normalize(data, ['Response', 'View', 'Result']).pipe(
    lambda x: x.drop(columns='Location').join(
        x.Location.apply(lambda y: pd.Series(merge(y)))
    )
)
答案 0（得分：0）
使用scala spark,您可以递归地将json扁平化:
import org.apache.spark.sql.{ DataFrame, Row, SaveMode, SparkSession }
import org.apache.spark.sql.functions.{ col, explode }
import org.apache.spark.sql.types.{ ArrayType, StructType }
/**
 * Recursively flattens a DataFrame: struct columns are expanded into one
 * column per field and array(-of-struct) columns are exploded into rows,
 * until no nested column remains.
 *
 * Bug fix vs. original: Spark permits only ONE generator (explode) per
 * select clause, so exploding every array column in a single select throws
 * when two array columns co-exist. Each pass now explodes at most the first
 * array column; the recursive call handles any remaining ones.
 *
 * NOTE(review): `explode` drops rows whose array is empty or null — switch
 * to `explode_outer` if those rows must be preserved.
 *
 * @param df input DataFrame, possibly with nested schema
 * @return a DataFrame whose schema contains no structs or arrays of structs
 */
def recurs(df: DataFrame): DataFrame = {
  // Same nestedness test as the original: structs, or arrays of structs.
  val hasNested = df.schema.fields.exists(_.dataType match {
    case ArrayType(StructType(_), _) | StructType(_) => true
    case _                                           => false
  })
  if (!hasNested) df
  else {
    // At most one generator per select: pick the first array column only.
    val firstArray = df.schema.fields.collectFirst {
      case f if f.dataType.isInstanceOf[ArrayType] => f.name
    }
    val columns = df.schema.fields.map { f =>
      f.dataType match {
        case _: ArrayType if firstArray.contains(f.name) =>
          explode(col(f.name)).as(f.name)
        case _: StructType =>
          col(s"${f.name}.*") // expand every struct field to a top-level column
        case _ =>
          col(f.name)
      }
    }
    recurs(df.select(columns: _*))
  }
}
// Read the raw JSON, flatten it completely, and display the result.
val df = spark.read.json(json_location)
// BUG FIX: the original `flatten_df = recurs(df)` is not valid Scala —
// a binding keyword (`val`) is required.
val flatten_df = recurs(df)
flatten_df.show()