我已经使用 Spark 应用程序导出了 JSON 文件。ACCOUNTNO 是此处唯一的主键。
下面以一个 ACCOUNTNO 的记录为例,
它包含多个数组。
{"ACCOUNTNO":10003014,
"VEHICLE":[{"VEHICLENUMBER":"MH43AJ411",
"CUSTOMERID":20000001,
"ACCOUNTGROUPID":15,
"PREPAIDACCOUNTSTATUSID":3079,
"PREPAIDACCOUNTSTATUSDATE":"2015-09-16T14:58:27.500+05:30",
"SOURCEOFENTRY":"RegularRetailer",
"REVENUECATEGORYID":75,
"VEHICLECLASS":"4",
"SERIALNO":"206158433290",
"HEXTAGID":"91890704803000000C0A",
"TAGSTATUS":"TAGINACTIVE",
"TAGSTARTEFFDATE":"2014-08-08T14:24:12.227+05:30",
"TAGENDEFFDATE":"2015-09-16T15:21:42.437+05:30",
"ISTAGBLACKLISTED":true,
"ISBLACKLISTHOLD":false,
"EMAILADDRESS":"shankarn75@rediffmail.com",
"PHONENUMBER":"9004419178 ",
"CCreatedDate":"2013-06-07T12:55:54.827+05:30",
"CCreatedUser":"bhagwadapos",
"CUpdatedDate":"2013-06-07T12:55:54.827+05:30",
"CUpdatedUser":"bhagwadapos"}],
"ADDRESS":[{"CUSTADDRESSID":41,
"ADDRESSTYPE":"Mailing",
"ADDRESSLINE1":"B309 PROGRESSIVE SIGNATURE",
"ADDRESSLINE2":"SECTOR-6",
"ADDRESSLINE3":"GHANSOLI",
"CITY":"NAVI MUMBAI",
"STATE":"MH",
"COUNTRY":"IND",
"ZIP1":"400701",
"ISACTIVE":true,
"ISCOMMUNICATION":true,
"CREATEDDATE":"2013-06-07T12:55:54.827+05:30",
"CREATEDUSER":"bhagwadapos",
"UPDATEDDATE":"2013-06-07T12:55:54.827+05:30",
"UPDATEDUSER":"bhagwadapos"}],
"BUSINESS":[{}],
"EMAIL":[{"CUSTMAILID":33,
"EMAILTYPE":"PrimaryEmail",
"EMAIL":"shankarn75@rediffmail.com",
"ISACTIVE_EMAIL":true,
"ISCOMMUNICATION_EMAIL":true}],
"LOGIN":[{"LOGINID":24,
"USERNAME":"shankarn75",
"PASSWORD":"u19PkvCFYgDtkagLPMOSgA==",
"LAST_LOGINDATE":"2014-09-10T10:26:45.310+05:30",
"LAST_PWD_MODIFIEDDATE":"2014-08-14T12:32:13.647+05:30",
"CURRENT_PWD_EXPIRYDATE":"2014-11-07T12:32:13.677+05:30",
"PWD_ATTEMPTS_COUNT":0,
"ISLOCKED":false,
"THEMES":"Maroon",
"LANGUAGES":"en-IN",
"STATUSID":2111,
"USERTYPEID":2,
"ROLENAME":"Admin"}],
"PHONES":[{"CUSTPHONEID":73,
"PHONETYPE":"Fax",
"PHONENUMBER_PHONES":"",
"EXTENTION":"",
"ISACTIVE_PHONES":true,
"ISCOMMUNICATION_PHONES":false},
{"CUSTPHONEID":75,"PHONETYPE":"OfficePhone",
"PHONENUMBER_PHONES":"",
"EXTENTION":"",
"ISACTIVE_PHONES":true,
"ISCOMMUNICATION_PHONES":false},
{"CUSTPHONEID":76,
"PHONETYPE":"MobileNo",
"PHONENUMBER_PHONES":"9004419178",
"EXTENTION":"",
"ISACTIVE_PHONES":true,
"ISCOMMUNICATION_PHONES":true},
{"CUSTPHONEID":74,
"PHONETYPE":"HomePhone",
"PHONENUMBER_PHONES":"",
"EXTENTION":"",
"ISACTIVE_PHONES":true,
"ISCOMMUNICATION_PHONES":false}]}
{"ACCOUNTNO":10003015,
"VEHICLE":[{"VEHICLENUMBER":............................................
我在这里使用了 collect_set(struct(...))
来实现上述结构,并且已经成功。所用的查询如下:
-- One output row per ACCOUNTNO. Each collect_set(struct(...)) gathers the distinct
-- combinations of the listed columns for that account into a single array column.
SELECT
    C_ACCNO AS ACCOUNTNO,
    collect_set(struct(
        VEHICLENUMBER, CUSTOMERID, ACCOUNTGROUPID, PREPAIDACCOUNTSTATUSID,
        PREPAIDACCOUNTSTATUSDATE, SOURCEOFENTRY, REVENUECATEGORYID, VEHICLECLASS,
        SERIALNO, HEXTAGID, TAGSTATUS, TAGSTARTEFFDATE, TAGENDEFFDATE,
        ISTAGBLACKLISTED, ISBLACKLISTHOLD, RCVERIFICATIONSTATUS, EMAILADDRESS,
        PHONENUMBER, ISFEEWAIVER, FEEWAIVERPASSTYPE, VEHICLEIMGVERIFICATIONSTATUS,
        TAGTID, ISREVENUERECHARGE, CCreatedDate, CCreatedUser, CUpdatedDate,
        CUpdatedUser
    )) AS VEHICLE,
    collect_set(struct(
        CUSTADDRESSID, ADDRESSTYPE, ADDRESSLINE1, ADDRESSLINE2, ADDRESSLINE3,
        CITY, STATE, COUNTRY, ZIP1, ISACTIVE, ISCOMMUNICATION, CREATEDDATE,
        CREATEDUSER, UPDATEDDATE, UPDATEDUSER, REASONCODE, ZIP2
    )) AS ADDRESS,
    collect_set(struct(
        ORGANISATIONNAME, DATEOFINCORPORATION, PANCARDNUMBER, ORGANIZATIONTYPEID
    )) AS BUSINESS,
    collect_set(struct(
        CUSTMAILID, EMAILTYPE, EMAIL, ISACTIVE_EMAIL, ISCOMMUNICATION_EMAIL
    )) AS EMAIL,
    collect_set(struct(
        LOGINID, USERNAME, PASSWORD, LAST_LOGINDATE, LAST_PWD_MODIFIEDDATE,
        CURRENT_PWD_EXPIRYDATE, PWD_ATTEMPTS_COUNT, PINNUMBER, ISLOCKED, THEMES,
        LANGUAGES, STATUSID, USERTYPEID, ROLENAME, SQ_ATTEMPTCOUNT, SQ_LOCKOUTTIME
    )) AS LOGIN,
    collect_set(struct(
        CUSTPHONEID, PHONETYPE, PHONENUMBER_PHONES, EXTENTION, ISACTIVE_PHONES,
        ISCOMMUNICATION_PHONES
    )) AS PHONES
FROM joined_acc_phones
GROUP BY ACCOUNTNO
ORDER BY ACCOUNTNO
现在,我想在 JSON 文件的数组内部再嵌套一个数组。也就是说,VEHICLE 数组的每个元素都包含 VEHICLENUMBER 字段,
我想在同一个 JSON 文件中,
为每个 VEHICLENUMBER 再创建一个数组(例如下文预期输出中的 trips),
使每个 VEHICLENUMBER 可以对应多组 key:value。该怎么做?请帮忙,谢谢。
我尝试过下面这种方法,但失败了,抛出了错误。
// Attempt to nest a second collection (TAGSTATUS, SERIALNO) inside each VEHICLE entry,
// grouped per account, and write the result out as JSON.
// NOTE(review): this is the query reported as failing. Likely causes, to be confirmed
// against the actual error message:
//   1. collect_set is documented as taking a single expression, but both the inner call
//      (TAGSTATUS, SERIALNO) and the outer call (inner collect_set, struct(...)) pass two.
//   2. The inner collect_set is an aggregate nested inside another aggregate, which
//      Spark SQL is expected to reject during analysis.
val query2 = "SELECT C_ACCNO AS ACCOUNTNO, collect_set(collect_set(TAGSTATUS,SERIALNO), struct(VEHICLENUMBER, CUSTOMERID, ACCOUNTGROUPID, PREPAIDACCOUNTSTATUSID,PREPAIDACCOUNTSTATUSDATE,SOURCEOFENTRY,REVENUECATEGORYID,VEHICLECLASS,HEXTAGID,CCreatedDate,CCreatedUser,CUpdatedDate,CUpdatedUser)) as VEHICLE, collect_set(struct(CUSTADDRESSID ,ADDRESSTYPE ,ADDRESSLINE1 ,ADDRESSLINE2 ,ADDRESSLINE3 ,CITY,STATE ,COUNTRY ,ZIP1 ,ISACTIVE ,ISCOMMUNICATION, REASONCODE, ZIP2)) as ADDRESS FROM joined_acc_add GROUP BY ACCOUNTNO ORDER BY ACCOUNTNO"
// Run the query through the SQL context; query2 is already a String, so .toString is redundant.
val res01 = sqlContext.sql(query2.toString)
// Print up to the first 10 result rows for inspection.
res01.show(10)
// Write the result as JSON under D:/triptest3; coalesce(1) presumably collapses the data
// to one partition so a single part file is produced — confirm against the Spark docs.
res01.coalesce(1).write.json("D:/triptest3")
预期输出:
{
"ACCOUNTNO": 10003014,
"VEHICLE": [{
"VEHICLENUMBER": "MH43AJ411",
"trips": [{
"inboundtime": "14:58:27",
"length": 131,
"timestamp": 1447837200
},
{
"inboundtime": "16:27:10",
"length": 131,
"timestamp": 1447840800
}
],
"CUSTOMERID": 20000001,
"ACCOUNTGROUPID": 15,
"PREPAIDACCOUNTSTATUSID": 3079,
"PREPAIDACCOUNTSTATUSDATE": "2015-09-16T14:58:27.500+05:30",
"SOURCEOFENTRY": "RegularRetailer",
"REVENUECATEGORYID": 75,
"VEHICLECLASS": "4",
"SERIALNO": "206158433290",
"HEXTAGID": "91890704803000000C0A",
"TAGSTATUS": "TAGINACTIVE",
"TAGSTARTEFFDATE": "2014-08-08T14:24:12.227+05:30",
"TAGENDEFFDATE": "2015-09-16T15:21:42.437+05:30",
"ISTAGBLACKLISTED": true,
"ISBLACKLISTHOLD": false,
"EMAILADDRESS": "shankarn75@rediffmail.com",
"PHONENUMBER": "9004419178 ",
"CCreatedDate": "2013-06-07T12:55:54.827+05:30",
"CCreatedUser": "bhagwadapos",
"CUpdatedDate": "2013-06-07T12:55:54.827+05:30",
"CUpdatedUser": "bhagwadapos"
}],
"ADDRESS": [{
"CUSTADDRESSID": 41,
"ADDRESSTYPE": "Mailing",
"ADDRESSLINE1": "B309 PROGRESSIVE SIGNATURE",
"ADDRESSLINE2": "SECTOR-6",
"ADDRESSLINE3": "GHANSOLI",
"CITY": "NAVI MUMBAI",
"STATE": "MH",
"COUNTRY": "IND",
"ZIP1": "400701",
"ISACTIVE": true,
"ISCOMMUNICATION": true,
"CREATEDDATE": "2013-06-07T12:55:54.827+05:30",
"CREATEDUSER": "bhagwadapos",
"UPDATEDDATE": "2013-06-07T12:55:54.827+05:30",
"UPDATEDUSER": "bhagwadapos"
}],
"BUSINESS": [{}],
"EMAIL": [{
"CUSTMAILID": 33,
"EMAILTYPE": "PrimaryEmail",
"EMAIL": "shankarn75@rediffmail.com",
"ISACTIVE_EMAIL": true,
"ISCOMMUNICATION_EMAIL": true
}],
"LOGIN": [{
"LOGINID": 24,
"USERNAME": "shankarn75",
"PASSWORD": "u19PkvCFYgDtkagLPMOSgA==",
"LAST_LOGINDATE": "2014-09-10T10:26:45.310+05:30",
"LAST_PWD_MODIFIEDDATE": "2014-08-14T12:32:13.647+05:30",
"CURRENT_PWD_EXPIRYDATE": "2014-11-07T12:32:13.677+05:30",
"PWD_ATTEMPTS_COUNT": 0,
"ISLOCKED": false,
"THEMES": "Maroon",
"LANGUAGES": "en-IN",
"STATUSID": 2111,
"USERTYPEID": 2,
"ROLENAME": "Admin"
}],
"PHONES": [{
"CUSTPHONEID": 73,
"PHONETYPE": "Fax",
"PHONENUMBER_PHONES": "",
"EXTENTION": "",
"ISACTIVE_PHONES": true,
"ISCOMMUNICATION_PHONES": false
},
{
"CUSTPHONEID": 75,
"PHONETYPE": "OfficePhone",
"PHONENUMBER_PHONES": "",
"EXTENTION": "",
"ISACTIVE_PHONES": true,
"ISCOMMUNICATION_PHONES": false
},
{
"CUSTPHONEID": 76,
"PHONETYPE": "MobileNo",
"PHONENUMBER_PHONES": "9004419178",
"EXTENTION": "",
"ISACTIVE_PHONES": true,
"ISCOMMUNICATION_PHONES": true
},
{
"CUSTPHONEID": 74,
"PHONETYPE": "HomePhone",
"PHONENUMBER_PHONES": "",
"EXTENTION": "",
"ISACTIVE_PHONES": true,
"ISCOMMUNICATION_PHONES": false
}
]
}