在Apache Spark中为JSON文件设计嵌套数组(数组中的数组)

时间:2018-08-09 10:53:35

标签: apache-spark apache-spark-sql

我使用Spark应用程序导出了JSON文件,其中ACCOUNTNO是唯一的主键。下面以一个ACCOUNTNO的记录为例,该记录包含多个数组。

{"ACCOUNTNO":10003014,
      "VEHICLE":[{"VEHICLENUMBER":"MH43AJ411",
                            "CUSTOMERID":20000001,
                            "ACCOUNTGROUPID":15,
                            "PREPAIDACCOUNTSTATUSID":3079,
                            "PREPAIDACCOUNTSTATUSDATE":"2015-09-16T14:58:27.500+05:30",
                            "SOURCEOFENTRY":"RegularRetailer",
                            "REVENUECATEGORYID":75,
                            "VEHICLECLASS":"4",
                            "SERIALNO":"206158433290",
                            "HEXTAGID":"91890704803000000C0A",
                            "TAGSTATUS":"TAGINACTIVE",
                            "TAGSTARTEFFDATE":"2014-08-08T14:24:12.227+05:30",
                            "TAGENDEFFDATE":"2015-09-16T15:21:42.437+05:30",
                            "ISTAGBLACKLISTED":true,
                            "ISBLACKLISTHOLD":false,
                            "EMAILADDRESS":"shankarn75@rediffmail.com",
                            "PHONENUMBER":"9004419178     ",
                            "CCreatedDate":"2013-06-07T12:55:54.827+05:30",
                            "CCreatedUser":"bhagwadapos",
                            "CUpdatedDate":"2013-06-07T12:55:54.827+05:30",
                            "CUpdatedUser":"bhagwadapos"}],
      "ADDRESS":[{"CUSTADDRESSID":41,
                            "ADDRESSTYPE":"Mailing",
                            "ADDRESSLINE1":"B309 PROGRESSIVE SIGNATURE",
                            "ADDRESSLINE2":"SECTOR-6",
                            "ADDRESSLINE3":"GHANSOLI",
                            "CITY":"NAVI MUMBAI",
                            "STATE":"MH",
                            "COUNTRY":"IND",
                            "ZIP1":"400701",
                            "ISACTIVE":true,
                            "ISCOMMUNICATION":true,
                            "CREATEDDATE":"2013-06-07T12:55:54.827+05:30",
                            "CREATEDUSER":"bhagwadapos",
                            "UPDATEDDATE":"2013-06-07T12:55:54.827+05:30",
                            "UPDATEDUSER":"bhagwadapos"}],
        "BUSINESS":[{}],
        "EMAIL":[{"CUSTMAILID":33,
                            "EMAILTYPE":"PrimaryEmail",
                            "EMAIL":"shankarn75@rediffmail.com",
                            "ISACTIVE_EMAIL":true,
                            "ISCOMMUNICATION_EMAIL":true}],
        "LOGIN":[{"LOGINID":24,
                            "USERNAME":"shankarn75",
                            "PASSWORD":"u19PkvCFYgDtkagLPMOSgA==",
                            "LAST_LOGINDATE":"2014-09-10T10:26:45.310+05:30",
                            "LAST_PWD_MODIFIEDDATE":"2014-08-14T12:32:13.647+05:30",
                            "CURRENT_PWD_EXPIRYDATE":"2014-11-07T12:32:13.677+05:30",
                            "PWD_ATTEMPTS_COUNT":0,
                            "ISLOCKED":false,
                            "THEMES":"Maroon",
                            "LANGUAGES":"en-IN",
                            "STATUSID":2111,
                            "USERTYPEID":2,
                            "ROLENAME":"Admin"}],
        "PHONES":[{"CUSTPHONEID":73,
                            "PHONETYPE":"Fax",
                            "PHONENUMBER_PHONES":"",
                            "EXTENTION":"",
                            "ISACTIVE_PHONES":true,
                            "ISCOMMUNICATION_PHONES":false},
                            {"CUSTPHONEID":75,"PHONETYPE":"OfficePhone",
                                                                "PHONENUMBER_PHONES":"",
                                                                "EXTENTION":"",
                                                                "ISACTIVE_PHONES":true,
                                                                "ISCOMMUNICATION_PHONES":false},
                            {"CUSTPHONEID":76,
                                                                "PHONETYPE":"MobileNo",
                                                                "PHONENUMBER_PHONES":"9004419178",
                                                                "EXTENTION":"",
                                                                "ISACTIVE_PHONES":true,
                                                                "ISCOMMUNICATION_PHONES":true},
                            {"CUSTPHONEID":74,
                                                                "PHONETYPE":"HomePhone",
                                                                "PHONENUMBER_PHONES":"",
                                                                "EXTENTION":"",
                                                                "ISACTIVE_PHONES":true,
                                                                "ISCOMMUNICATION_PHONES":false}]}
{"ACCOUNTNO":10003015,
    "VEHICLE":[{"VEHICLENUMBER":............................................

我在这里使用了 collect_set(struct(...)) 来生成上述结构,并且已经成功得到了想要的结果。

SELECT C_ACCNO AS ACCOUNTNO, collect_set(struct(VEHICLENUMBER, CUSTOMERID,ACCOUNTGROUPID,PREPAIDACCOUNTSTATUSID,PREPAIDACCOUNTSTATUSDATE,SOURCEOFENTRY,REVENUECATEGORYID,VEHICLECLASS,SERIALNO,HEXTAGID,TAGSTATUS,TAGSTARTEFFDATE,TAGENDEFFDATE,ISTAGBLACKLISTED,ISBLACKLISTHOLD,RCVERIFICATIONSTATUS,EMAILADDRESS,PHONENUMBER,ISFEEWAIVER,FEEWAIVERPASSTYPE,VEHICLEIMGVERIFICATIONSTATUS,TAGTID,ISREVENUERECHARGE,CCreatedDate,CCreatedUser,CUpdatedDate,CUpdatedUser)) as VEHICLE, collect_set(struct(CUSTADDRESSID ,ADDRESSTYPE ,ADDRESSLINE1 ,ADDRESSLINE2 ,ADDRESSLINE3 ,CITY,STATE  ,COUNTRY  ,ZIP1  ,ISACTIVE  ,ISCOMMUNICATION  ,CREATEDDATE  ,CREATEDUSER  ,UPDATEDDATE  ,UPDATEDUSER  ,REASONCODE ,ZIP2)) as ADDRESS, collect_set(struct(ORGANISATIONNAME,DATEOFINCORPORATION,PANCARDNUMBER,ORGANIZATIONTYPEID)) as BUSINESS, collect_set(struct(CUSTMAILID,EMAILTYPE,EMAIL,ISACTIVE_EMAIL,ISCOMMUNICATION_EMAIL)) as EMAIL, collect_set(struct(LOGINID,  USERNAME,   PASSWORD,   LAST_LOGINDATE, LAST_PWD_MODIFIEDDATE,  CURRENT_PWD_EXPIRYDATE, PWD_ATTEMPTS_COUNT, PINNUMBER,  ISLOCKED,THEMES,LANGUAGES,  STATUSID,   USERTYPEID, ROLENAME,   SQ_ATTEMPTCOUNT,    SQ_LOCKOUTTIME)) as LOGIN, collect_set(struct(CUSTPHONEID,  PHONETYPE, PHONENUMBER_PHONES,  EXTENTION, ISACTIVE_PHONES, ISCOMMUNICATION_PHONES)) as PHONES FROM joined_acc_phones GROUP BY ACCOUNTNO ORDER BY ACCOUNTNO

现在,我想在JSON文件的数组内部再嵌套一个数组。也就是说,VEHICLE数组的每个元素都包含VEHICLENUMBER,我希望在同一个JSON文件中为每个VEHICLENUMBER再创建一个数组(例如下方预期输出中的trips),使每个VEHICLENUMBER对应多组key:value。应该怎么做?请帮忙,谢谢。

我尝试了下面这种写法,但执行失败并抛出了错误。

val query2 = "SELECT C_ACCNO AS ACCOUNTNO, collect_set(collect_set(TAGSTATUS,SERIALNO), struct(VEHICLENUMBER, CUSTOMERID, ACCOUNTGROUPID, PREPAIDACCOUNTSTATUSID,PREPAIDACCOUNTSTATUSDATE,SOURCEOFENTRY,REVENUECATEGORYID,VEHICLECLASS,HEXTAGID,CCreatedDate,CCreatedUser,CUpdatedDate,CUpdatedUser)) as VEHICLE, collect_set(struct(CUSTADDRESSID ,ADDRESSTYPE ,ADDRESSLINE1 ,ADDRESSLINE2 ,ADDRESSLINE3 ,CITY,STATE  ,COUNTRY  ,ZIP1  ,ISACTIVE  ,ISCOMMUNICATION, REASONCODE, ZIP2)) as ADDRESS FROM joined_acc_add GROUP BY ACCOUNTNO ORDER BY ACCOUNTNO"
     val res01 = sqlContext.sql(query2.toString)
     res01.show(10)
     res01.coalesce(1).write.json("D:/triptest3")

预期输出:

 {
    "ACCOUNTNO": 10003014,
    "VEHICLE": [{
        "VEHICLENUMBER": "MH43AJ411",
        "trips": [{
                "inboundtime": "14: 58: 27",
                "length": 131,
                "timestamp": 1447837200
            },
            {
                "inboundtime": "16: 27: 10",
                "length": 131,
                "timestamp": 1447840800
            }
        ],
        "CUSTOMERID": 20000001,
        "ACCOUNTGROUPID": 15,
        "PREPAIDACCOUNTSTATUSID": 3079,
        "PREPAIDACCOUNTSTATUSDATE": "2015-09-16T14:58:27.500+05:30",
        "SOURCEOFENTRY": "RegularRetailer",
        "REVENUECATEGORYID": 75,
        "VEHICLECLASS": "4",
        "SERIALNO": "206158433290",
        "HEXTAGID": "91890704803000000C0A",
        "TAGSTATUS": "TAGINACTIVE",
        "TAGSTARTEFFDATE": "2014-08-08T14:24:12.227+05:30",
        "TAGENDEFFDATE": "2015-09-16T15:21:42.437+05:30",
        "ISTAGBLACKLISTED": true,
        "ISBLACKLISTHOLD": false,
        "EMAILADDRESS": "shankarn75@rediffmail.com",
        "PHONENUMBER": "9004419178     ",
        "CCreatedDate": "2013-06-07T12:55:54.827+05:30",
        "CCreatedUser": "bhagwadapos",
        "CUpdatedDate": "2013-06-07T12:55:54.827+05:30",
        "CUpdatedUser": "bhagwadapos"
    }],
    "ADDRESS": [{
        "CUSTADDRESSID": 41,
        "ADDRESSTYPE": "Mailing",
        "ADDRESSLINE1": "B309 PROGRESSIVE SIGNATURE",
        "ADDRESSLINE2": "SECTOR-6",
        "ADDRESSLINE3": "GHANSOLI",
        "CITY": "NAVI MUMBAI",
        "STATE": "MH",
        "COUNTRY": "IND",
        "ZIP1": "400701",
        "ISACTIVE": true,
        "ISCOMMUNICATION": true,
        "CREATEDDATE": "2013-06-07T12:55:54.827+05:30",
        "CREATEDUSER": "bhagwadapos",
        "UPDATEDDATE": "2013-06-07T12:55:54.827+05:30",
        "UPDATEDUSER": "bhagwadapos"
    }],
    "BUSINESS": [{}],
    "EMAIL": [{
        "CUSTMAILID": 33,
        "EMAILTYPE": "PrimaryEmail",
        "EMAIL": "shankarn75@rediffmail.com",
        "ISACTIVE_EMAIL": true,
        "ISCOMMUNICATION_EMAIL": true
    }],
    "LOGIN": [{
        "LOGINID": 24,
        "USERNAME": "shankarn75",
        "PASSWORD": "u19PkvCFYgDtkagLPMOSgA==",
        "LAST_LOGINDATE": "2014-09-10T10:26:45.310+05:30",
        "LAST_PWD_MODIFIEDDATE": "2014-08-14T12:32:13.647+05:30",
        "CURRENT_PWD_EXPIRYDATE": "2014-11-07T12:32:13.677+05:30",
        "PWD_ATTEMPTS_COUNT": 0,
        "ISLOCKED": false,
        "THEMES": "Maroon",
        "LANGUAGES": "en-IN",
        "STATUSID": 2111,
        "USERTYPEID": 2,
        "ROLENAME": "Admin"
    }],
    "PHONES": [{
            "CUSTPHONEID": 73,
            "PHONETYPE": "Fax",
            "PHONENUMBER_PHONES": "",
            "EXTENTION": "",
            "ISACTIVE_PHONES": true,
            "ISCOMMUNICATION_PHONES": false
        },
        {
            "CUSTPHONEID": 75,
            "PHONETYPE": "OfficePhone",
            "PHONENUMBER_PHONES": "",
            "EXTENTION": "",
            "ISACTIVE_PHONES": true,
            "ISCOMMUNICATION_PHONES": false
        },
        {
            "CUSTPHONEID": 76,
            "PHONETYPE": "MobileNo",
            "PHONENUMBER_PHONES": "9004419178",
            "EXTENTION": "",
            "ISACTIVE_PHONES": true,
            "ISCOMMUNICATION_PHONES": true
        },
        {
            "CUSTPHONEID": 74,
            "PHONETYPE": "HomePhone",
            "PHONENUMBER_PHONES": "",
            "EXTENTION": "",
            "ISACTIVE_PHONES": true,
            "ISCOMMUNICATION_PHONES": false
        }
    ]
 }

0 个答案:

没有答案