我有一个json文本文件,内容为
{"type":"session.ended","v":2,"post.source":"1306210600-col001.sv1.movenetworks.com:2097#5","post.ip4":"75.114.187.146","post.rtime_secs":1371794661,"post.rtime_text":"2013-06-21 06:04:20","post.time_secs":1371794596,"post.time_text":"2013-06-21 06:03:16","post.time_date":"2013-06-21","post.time_hour":6,"post.late_secs":65,"id.session":"3625657","id.sub":"2370b726-b96e-11e2-b3eb-1231380e1adf","id.partner":"0CB48A664E514CA48D378D152574EDBB","id.service":"BBTV (CBD46B77)","device.make":"Roku","device.model":"3050X","device.info":"Roku;3050X","device.serial":"12C241003940","device.version":"2.5.0.7","device.uuid":"51ce2255-62ad-5778-b2d7-b9543c1476c6","device.os":"Linux","device.os_version":"2.6.35","device.platform":"Roku","device.platform_vendor":"Move Networks","device.platform_version":"1.0.0.20130329","device.licenses":[],"user.type":"Subscriber","ip.provider":"netacuity","ip.postal_code":"48154","ip.dma":505,"ip.dma_name":"unknown","ip.city":"livonia","ip.country":"united states","ip.region":"michigan","ip.continent":"north america","ip.isp":"bright house networks llc","ip.asn":0,"ip.asn_owner":"?","clip.id":"1338713","clip.pub":"CBD46B77","asset.id":524768,"asset.pub":"CBD46B77","asset.length_ms":1800000,"asset.guid":"b7c12c09dc5aec832e142a00b0f191fa","asset.title":"Diya Aur Bati Hum","asset.type":"captured","asset.adult":false,"asset.franchise_guid":"941496e1452b4fce9acfe7b3339924eb","asset.franchise_title":"Diya Aur Bati Hum","container.id":14,"container.pub":"CBD46B77","container.guid":"3caa6afd715e4c57ac4750d29e449a9c","container.title":"SPLUS","usage.elapsed_ms":2312,"usage.viewed_ms":392,"usage.stage":"mainVideo","exp.idle_ms":350,"exp.stalls":0,"exp.stalled_ms":0,"exp.frame_renders":0,"exp.frame_drops":0,"exp.ghost_session_ms":0,"exp.ghost_sessions":0,"exp.qmx_stale_ms":0,"exp.qmx_error_ms":0,"exp.qss_late_ms":0,"exp.qss_error_ms":0,"exp.fom":0,"exp.fom_weight":0,"data.dl_bytes":228,"data.ul_bytes":406,"http.oks":2,"http.errors":0,"http.timeouts":0,"net.throughput":8977,"http.slows":0,"data.bitrate_mean":1968,"data.bitrate_stddev":0,"data.bitrate_median":1950,"data.bitrate_modes":[1950],"data.streamlets":1,"data.late_streamlets":0,"data.all_streamlets":0,"data.bf_streamlets":0,"data.ab_streamlets":0}
{"type":"session.started","v":2,"post.source":"1306210600-col004.sv1.movenetworks.com:2183#6","post.ip4":"63.225.172.43","post.rtime_secs":1371794671,"post.rtime_text":"2013-06-21 06:04:31","post.time_secs":1371794660,"post.time_text":"2013-06-21 06:04:20","post.time_date":"2013-06-21","post.time_hour":6,"post.late_secs":11,"id.session":"232169818","id.sub":"55d514ba-3858-11e2-91a7-12313d08e01f","id.partner":"0CB48A664E514CA48D378D152574EDBB","id.service":"BBTV (CBD46B77)","device.make":"Roku","device.model":"3100X","device.info":"Roku;3100X","device.serial":"13C2AE061481","device.version":"2.5.0.37","device.uuid":"7f5654d5-3aa7-5a5f-bb2b-8084da358942","device.os":"Linux","device.os_version":"2.6.35","device.platform":"Roku","device.platform_vendor":"Move Networks","device.platform_version":"1.0.0.20130615","device.licenses":[],"user.type":"Subscriber","ip.provider":"netacuity","ip.postal_code":"98115","ip.dma":819,"ip.dma_name":"unknown","ip.city":"seattle","ip.country":"united states","ip.region":"washington","ip.continent":"north america","ip.isp":"qwest communications company llc","ip.asn":0,"ip.asn_owner":"?","clip.id":"1339170","clip.pub":"CBD46B77","asset.id":522015,"asset.pub":"CBD46B77","asset.length_ms":7200000,"asset.guid":"c6938cfa200a21e90dce41f5ed131cc2","asset.title":"Spark Top 20","asset.type":"captured","asset.adult":false,"container.id":277,"container.pub":"CBD46B77","container.guid":"03e3a689e245457bba2f98c30ef931fa","container.title":"BIGMGC","usage.stage":"mainVideo","exp.idle_ms":5772}
我想在R中加载它并转换为数据帧。这里的字段名称是数据的一部分,而且每行中的字段数不相等(总共13行)
任何帮助将不胜感激。
答案 0 :(得分:3)
这是一种方法:
file <- '[
{"type":"session.ended","v":2,"post.source":"1306210600-col001.sv1.movenetworks.com:2097#5","post.ip4":"75.114.187.146","post.rtime_secs":1371794661,"post.rtime_text":"2013-06-21 06:04:20","post.time_secs":1371794596,"post.time_text":"2013-06-21 06:03:16","post.time_date":"2013-06-21","post.time_hour":6,"post.late_secs":65,"id.session":"3625657","id.sub":"2370b726-b96e-11e2-b3eb-1231380e1adf","id.partner":"0CB48A664E514CA48D378D152574EDBB","id.service":"BBTV (CBD46B77)","device.make":"Roku","device.model":"3050X","device.info":"Roku;3050X","device.serial":"12C241003940","device.version":"2.5.0.7","device.uuid":"51ce2255-62ad-5778-b2d7-b9543c1476c6","device.os":"Linux","device.os_version":"2.6.35","device.platform":"Roku","device.platform_vendor":"Move Networks","device.platform_version":"1.0.0.20130329","device.licenses":[],"user.type":"Subscriber","ip.provider":"netacuity","ip.postal_code":"48154","ip.dma":505,"ip.dma_name":"unknown","ip.city":"livonia","ip.country":"united states","ip.region":"michigan","ip.continent":"north america","ip.isp":"bright house networks llc","ip.asn":0,"ip.asn_owner":"?","clip.id":"1338713","clip.pub":"CBD46B77","asset.id":524768,"asset.pub":"CBD46B77","asset.length_ms":1800000,"asset.guid":"b7c12c09dc5aec832e142a00b0f191fa","asset.title":"Diya Aur Bati Hum","asset.type":"captured","asset.adult":false,"asset.franchise_guid":"941496e1452b4fce9acfe7b3339924eb","asset.franchise_title":"Diya Aur Bati Hum","container.id":14,"container.pub":"CBD46B77","container.guid":"3caa6afd715e4c57ac4750d29e449a9c","container.title":"SPLUS","usage.elapsed_ms":2312,"usage.viewed_ms":392,"usage.stage":"mainVideo","exp.idle_ms":350,"exp.stalls":0,"exp.stalled_ms":0,"exp.frame_renders":0,"exp.frame_drops":0,"exp.ghost_session_ms":0,"exp.ghost_sessions":0,"exp.qmx_stale_ms":0,"exp.qmx_error_ms":0,"exp.qss_late_ms":0,"exp.qss_error_ms":0,"exp.fom":0,"exp.fom_weight":0,"data.dl_bytes":228,"data.ul_bytes":406,"http.oks":2,"http.errors":0,"http.timeouts":0,"net.throughput":8977,"http.slows":0,"data.bitrate_mean":1968,"data.bitrate_stddev":0,"data.bitrate_median":1950,"data.bitrate_modes":[1950],"data.streamlets":1,"data.late_streamlets":0,"data.all_streamlets":0,"data.bf_streamlets":0,"data.ab_streamlets":0}
,{"type":"session.started","v":2,"post.source":"1306210600-col004.sv1.movenetworks.com:2183#6","post.ip4":"63.225.172.43","post.rtime_secs":1371794671,"post.rtime_text":"2013-06-21 06:04:31","post.time_secs":1371794660,"post.time_text":"2013-06-21 06:04:20","post.time_date":"2013-06-21","post.time_hour":6,"post.late_secs":11,"id.session":"232169818","id.sub":"55d514ba-3858-11e2-91a7-12313d08e01f","id.partner":"0CB48A664E514CA48D378D152574EDBB","id.service":"BBTV (CBD46B77)","device.make":"Roku","device.model":"3100X","device.info":"Roku;3100X","device.serial":"13C2AE061481","device.version":"2.5.0.37","device.uuid":"7f5654d5-3aa7-5a5f-bb2b-8084da358942","device.os":"Linux","device.os_version":"2.6.35","device.platform":"Roku","device.platform_vendor":"Move Networks","device.platform_version":"1.0.0.20130615","device.licenses":[],"user.type":"Subscriber","ip.provider":"netacuity","ip.postal_code":"98115","ip.dma":819,"ip.dma_name":"unknown","ip.city":"seattle","ip.country":"united states","ip.region":"washington","ip.continent":"north america","ip.isp":"qwest communications company llc","ip.asn":0,"ip.asn_owner":"?","clip.id":"1339170","clip.pub":"CBD46B77","asset.id":522015,"asset.pub":"CBD46B77","asset.length_ms":7200000,"asset.guid":"c6938cfa200a21e90dce41f5ed131cc2","asset.title":"Spark Top 20","asset.type":"captured","asset.adult":false,"container.id":277,"container.pub":"CBD46B77","container.guid":"03e3a689e245457bba2f98c30ef931fa","container.title":"BIGMGC","usage.stage":"mainVideo","exp.idle_ms":5772}
]'
您需要fromJSON
包的功能RJSONIO
:
library(RJSONIO)
json <- fromJSON(file, nullValue = NA)
按NA
替换(空)列表并转换为数据框:
dat <- lapply(json, function(j) {
as.data.frame(replace(j, sapply(j, is.list), NA))
})
创建单个数据框:
library(plyr)
res <- rbind.fill(dat)
结果(res
):
type v
1 session.ended 2
2 session.started 2
post.source
1 1306210600-col001.sv1.movenetworks.com:2097#5
2 1306210600-col004.sv1.movenetworks.com:2183#6
post.ip4 post.rtime_secs post.rtime_text
1 75.114.187.146 1371794661 2013-06-21 06:04:20
2 63.225.172.43 1371794671 2013-06-21 06:04:31
post.time_secs post.time_text post.time_date
1 1371794596 2013-06-21 06:03:16 2013-06-21
2 1371794660 2013-06-21 06:04:20 2013-06-21
post.time_hour post.late_secs id.session
1 6 65 3625657
2 6 11 232169818
id.sub
1 2370b726-b96e-11e2-b3eb-1231380e1adf
2 55d514ba-3858-11e2-91a7-12313d08e01f
id.partner id.service
1 0CB48A664E514CA48D378D152574EDBB BBTV (CBD46B77)
2 0CB48A664E514CA48D378D152574EDBB BBTV (CBD46B77)
device.make device.model device.info device.serial
1 Roku 3050X Roku;3050X 12C241003940
2 Roku 3100X Roku;3100X 13C2AE061481
device.version device.uuid
1 2.5.0.7 51ce2255-62ad-5778-b2d7-b9543c1476c6
2 2.5.0.37 7f5654d5-3aa7-5a5f-bb2b-8084da358942
device.os device.os_version device.platform
1 Linux 2.6.35 Roku
2 Linux 2.6.35 Roku
device.platform_vendor device.platform_version
1 Move Networks 1.0.0.20130329
2 Move Networks 1.0.0.20130615
device.licenses user.type ip.provider ip.postal_code
1 NA Subscriber netacuity 48154
2 NA Subscriber netacuity 98115
ip.dma ip.dma_name ip.city ip.country ip.region
1 505 unknown livonia united states michigan
2 819 unknown seattle united states washington
ip.continent ip.isp ip.asn
1 north america bright house networks llc 0
2 north america qwest communications company llc 0
ip.asn_owner clip.id clip.pub asset.id asset.pub
1 ? 1338713 CBD46B77 524768 CBD46B77
2 ? 1339170 CBD46B77 522015 CBD46B77
asset.length_ms asset.guid
1 1800000 b7c12c09dc5aec832e142a00b0f191fa
2 7200000 c6938cfa200a21e90dce41f5ed131cc2
asset.title asset.type asset.adult
1 Diya Aur Bati Hum captured FALSE
2 Spark Top 20 captured FALSE
asset.franchise_guid asset.franchise_title
1 941496e1452b4fce9acfe7b3339924eb Diya Aur Bati Hum
2 <NA> <NA>
container.id container.pub
1 14 CBD46B77
2 277 CBD46B77
container.guid container.title
1 3caa6afd715e4c57ac4750d29e449a9c SPLUS
2 03e3a689e245457bba2f98c30ef931fa BIGMGC
usage.elapsed_ms usage.viewed_ms usage.stage
1 2312 392 mainVideo
2 NA NA mainVideo
exp.idle_ms exp.stalls exp.stalled_ms
1 350 0 0
2 5772 NA NA
exp.frame_renders exp.frame_drops exp.ghost_session_ms
1 0 0 0
2 NA NA NA
exp.ghost_sessions exp.qmx_stale_ms exp.qmx_error_ms
1 0 0 0
2 NA NA NA
exp.qss_late_ms exp.qss_error_ms exp.fom
1 0 0 0
2 NA NA NA
exp.fom_weight data.dl_bytes data.ul_bytes http.oks
1 0 228 406 2
2 NA NA NA NA
http.errors http.timeouts net.throughput http.slows
1 0 0 8977 0
2 NA NA NA NA
data.bitrate_mean data.bitrate_stddev
1 1968 0
2 NA NA
data.bitrate_median data.bitrate_modes data.streamlets
1 1950 1950 1
2 NA NA NA
data.late_streamlets data.all_streamlets
1 0 0
2 NA NA
data.bf_streamlets data.ab_streamlets
1 0 0
2 NA NA