我有一个JSON文件'data.json',其中包含有关不同景点的信息。
# Read data.json line by line and parse each line as its own JSON document,
# producing one list element per line of the file.
# NOTE(review): fromJSON is presumably from jsonlite or rjson; the package is
# not loaded in this snippet -- confirm which one is intended.
data <- lapply(readLines("data.json"), fromJSON)
这会创建一个具有不同长度的嵌套列表。这是前4行的样本。
list(structure(list(payload = structure(list(existence_full = 1L,
geo_virtual = "[\"56.9459720|-2.1971226|20|within_50m|4\"]",
latitude = "56.945972", locality = "Stonehaven", `_records_touched` = "{\"crawl\":8,\"lssi\":0,\"polygon_centroid\":0,\"geocoder\":0,\"user_submission\":0,\"tdc\":0,\"gov\":0}",
address = "The Lodge, Dunottar", email = "dunnottarcastle@btconnect.com",
existence_ml = 0.569423821765872, domain_aggregate = "",
name = "Dunnottar Castle", search_tags = c("Dunnottar Castle Aberdeenshire",
"Dunotter Castle"), admin_region = "Scotland", existence = 1L,
category_labels = structure(c("Landmarks", "Buildings and Structures"
), .Dim = 1:2), post_town = "Stonehaven", region = "Kincardineshire",
review_count = "719", geocode_level = "within_50m", tel = "01569 762173",
placerank = 65L, longitude = "-2.197123", placerank_ml = 37.2791607346447,
fax = "01330 860325", category_ids_text_search = "", website = "http://www.dunnottarcastle.co.uk",
status = "1", geocode_confidence = "20", postcode = "AB39 2TL",
category_ids = 108L, country = "gb", `_geocode_quality` = "4"), .Names = c("existence_full",
"geo_virtual", "latitude", "locality", "_records_touched", "address",
"email", "existence_ml", "domain_aggregate", "name", "search_tags",
"admin_region", "existence", "category_labels", "post_town",
"region", "review_count", "geocode_level", "tel", "placerank",
"longitude", "placerank_ml", "fax", "category_ids_text_search",
"website", "status", "geocode_confidence", "postcode", "category_ids",
"country", "_geocode_quality")), uuid = "3867aaf3-12ab-434f-b12b-5d627b3359c3"), .Names = c("payload",
"uuid")), structure(list(payload = structure(list(existence_full = 1L,
geo_virtual = "[\"56.237480|-5.073578|20|within_50m|4\"]",
latitude = "56.237480", locality = "Inveraray", `_records_touched` = "{\"crawl\":11,\"lssi\":0,\"polygon_centroid\":0,\"geocoder\":0,\"user_submission\":0,\"tdc\":0,\"gov\":0}",
address = "Cherry Park", email = "enquiries@inveraray-castle.com",
longitude = "-5.073578", domain_aggregate = "", name = "Inveraray Castle",
admin_region = "Scotland", search_tags = c("Inveraray Castle Tea Room",
"Inverary Castle"), existence = 1L, category_labels = structure(c("Social",
"Food and Dining", "Restaurants"), .Dim = c(1L, 3L)), region = "Argyll",
review_count = "532", geocode_level = "within_50m", tel = "01499 302203",
placerank = 67L, post_town = "Inveraray", placerank_ml = 41.1997808735227,
fax = "01499 302421", category_ids_text_search = "", website = "http://www.inveraray-castle.com",
status = "1", geocode_confidence = "20", postcode = "PA32 8XE",
category_ids = 347L, country = "gb", `_geocode_quality` = "4",
existence_ml = 0.791488110284778), .Names = c("existence_full",
"geo_virtual", "latitude", "locality", "_records_touched", "address",
"email", "longitude", "domain_aggregate", "name", "admin_region",
"search_tags", "existence", "category_labels", "region", "review_count",
"geocode_level", "tel", "placerank", "post_town", "placerank_ml",
"fax", "category_ids_text_search", "website", "status", "geocode_confidence",
"postcode", "category_ids", "country", "_geocode_quality", "existence_ml"
)), uuid = "8278ab80-2cd1-4dbd-9685-0d0036b681eb"), .Names = c("payload",
"uuid")), structure(list(payload = structure(list(existence_full = 1L,
geo_virtual = "[\"51.483872|-0.606820|100|rooftop|2\"]",
latitude = "51.483872", locality = "Windsor Castle", hours_display = "Mon-Sat 11:30 AM-11:00 PM; Sun 12:00 PM-11:00 PM",
`_records_touched` = "{\"crawl\":7,\"lssi\":0,\"polygon_centroid\":0,\"geocoder\":2,\"user_submission\":0,\"tdc\":0,\"gov\":0}",
address = "", longitude = "-0.606820", domain_aggregate = "",
name = "Windsor Castle", admin_region = "England", search_tags = c("The Windsor Castle",
"The Windsor Castle Pub", "The Windsor Castle Public House",
"Pub Food", "British"), existence = 1L, category_labels = structure(c("Landmarks",
"Buildings and Structures"), .Dim = 1:2), region = "Berkshire",
review_count = "", geocode_level = "rooftop", tel = "020 7766 7304",
placerank = 62L, post_town = "Windsor", placerank_ml = 28.1160845346327,
fax = "01753 832290", category_ids_text_search = "", website = "http://www.royalcollection.org.uk/visit/windsorcastle",
status = "1", hours = "{\"monday\":[[\"11:30\",\"23:00\"]],\"tuesday\":[[\"11:30\",\"23:00\"]],\"wednesday\":[[\"11:30\",\"23:00\"]],\"thursday\":[[\"11:30\",\"23:00\"]],\"friday\":[[\"11:30\",\"23:00\"]],\"saturday\":[[\"11:30\",\"23:00\"]],\"sunday\":[[\"12:00\",\"23:00\"]]}",
neighborhood = "Chalvey", geocode_confidence = "100", postcode = "SL4 1NJ",
category_ids = 108L, country = "gb", `_geocode_quality` = "2",
existence_ml = 0.885705196944165, email = "bookinginfo@royalcollection.org.uk"), .Names = c("existence_full",
"geo_virtual", "latitude", "locality", "hours_display", "_records_touched",
"address", "longitude", "domain_aggregate", "name", "admin_region",
"search_tags", "existence", "category_labels", "region", "review_count",
"geocode_level", "tel", "placerank", "post_town", "placerank_ml",
"fax", "category_ids_text_search", "website", "status", "hours",
"neighborhood", "geocode_confidence", "postcode", "category_ids",
"country", "_geocode_quality", "existence_ml", "email")), uuid = "c5f7d8a9-0851-46ef-8da7-ad55e187d3a8"), .Names = c("payload",
"uuid")), structure(list(payload = structure(list(existence_full = 1L,
category_ids_text_search = "", placerank_ml = 31.9857184762157,
longitude = "-2.191955", name = "Pitmedden Garden", domain_aggregate = "",
admin_region = "Scotland", languages = "English", region = "Aberdeenshire",
review_count = "2", geocode_level = "rooftop", tel = "01651 842352",
placerank = 57L, post_town = "Ellon", category_labels = structure(c("Landmarks",
"Gardens"), .Dim = 1:2), existence = 1L, fax = "0844 493 2102",
website = "http://www.nts.org.uk/Property/Pitmedden-Garden",
status = "1", geocode_confidence = "100", postcode = "AB41 7PD",
country = "gb", category_ids = 109L, `_geocode_quality` = "4",
existence_ml = 0.849871115334588, email = "information@nts.org.uk",
address = "", `_records_touched` = "{\"crawl\":6,\"lssi\":0,\"polygon_centroid\":0,\"geocoder\":0,\"user_submission\":0,\"tdc\":0,\"gov\":0}",
locality = "Pitmedden", latitude = "57.343233", geo_virtual = "[\"57.343233|-2.191955|100|rooftop|4\"]"), .Names = c("existence_full",
"category_ids_text_search", "placerank_ml", "longitude", "name",
"domain_aggregate", "admin_region", "languages", "region", "review_count",
"geocode_level", "tel", "placerank", "post_town", "category_labels",
"existence", "fax", "website", "status", "geocode_confidence",
"postcode", "country", "category_ids", "_geocode_quality", "existence_ml",
"email", "address", "_records_touched", "locality", "latitude",
"geo_virtual")), uuid = "bb57a153-740f-42be-aa4d-ae12d4eb57d4"), .Names = c("payload",
"uuid")))
我想把这个嵌套列表(列表的列表)中的值填充到不同的列中,从而将其转换为数据框。列表中的每个子列表都包含某个特定景点的信息,并以 uuid 标识。因此,数据框中的每一行都将包含某个特定 uuid 的信息。
对于没有相应值的列,应显示 NA。
我尝试过类似问题中提到的一些方法,但没有成功。
任何想法都将不胜感激!谢谢
答案 0 :(得分:0)
如果能更完整地描述原始数据的布局会更有帮助,不过下面是我根据该对象的顶层结构所做的一个猜测。
假设该 structure 被命名为 dat:
> lapply(dat, names)
[[1]]
[1] "payload" "uuid"
[[2]]
[1] "payload" "uuid"
[[3]]
[1] "payload" "uuid"
[[4]]
[1] "payload" "uuid"
因此将它们提取到数据帧列表
# One data frame per record, built from its `payload` list. Payload fields
# with more than one value are recycled by data.frame(), so a single record
# can yield several rows (see the dim() output further down).
payloads <- lapply(dat, function(x) data.frame(x$payload))
# One single-cell data frame per record holding its uuid. The column ends up
# named "x.uuid" because data.frame() derives the name from the expression
# `x$uuid`.
uuids <- lapply(dat, function(x) data.frame(x$uuid))
然后将它们“并排”绑在一起:
# Column-bind each payload data frame with the uuid data frame of the same
# record, pairwise. SIMPLIFY = FALSE guarantees a plain list of data frames:
# with the default (SIMPLIFY = TRUE) mapply() would collapse the results into
# a list-matrix whenever every data frame happened to have the same number of
# columns, which would break the later row-binding step.
newdat <- mapply(cbind, payloads, uuids, SIMPLIFY = FALSE)
然后查看维度,确认单行的 uuid 数据框是否被正确地复制到了多行的 payload 数据框上。有一点不符合您的要求:缺失的位置没有填充 NA。因为 'uuids' 显然是标识符,cbind
操作会把它的每一列内容重复,填充到与 'payloads' 相同长度的列中:
> lapply(payloads, dim)
[[1]]
[1] 2 32
[[2]]
[1] 2 33
[[3]]
[1] 5 35
[[4]]
[1] 1 32
> lapply(uuids, dim)
[[1]]
[1] 1 1
[[2]]
[1] 1 1
[[3]]
[1] 1 1
[[4]]
[1] 1 1
> lapply( mapply( cbind, payloads, uuids), dim)
[[1]]
[1] 2 33
[[2]]
[1] 2 34
[[3]]
[1] 5 36
[[4]]
[1] 1 33
下一级合并可能是将所有数据框组装在彼此之上,因为它们的名称非常相似:
lapply( newdat, names)
[[1]]
[1] "existence_full" "geo_virtual" "latitude"
[4] "locality" "X_records_touched" "address"
[7] "email" "existence_ml" "domain_aggregate"
[10] "name" "search_tags" "admin_region"
[13] "existence" "category_labels.1" "category_labels.2"
[16] "post_town" "region" "review_count"
[19] "geocode_level" "tel" "placerank"
[22] "longitude" "placerank_ml" "fax"
[25] "category_ids_text_search" "website" "status"
[28] "geocode_confidence" "postcode" "category_ids"
[31] "country" "X_geocode_quality" "x.uuid"
[[2]]
[1] "existence_full" "geo_virtual" "latitude"
[4] "locality" "X_records_touched" "address"
[7] "email" "longitude" "domain_aggregate"
[10] "name" "admin_region" "search_tags"
[13] "existence" "category_labels.1" "category_labels.2"
[16] "category_labels.3" "region" "review_count"
[19] "geocode_level" "tel" "placerank"
[22] "post_town" "placerank_ml" "fax"
[25] "category_ids_text_search" "website" "status"
[28] "geocode_confidence" "postcode" "category_ids"
[31] "country" "X_geocode_quality" "existence_ml"
[34] "x.uuid"
[[3]]
[1] "existence_full" "geo_virtual" "latitude"
[4] "locality" "hours_display" "X_records_touched"
[7] "address" "longitude" "domain_aggregate"
[10] "name" "admin_region" "search_tags"
[13] "existence" "category_labels.1" "category_labels.2"
[16] "region" "review_count" "geocode_level"
[19] "tel" "placerank" "post_town"
[22] "placerank_ml" "fax" "category_ids_text_search"
[25] "website" "status" "hours"
[28] "neighborhood" "geocode_confidence" "postcode"
[31] "category_ids" "country" "X_geocode_quality"
[34] "existence_ml" "email" "x.uuid"
[[4]]
[1] "existence_full" "category_ids_text_search" "placerank_ml"
[4] "longitude" "name" "domain_aggregate"
[7] "admin_region" "languages" "region"
[10] "review_count" "geocode_level" "tel"
[13] "placerank" "post_town" "category_labels.1"
[16] "category_labels.2" "existence" "fax"
[19] "website" "status" "geocode_confidence"
[22] "postcode" "country" "category_ids"
[25] "X_geocode_quality" "existence_ml" "email"
[28] "address" "X_records_touched" "locality"
[31] "latitude" "geo_virtual" "x.uuid"
Hadley 的 plyr 包中的 rbind.fill 函数可以有效地执行此操作:
# Install plyr only when it is missing, instead of reinstalling on every run.
if (!requireNamespace("plyr", quietly = TRUE)) {
  install.packages("plyr")
}
# Stack all per-record data frames on top of each other. rbind.fill() accepts
# a list of data frames directly and fills columns that are absent from a
# given data frame with NA.
newdat3 <- plyr::rbind.fill(newdat)
newdat3
所以看一下几列,这似乎满足了你的要求:
> newdat3[ , c("locality", "category_labels.3", "neighborhood")]
locality category_labels.3 neighborhood
1 Stonehaven <NA> <NA>
2 Stonehaven <NA> <NA>
3 Inveraray Restaurants <NA>
4 Inveraray Restaurants <NA>
5 Windsor Castle <NA> Chalvey
6 Windsor Castle <NA> Chalvey
7 Windsor Castle <NA> Chalvey
8 Windsor Castle <NA> Chalvey
9 Windsor Castle <NA> Chalvey
10 Pitmedden <NA> <NA>