使用 knitr 将嵌套列表转换为数据框时遇到的问题

时间:2017-12-18 15:03:48

标签: r knitr kable

我有一个通过网页抓取得到的列表,名为 ct2。我可以用下面的代码从其中非嵌套的元素里提取各列。但我还想加入坐标:调用 head(ct2$businesses) 时,可以看到 ct2$businesses$coordinates$latitude 和 ct2$businesses$coordinates$longitude。可是我始终无法在不报错的情况下把它们输出到数据框中,所以不确定自己哪里做错了。下面先是用于提取各列的代码,然后是我尝试加入坐标时收到的错误。

biz_info <- ct2$businesses %>% 
map_df(`[`, c("name", "id", "phone", "review_count","rating","url")) 
biz_info %>% knitr::kable()

当我把 coordinates 也加进去时,执行代码会报错:

biz_info <- ct2$businesses %>% 
map_df(`[`, c("name", "id", "phone", 
"review_count","rating","url","coordinates")) 
Error in bind_rows_(x, .id) : Argument 7 must be length 1, not 2
biz_info %>% knitr::kable()

**编辑:添加数据示例**

dput(head(ct2$businesses))
list(structure(list(id = "mcdonalds-hartford-7", name = "McDonald's", 
image_url = "https://s3-
media3.fl.yelpcdn.com/bphoto/hgpL9l7A10vRoWy84NPV_g/o.jpg", 
is_closed = FALSE, url = "https://www.yelp.com/biz/mcdonalds-hartford-7?", 
review_count = 4L, categories = list(structure(list(alias = "hotdogs", 
    title = "Fast Food"), .Names = c("alias", "title")), 
    structure(list(alias = "burgers", title = "Burgers"), .Names = c("alias", 
    "title"))), rating = 3.5, coordinates = structure(list(
    latitude = 41.738611, longitude = -72.65921), .Names = c("latitude", 
"longitude")), transactions = list(), price = "$", location = structure(list(
    address1 = "76 Brainard Rd", address2 = "", address3 = "", 
    city = "Hartford", zip_code = "06114", country = "US", 
    state = "CT", display_address = list("76 Brainard Rd", 
        "Hartford, CT 06114")), .Names = c("address1", "address2", 
"address3", "city", "zip_code", "country", "state", "display_address"
)), phone = "+18602477300", display_phone = "(860) 247-7300", 
distance = 3155.923625766), .Names = c("id", "name", "image_url", 
"is_closed", "url", "review_count", "categories", "rating", "coordinates", 
"transactions", "price", "location", "phone", "display_phone", 
"distance")), structure(list(id = "mcdonalds-restaurants-hartford-3", 
name = "McDonalds Restaurants", image_url = "", is_closed = FALSE, 
url = "https://www.yelp.com/biz/mcdonalds-restaurants-hartford-3?", 
review_count = 2L, categories = list(structure(list(alias = "restaurants", 
    title = "Restaurants"), .Names = c("alias", "title"))), 
rating = 2.5, coordinates = structure(list(latitude = 41.75251, 
    longitude = -72.71448), .Names = c("latitude", "longitude"
)), transactions = list(), location = structure(list(address1 = "214 Prospect Ave", 
    address2 = "", address3 = "", city = "Hartford", zip_code = "06106", 
    country = "US", state = "CT", display_address = list(
        "214 Prospect Ave", "Hartford, CT 06106")), .Names = c("address1", 
"address2", "address3", "city", "zip_code", "country", "state", 
"display_address")), phone = "+18605238859", display_phone = "(860) 523-8859", 
distance = 2591.628349648), .Names = c("id", "name", "image_url", 
"is_closed", "url", "review_count", "categories", "rating", "coordinates", 
"transactions", "location", "phone", "display_phone", "distance"
)), structure(list(id = "mcdonalds-hartford-9", name = "McDonald's", 
image_url = "https://s3-media4.fl.yelpcdn.com/bphoto/49EjiRF2Yb91rBV6wbuHZw/o.jpg", 
is_closed = FALSE, url = "https://www.yelp.com/biz/mcdonalds-hartford-9?", 
review_count = 9L, categories = list(structure(list(alias = "burgers", 
    title = "Burgers"), .Names = c("alias", "title")), structure(list(
    alias = "hotdogs", title = "Fast Food"), .Names = c("alias", 
"title"))), rating = 2.5, coordinates = structure(list(latitude = 41.75251, 
    longitude = -72.71448), .Names = c("latitude", "longitude"
)), transactions = list(), price = "$", location = structure(list(
    address1 = "214 Prospect Ave", address2 = "", address3 = "", 
    city = "Hartford", zip_code = "06106", country = "US", 
    state = "CT", display_address = list("214 Prospect Ave", 
        "Hartford, CT 06106")), .Names = c("address1", "address2", 
"address3", "city", "zip_code", "country", "state", "display_address"
)), phone = "+18605235303", display_phone = "(860) 523-5303", 
distance = 2591.628349648), .Names = c("id", "name", "image_url", 
"is_closed", "url", "review_count", "categories", "rating", "coordinates", 
"transactions", "price", "location", "phone", "display_phone", 
"distance")), structure(list(id = "mcdonalds-hartford-10", name = "McDonald's", 
image_url = "https://s3-media4.fl.yelpcdn.com/bphoto/da-sL4n1xX2VkLbqIWr5hw/o.jpg", 
is_closed = FALSE, url = "https://www.yelp.com/biz/mcdonalds-hartford-10?", 
review_count = 9L, categories = list(structure(list(alias = "burgers", 
    title = "Burgers"), .Names = c("alias", "title")), structure(list(
    alias = "hotdogs", title = "Fast Food"), .Names = c("alias", 
"title"))), rating = 1, coordinates = structure(list(latitude = 41.7573503, 
    longitude = -72.68223), .Names = c("latitude", "longitude"
)), transactions = list(), location = structure(list(address1 = "172 Washington St", 
    address2 = "", address3 = "", city = "Hartford", zip_code = "06106", 
    country = "US", state = "CT", display_address = list(
        "172 Washington St", "Hartford, CT 06106")), .Names = c("address1", 
"address2", "address3", "city", "zip_code", "country", "state", 
"display_address")), phone = "+18605602292", display_phone = "(860) 560-2292", 
distance = 374.2938759334), .Names = c("id", "name", "image_url", 
"is_closed", "url", "review_count", "categories", "rating", "coordinates", 
"transactions", "location", "phone", "display_phone", "distance"
)), structure(list(id = "mcdonalds-hartford-12", name = "McDonald's", 
image_url = "https://s3-media4.fl.yelpcdn.com/bphoto/B0SDIM3ylqAN6hOgOkyybQ/o.jpg", 
is_closed = FALSE, url = "https://www.yelp.com/biz/mcdonalds-hartford-12", 
review_count = 4L, categories = list(structure(list(alias = "hotdogs", 
    title = "Fast Food"), .Names = c("alias", "title")), 
    structure(list(alias = "burgers", title = "Burgers"), .Names = c("alias", 
    "title"))), rating = 2, coordinates = structure(list(
    latitude = 41.7828446485687, longitude = -72.6981766090747), .Names = c("latitude", 
"longitude")), transactions = list(), price = "$", location = structure(list(
    address1 = "1303 Albany Ave", address2 = "", address3 = "", 
    city = "Hartford", zip_code = "06112", country = "US", 
    state = "CT", display_address = list("1303 Albany Ave", 
        "Hartford, CT 06112")), .Names = c("address1", "address2", 
"address3", "city", "zip_code", "country", "state", "display_address"
)), phone = "+18602473612", display_phone = "(860) 247-3612", 
distance = 2730.544003504), .Names = c("id", "name", "image_url", 
"is_closed", "url", "review_count", "categories", "rating", "coordinates", 
"transactions", "price", "location", "phone", "display_phone", 
"distance")), structure(list(id = "mcdonalds-hartford", name = "McDonald's", 
image_url = "https://s3-media2.fl.yelpcdn.com/bphoto/rnWHncxwC1qK5T9KvSIVBA/o.jpg", 
is_closed = FALSE, url = "https://www.yelp.com/biz/mcdonalds-hartford?", 
review_count = 8L, categories = list(structure(list(alias = "hotdogs", 
    title = "Fast Food"), .Names = c("alias", "title")), 
    structure(list(alias = "burgers", title = "Burgers"), .Names = c("alias", 
    "title"))), rating = 1.5, coordinates = structure(list(
    latitude = 41.7876, longitude = -72.66214), .Names = c("latitude", 
"longitude")), transactions = list(), price = "$", location = structure(list(
    address1 = "98 Weston St", address2 = "", address3 = "", 
    city = "Hartford", zip_code = "06120", country = "US", 
    state = "CT", display_address = list("98 Weston St", 
        "Hartford, CT 06120")), .Names = c("address1", "address2", 
"address3", "city", "zip_code", "country", "state", "display_address"
)), phone = "+18607240200", display_phone = "(860) 724-0200", 
distance = 3622.578151942), .Names = c("id", "name", "image_url", 
"is_closed", "url", "review_count", "categories", "rating", "coordinates", 
"transactions", "price", "location", "phone", "display_phone", 
"distance")))

1 个答案:

答案 0 :(得分:1)

为了让它正常工作,需要做不少数据整理。问题出在嵌套列表上,例如包含多个值的 `$location`。我想可以通过对每个列表调用 `glue::collapse(sep = ";")` 来绕过这个问题。这有点取巧,但最终会得到一个更易于处理的数据结构。试试这个:

library(tidyverse)

extractor <- function(list_element){
  map(list_element, glue::collapse, sep = ";")
}

nested_list %>%
  map(extractor) %>%
  transpose() %>%
  as_tibble() %>%
  View()

其中 `nested_list` 是您在问题中包含的数据集的一部分。