我有一个名为 ct2 的列表,它来自网页抓取(webscrape)。我可以使用以下代码从非嵌套的列表元素中提取各列并命名。但是我还想添加坐标:当我调用 head(ct2$businesses) 时,可以看到 ct2$businesses$coordinates$latitude 和 ct2$businesses$coordinates$longitude。我似乎无法在不报错的情况下把它们输出到数据框中,所以我不确定自己哪里做错了。下面是用于提取各列的代码,再往下是我尝试添加坐标时收到的错误。
# Keep only the scalar fields of every business and row-bind them into a
# single data frame (one row per business).
biz_info <- map_df(
  ct2$businesses,
  `[`,
  c("name", "id", "phone", "review_count", "rating", "url")
)
# Render the result as a Markdown table.
knitr::kable(biz_info)
当我把坐标也加进去并尝试执行时,会得到如下错误:
# Same extraction as before, with "coordinates" appended as the 7th field.
# In the sample data each "coordinates" entry is itself a list with two
# elements (latitude, longitude), which is what the error below complains
# about: the 7th field has length 2 instead of 1.
biz_info <- ct2$businesses %>%
map_df(`[`, c("name", "id", "phone",
"review_count","rating","url","coordinates"))
# Console output produced by the call above:
Error in bind_rows_(x, .id) : Argument 7 must be length 1, not 2
biz_info %>% knitr::kable()
**编辑以添加数据示例
dput(head(ct2$businesses))
list(structure(list(id = "mcdonalds-hartford-7", name = "McDonald's",
image_url = "https://s3-media3.fl.yelpcdn.com/bphoto/hgpL9l7A10vRoWy84NPV_g/o.jpg",
is_closed = FALSE, url = "https://www.yelp.com/biz/mcdonalds-hartford-7?",
review_count = 4L, categories = list(structure(list(alias = "hotdogs",
title = "Fast Food"), .Names = c("alias", "title")),
structure(list(alias = "burgers", title = "Burgers"), .Names = c("alias",
"title"))), rating = 3.5, coordinates = structure(list(
latitude = 41.738611, longitude = -72.65921), .Names = c("latitude",
"longitude")), transactions = list(), price = "$", location = structure(list(
address1 = "76 Brainard Rd", address2 = "", address3 = "",
city = "Hartford", zip_code = "06114", country = "US",
state = "CT", display_address = list("76 Brainard Rd",
"Hartford, CT 06114")), .Names = c("address1", "address2",
"address3", "city", "zip_code", "country", "state", "display_address"
)), phone = "+18602477300", display_phone = "(860) 247-7300",
distance = 3155.923625766), .Names = c("id", "name", "image_url",
"is_closed", "url", "review_count", "categories", "rating", "coordinates",
"transactions", "price", "location", "phone", "display_phone",
"distance")), structure(list(id = "mcdonalds-restaurants-hartford-3",
name = "McDonalds Restaurants", image_url = "", is_closed = FALSE,
url = "https://www.yelp.com/biz/mcdonalds-restaurants-hartford-3?",
review_count = 2L, categories = list(structure(list(alias = "restaurants",
title = "Restaurants"), .Names = c("alias", "title"))),
rating = 2.5, coordinates = structure(list(latitude = 41.75251,
longitude = -72.71448), .Names = c("latitude", "longitude"
)), transactions = list(), location = structure(list(address1 = "214 Prospect Ave",
address2 = "", address3 = "", city = "Hartford", zip_code = "06106",
country = "US", state = "CT", display_address = list(
"214 Prospect Ave", "Hartford, CT 06106")), .Names = c("address1",
"address2", "address3", "city", "zip_code", "country", "state",
"display_address")), phone = "+18605238859", display_phone = "(860) 523-8859",
distance = 2591.628349648), .Names = c("id", "name", "image_url",
"is_closed", "url", "review_count", "categories", "rating", "coordinates",
"transactions", "location", "phone", "display_phone", "distance"
)), structure(list(id = "mcdonalds-hartford-9", name = "McDonald's",
image_url = "https://s3-media4.fl.yelpcdn.com/bphoto/49EjiRF2Yb91rBV6wbuHZw/o.jpg",
is_closed = FALSE, url = "https://www.yelp.com/biz/mcdonalds-hartford-9?",
review_count = 9L, categories = list(structure(list(alias = "burgers",
title = "Burgers"), .Names = c("alias", "title")), structure(list(
alias = "hotdogs", title = "Fast Food"), .Names = c("alias",
"title"))), rating = 2.5, coordinates = structure(list(latitude = 41.75251,
longitude = -72.71448), .Names = c("latitude", "longitude"
)), transactions = list(), price = "$", location = structure(list(
address1 = "214 Prospect Ave", address2 = "", address3 = "",
city = "Hartford", zip_code = "06106", country = "US",
state = "CT", display_address = list("214 Prospect Ave",
"Hartford, CT 06106")), .Names = c("address1", "address2",
"address3", "city", "zip_code", "country", "state", "display_address"
)), phone = "+18605235303", display_phone = "(860) 523-5303",
distance = 2591.628349648), .Names = c("id", "name", "image_url",
"is_closed", "url", "review_count", "categories", "rating", "coordinates",
"transactions", "price", "location", "phone", "display_phone",
"distance")), structure(list(id = "mcdonalds-hartford-10", name = "McDonald's",
image_url = "https://s3-media4.fl.yelpcdn.com/bphoto/da-sL4n1xX2VkLbqIWr5hw/o.jpg",
is_closed = FALSE, url = "https://www.yelp.com/biz/mcdonalds-hartford-10?",
review_count = 9L, categories = list(structure(list(alias = "burgers",
title = "Burgers"), .Names = c("alias", "title")), structure(list(
alias = "hotdogs", title = "Fast Food"), .Names = c("alias",
"title"))), rating = 1, coordinates = structure(list(latitude = 41.7573503,
longitude = -72.68223), .Names = c("latitude", "longitude"
)), transactions = list(), location = structure(list(address1 = "172 Washington St",
address2 = "", address3 = "", city = "Hartford", zip_code = "06106",
country = "US", state = "CT", display_address = list(
"172 Washington St", "Hartford, CT 06106")), .Names = c("address1",
"address2", "address3", "city", "zip_code", "country", "state",
"display_address")), phone = "+18605602292", display_phone = "(860) 560-2292",
distance = 374.2938759334), .Names = c("id", "name", "image_url",
"is_closed", "url", "review_count", "categories", "rating", "coordinates",
"transactions", "location", "phone", "display_phone", "distance"
)), structure(list(id = "mcdonalds-hartford-12", name = "McDonald's",
image_url = "https://s3-media4.fl.yelpcdn.com/bphoto/B0SDIM3ylqAN6hOgOkyybQ/o.jpg",
is_closed = FALSE, url = "https://www.yelp.com/biz/mcdonalds-hartford-12",
review_count = 4L, categories = list(structure(list(alias = "hotdogs",
title = "Fast Food"), .Names = c("alias", "title")),
structure(list(alias = "burgers", title = "Burgers"), .Names = c("alias",
"title"))), rating = 2, coordinates = structure(list(
latitude = 41.7828446485687, longitude = -72.6981766090747), .Names = c("latitude",
"longitude")), transactions = list(), price = "$", location = structure(list(
address1 = "1303 Albany Ave", address2 = "", address3 = "",
city = "Hartford", zip_code = "06112", country = "US",
state = "CT", display_address = list("1303 Albany Ave",
"Hartford, CT 06112")), .Names = c("address1", "address2",
"address3", "city", "zip_code", "country", "state", "display_address"
)), phone = "+18602473612", display_phone = "(860) 247-3612",
distance = 2730.544003504), .Names = c("id", "name", "image_url",
"is_closed", "url", "review_count", "categories", "rating", "coordinates",
"transactions", "price", "location", "phone", "display_phone",
"distance")), structure(list(id = "mcdonalds-hartford", name = "McDonald's",
image_url = "https://s3-media2.fl.yelpcdn.com/bphoto/rnWHncxwC1qK5T9KvSIVBA/o.jpg",
is_closed = FALSE, url = "https://www.yelp.com/biz/mcdonalds-hartford?",
review_count = 8L, categories = list(structure(list(alias = "hotdogs",
title = "Fast Food"), .Names = c("alias", "title")),
structure(list(alias = "burgers", title = "Burgers"), .Names = c("alias",
"title"))), rating = 1.5, coordinates = structure(list(
latitude = 41.7876, longitude = -72.66214), .Names = c("latitude",
"longitude")), transactions = list(), price = "$", location = structure(list(
address1 = "98 Weston St", address2 = "", address3 = "",
city = "Hartford", zip_code = "06120", country = "US",
state = "CT", display_address = list("98 Weston St",
"Hartford, CT 06120")), .Names = c("address1", "address2",
"address3", "city", "zip_code", "country", "state", "display_address"
)), phone = "+18607240200", display_phone = "(860) 724-0200",
distance = 3622.578151942), .Names = c("id", "name", "image_url",
"is_closed", "url", "review_count", "categories", "rating", "coordinates",
"transactions", "price", "location", "phone", "display_phone",
"distance")))
答案 0 :(得分:1)
为了让它正常工作,我折腾了很久。问题出在嵌套列表上,例如包含多个值的 `$location`。我想你可以通过在每个列表上调用 `glue::collapse(sep = ";")`(在较新版本的 glue 中名为 `glue::glue_collapse()`)来解决这个问题。这有点取巧,但最终会得到一个更易于处理的数据结构。试试这个:
library(tidyverse)
#' Collapse every field of one list element into a single string.
#'
#' @param list_element A list (here, one business record) whose fields may
#'   each hold several values.
#' @return A list with the same names as `list_element`, where every field has
#'   been passed through `glue::glue_collapse(sep = ";")`.
extractor <- function(list_element) {
  # glue::collapse() was renamed to glue::glue_collapse(); the old name is
  # deprecated and is not available in current glue releases.
  map(list_element, glue::glue_collapse, sep = ";")
}
# Collapse the fields of every record, flip the list of records into a list
# of columns, convert that to a tibble, and open it in the data viewer.
map(nested_list, extractor) %>%
  transpose() %>%
  as_tibble() %>%
  View()
其中 `nested_list` 是您在问题中包含的数据集的一部分。