从R中的嵌套列表中提取数据

时间:2017-07-25 17:36:30

标签: r lapply google-geocoding-api googleway

我使用google_reverse_geocode API,根据一份包含纬度和经度信息的地点列表下载了地址列表。由于我是R新手,不知道如何从返回结果中提取有用的信息。下载数据的所有代码都在问题的底部。

列表的结构一般是这个。

  # Flatten the first element of every geocoding response into a plain vector.
  # sapply() is kept on purpose: this is the attempt discussed in the question.
  prueba <- sapply(direccion1, function(respuesta) unlist(respuesta[[1]]))

我需要把这些信息整理成数据框,以便进行分析。具体需要的字段是c(纬度, 经度, formatted_address, place_id)。

我写的代码是:

Error in (function (..., row.names = NULL, 
  check.rows = FALSE, check.names = TRUE,  : 
    arguments imply differing number of rows: 40, 32, 37, 44, 36, 0, 41, 28, 39, 
  47, 43, 35, 48

# Coerce the sapply() result to a data frame.
pureba2 <- data.frame(prueba)

我收到以下错误:

# CRE FILES
# Downloads the fuel price and station ("places") XML feeds and turns them
# into two data frames: datosDePrecios and datosDeLugares.
library(easypackages)
my_packages <- c(
  "ggmap", "maps", "mapdata", "rlist", "readr", "tidyverse", "lubridate",
  "stringr", "rebus", "stringi", "purrr", "geosphere", "XML", "RCurl", "xml2"
)
libraries(my_packages)

# Set link to website
link1 <- "https://publicacionexterna.azurewebsites.net/publicaciones/prices"

# Get data from webpage
data_prices <- getURL(link1)

# Parse XML data
xmlfile <- xmlParse(data_prices)

# Get place nodes
places <- getNodeSet(xmlfile, "//place")

# Get values for each place
values <- lapply(places, function(x) {
  # Get current place id
  p_id <- xmlAttrs(x)
  # Get values for each gas type for current place
  newrows <- lapply(xmlChildren(x), function(y) {
    # Get type and update time
    attrs <- xmlAttrs(y)
    # Get price value
    price <- xmlValue(y)
    names(price) <- "price"
    c(p_id, attrs, price)
  })
  # Combine the per-gas-type rows of this place
  do.call(rbind, newrows)
})

# Combine all values into a single dataframe
datosDePrecios <- as.data.frame(
  do.call(rbind, values),
  stringsAsFactors = FALSE
)

# Re-set row names for dataframe; seq_len() stays empty for a zero-row frame,
# whereas 1:nrow() would yield c(1, 0)
row.names(datosDePrecios) <- seq_len(nrow(datosDePrecios))

# Set link to website to the places file
link2 <- "https://publicacionexterna.azurewebsites.net/publicaciones/places"
data_places <- read_xml(link2)

# Pull each field out of the places document. xml_text() is called without a
# second argument: that position is `trim`, not a node name.
datos_id <- data_places %>%
  xml_find_all("//place") %>%
  xml_attr("place_id")
datos_name <- data_places %>%
  xml_find_all("//name") %>%
  xml_text()
datos_brand <- data_places %>%
  xml_find_all("//brand") %>%
  xml_text()
datos_cre_id <- data_places %>%
  xml_find_all("//cre_id") %>%
  xml_text()
datos_category <- data_places %>%
  xml_find_all("//category") %>%
  xml_text()
datos_adress_street <- data_places %>%
  xml_find_all("//address_street") %>%
  xml_text()
datos_longitud <- data_places %>%
  xml_find_all("//x") %>%
  xml_text()
datos_latitud <- data_places %>%
  xml_find_all("//y") %>%
  xml_text()

# Keep the columns as character so that a later as.numeric(place_id) converts
# the ids themselves rather than factor level codes (matters on R < 4.0)
datosDeLugares <- data.frame(
  datos_id, datos_name, datos_brand, datos_cre_id, datos_category,
  datos_adress_street, datos_latitud, datos_longitud,
  stringsAsFactors = FALSE
)
colnames(datosDeLugares) <- c(
  "place_id", "name", "brand", "cre_id", "category", "adress_street",
  "Latitude", "Longitude"
)

# Drop the intermediate objects
rm(
  data_prices, places, values, xmlfile, data_places, datos_adress_street,
  datos_brand, datos_category, datos_cre_id, datos_id, datos_name,
  datos_longitud, datos_latitud
)
# NOTE(review): results and results2 are not created anywhere in this script;
# rm() only warns if they do not exist -- confirm whether this line is needed
rm(results, results2)

其他代码不起作用。

下载包含经度和纬度的数据的代码如下:

# Convert the join key to numeric in both tables so that they can be joined
# on place_id.
# The frames are no longer piped into data.frame(<same frame>): that call
# received the frame twice and duplicated every column. as.character() guards
# against place_id being a factor, where as.numeric() alone would return the
# level codes instead of the ids.
datosDePrecios <- datosDePrecios %>%
  mutate(place_id = as.numeric(as.character(place_id)))

datosDeLugares <- datosDeLugares %>%
  mutate(place_id = as.numeric(as.character(place_id)))


# Attach the station coordinates to every price record.
baseGeneral <- inner_join(datosDeLugares, datosDePrecios, by = "place_id")

# Keep only what the geocoder needs and make the coordinates numeric
# (as.character() first, in case the columns are factors).
baseGeneral <- baseGeneral %>%
  select(Latitude, Longitude, place_id) %>%
  mutate(
    Latitude = as.numeric(as.character(Latitude)),
    Longitude = as.numeric(as.character(Longitude))
  )

# Limit the request volume to the first 100 rows. head() returns fewer rows
# when fewer exist, whereas [1:100, ] would pad the frame with all-NA rows.
baseGeneral <- head(baseGeneral, 100)

# Reverse geocode every row: apply() hands each row to the function as a named
# vector, from which the latitude/longitude pair is taken. The result replaces
# baseGeneral.
baseGeneral <- apply(
  X = baseGeneral,
  MARGIN = 1,
  FUN = function(fila) {
    google_reverse_geocode(
      location = fila[c("Latitude", "Longitude")],
      key = key,
      result_type = "street_address"
    )
  }
)

获取地址信息的代码如下。

find('tag_name')['attribute_name']

感谢您的帮助。 :)

2 个答案:

答案 0 :(得分:2)

您可以使用[[表示法或$从列表中提取信息。

如果我采用?google_reverse_geocode中给出的示例来获得结果

library(googleway)

# Reverse geocode a single latitude/longitude pair, restricted to rooftop-level
# street addresses. `key` must already hold a Google API key.
res <- google_reverse_geocode(
  location = c(-37.81659, 144.9841),
  result_type = "street_address",
  location_type = "rooftop",
  key = key
)

纬度/经度信息位于res$results$geometry$location

格式化地址位于res$results$formatted_address

place_id位于res$results$place_id

因此,您可以从这些元素中创建data.frame

# Assemble the four fields of interest from the response; the same elements
# are reached here with [[ ]] instead of $.
data.frame(
  lat = res[["results"]][["geometry"]][["location"]][["lat"]],
  lon = res[["results"]][["geometry"]][["location"]][["lng"]],
  formatted_address = res[["results"]][["formatted_address"]],
  place_id = res[["results"]][["place_id"]]
)

如果您有多个结果列表,那么该过程类似,但您需要将其包装在*apply函数(或您喜欢的任何循环机制)中

## two example coordinate pairs, one vector per location
locations <- list(
  c(-37.81659, 144.9841),
  c(-37.81827, 144.9671)
)

## call the reverse geocoder once per location; lst_res holds one response
## per element of `locations`
lst_res <- lapply(
  X = locations,
  FUN = function(coords) {
    google_reverse_geocode(location = coords, key = key)
  }
)

此处,lst_res是地理编码功能的所有结果列表,因此您可以对其进行迭代以提取相关部分

## turn every response into a data frame holding only the fields of interest
lst_df <- lapply(lst_res, function(respuesta) {
  hits <- respuesta[["results"]]
  coords <- hits[["geometry"]][["location"]]
  data.frame(
    lat = coords[["lat"]],
    lon = coords[["lng"]],
    formatted_address = hits[["formatted_address"]],
    place_id = hits[["place_id"]]
  )
})

此处,lst_df是data.frames列表。如果你想将它们加入一个data.frame,你可以

## stack the per-location data frames into a single data frame
df <- do.call(what = rbind, args = lst_df)

## show the first rows of the combined result
head(df, n = 6L)
# lat      lon
# 1 -37.81647 144.9841
# 2 -37.81659 144.9841
# 3 -37.81300 144.9850
# 4 -37.81363 144.9631
# 5 -37.81614 144.9805
# 6 -37.81005 144.9281
# formatted_address
# 1 Jolimont Station, 175 Wellington Parade, East Melbourne VIC 3002, Australia
# 2       Jolimont Station, Wellington Cres, East Melbourne VIC 3002, Australia
# 3                                          East Melbourne VIC 3002, Australia
# 4                                                    Melbourne VIC, Australia
# 5                                          East Melbourne VIC 3002, Australia
# 6                                                   Melbourne, VIC, Australia
# place_id
# 1 ChIJSxAubOpC1moRqhRUnMoZV4M
# 2 ChIJIdtrbupC1moRMPT0CXZWBB0
# 3 ChIJz25SvMFC1moRAOiMIXVWBAU
# 4 ChIJ90260rVG1moRkM2MIXVWBAQ
# 5 ChIJG74w4Upd1moRsDQuRnhWBBw
# 6 ChIJv_FYgkNd1moRpxLuRXZURFs

答案 1 :(得分:1)

我认为您使用unlist()时遇到的问题在于它默认recursive=TRUE。因此,它很可能把您的数据框本身也拆开了,连数据框中的列表列也一并展开,最终得到一堆令人困惑的结果。

所以你可以尝试unlist(..., recursive=FALSE)。不过,如果我没有理解错你的目标,只需从列表的每个元素中提取$results,再用bind_rows()合并即可解决问题。我猜你的大列表中的每个元素都包含一个名为results的数据框。

# Build one data frame per geocoding response and stack them into one.
# YOUR_BIG_FAT_LIST is a placeholder for the list of geocoding responses; each
# element is assumed to carry a data frame called `results`.
results <- lapply(YOUR_BIG_FAT_LIST, function(x) {
  df <- x$results
  # data.frame() builds the frame from named columns; as.data.frame() expects
  # an existing object as its first argument and fails with only named columns
  data.frame(
    address = df$formatted_address,
    id = df$place_id,
    lat = df$geometry$location$lat,
    lng = df$geometry$location$lng,
    stringsAsFactors = FALSE
  )
})
information <- bind_rows(results)