我有一个 data.table,其中包含一些州名缩写和县名。我想根据 ggplot2::map_data('county') 为每一行获取近似坐标。
我可以使用 := 通过多行代码依次完成此操作,但我希望只调用一次函数就能完成。
以下是我尝试过的内容:
数据:
library(data.table)
library(ggplot2)
# Console transcript: dput() of the first 20 rows of `dt`, restricted to the
# state, county and prime_mover columns. The structure(...) expression that
# follows is the printed output of that call, i.e. the sample data itself
# (state abbreviations, county names and a prime_mover code per row).
> dput(dt[1:20, .(state, county, prime_mover)])
structure(list(state = c("AZ", "AZ", "CA", "CA", "CA", "CT",
"FL", "IN", "MA", "MA", "MA", "MN", "NJ", "NJ", "NJ", "NY", "NC",
"SC", "TN", "TX"), county = c("Maricopa", "Maricopa", "Los Angeles",
"Orange", "Los Angeles", "Fairfield", "Hillsborough", "Morgan",
"Barnstable", "Nantucket", "Essex", "Dakota", "Cape May", "Salem",
"Middlesex", "Kings", "Buncombe", "Anderson", "Shelby", "Tarrant"
), prime_mover = c("GT", "GT", "CT", "CT", "CT", "CT", "GT",
"CT", "GT", "GT", "GT", "GT", "CT", "GT", "CT", "GT", "CT", "CT",
"CT", "CT")), .Names = c("state", "county", "prime_mover"), row.names = c(NA,
-20L), class = c("data.table", "data.frame"))
# County map data from ggplot2::map_data(), converted to a data.table. The code
# further down reads its region, subregion, long and lat columns, so this
# object must exist before that code runs.
coord_data <- as.data.table(map_data('county'))
代码:
# Look up the approximate centre of one county in `coord_data`.
#
# Args:
#   state:  a single state abbreviation as listed in `state.abb` (e.g. "AZ").
#   county: a single county name; it is lower-cased before comparison with
#           the `subregion` column.
#
# Returns:
#   A list of length one holding an unnamed numeric vector c(lon, lat): the
#   means of `long` and `lat` over the rows of `coord_data` whose `region`
#   and `subregion` match. No check is made that any row actually matched.
#
# This function is scalar, not vectorised: calling it through
# lapply(.SD, ...) passes a whole column as `state` and nothing as `county`.
# Use it per row (e.g. via Map/mapply) or replace it with a join.
getCoords <- function(state, county) {
  # Fail loudly on vector input instead of silently using the first element.
  stopifnot(length(state) == 1L, length(county) == 1L)
  # match() is an exact lookup; grep() would interpret `state` as a regular
  # expression and could return zero or several matches.
  prov <- state.name[match(state, state.abb)]
  ck <- coord_data[
    region == tolower(prov) & subregion == tolower(county),
    .(lon = mean(long), lat = mean(lat))
  ]
  list(unname(unlist(ck)))
}
# Testing getCoords
> getCoords('AZ', 'Maricopa')
[[1]]
[1] -111.88668 33.58126
错误:
> dt[, c('lon', 'lat') := lapply(.SD, getCoords), .SDcols = c('state', 'county')]
Error in tolower(county) : argument "county" is missing, with no default
In addition: Warning message:
In grep(state, state.abb) :
argument 'pattern' has length > 1 and only the first element will be used
我已经看过以下答案,但仍然无法完全弄清楚我哪里做错了:
我能够通过其他方式(多行代码、dplyr,甚至 base R)实现我想要的功能,但我更喜欢使用 data.table 的方法。
答案 0 :(得分:0)
我会使用两次 update join(更新联接)来完成:
library(data.table)

# Aggregate the map data to one mean coordinate pair per (region, subregion).
cols <- c("long", "lat")
agg_coord <- coord_data[
  , lapply(.SD, mean),
  .SDcols = cols, by = .(region, subregion)
]

# Coerce dt to data.table by reference, then chain the updates.
setDT(dt)[
  # 1st update join: append the lower-cased full state name as `region`.
  # The i. prefix makes explicit that the value comes from the lookup table.
  .(state = state.abb, state.name = tolower(state.name)),
  on = "state", region := i.state.name
][
  # Append the lower-cased county name as `subregion`.
  , subregion := tolower(county)
][
  # 2nd update join: append the aggregated coordinates. Using i.long / i.lat
  # keeps the join correct even if dt already has long / lat columns
  # (e.g. when this script is run a second time).
  agg_coord,
  on = .(region, subregion), (cols) := .(i.long, i.lat)
][
  # Remove the helper columns used only for joining.
  , c("region", "subregion") := NULL
]

# Print the updated dt.
dt[]