我有一些令人不安的人口统计数据点,需要叠加在美国县级地图上。地图本身可以正常绘制,但夏威夷和阿拉斯加没有显示任何数据。我已经确定问题出现在 over 命令这一步之后。
我的工作流使用的 csv 文件可以在这里找到(https://www.dropbox.com/s/0arazi2n0adivzc/data.dem2.csv?dl=0)。这是我的工作流程:
#Load dependencies
devtools::install_github("hrbrmstr/albersusa")
library(albersusa)
library(dplyr)
library(rgeos)
library(maptools)
library(ggplot2)
library(ggalt)
library(ggthemes)
library(viridis)
# Read data
df <- read.csv("data.dem.csv")

# Retrieve the composite county polygons, keeping only the states that occur
# in the data. The condition must test the polygons' own `state` attribute:
# the previous `df$state %in% unique(df$state)` was TRUE for every row of
# `df` and therefore filtered nothing.
# Note: I've checked here and Alaska is present.
usa <- counties_composite() %>%
  subset(state %in% unique(df$state))

# Subset just the point columns and create a spatial points object
pts <- as.data.frame(df[, 4:1])
coordinates(pts) <- ~long+lat
# Note: I've checked here as well and Alaska is still present.
proj4string(pts) <- CRS(proj4string(usa))

# Spatial overlay -- this is where the problem arises.
# NOTE(review): the composite map presumably relocates Alaska and Hawaii, so
# points at their true lon/lat would match no polygon here -- confirm by
# inspecting `b` for NA rows.
b <- over(pts, usa)
b <- select(b, -state) # drop the column name duplicated in `df`
b <- bind_cols(df, b)

# Aggregate the per-point counts by county FIPS code, reusing the overlay
# result instead of running over() a second time.
df <- count(b, fips, wt = count)

# Polygons keyed by FIPS so that `map_id = fips` below can match them
usa_map <- fortify(usa, region = "fips")

ggplot() +
  # base layer: every county outline, filled grey
  geom_map(data = usa_map, map = usa_map,
           aes(long, lat, map_id = id),
           color = "#b2b2b2", size = 0.05, fill = "grey") +
  # data layer: counties that received at least one point, filled by count
  geom_map(data = df, map = usa_map,
           aes(fill = n, map_id = fips),
           color = "#b2b2b2", size = 0.05) +
  scale_fill_viridis(name = "Count", trans = "log10") +
  coord_map() +
  theme_map() +
  theme(legend.position = c(0.85, 0.2))
正如您可能猜到的,最终输出中阿拉斯加和夏威夷都没有显示任何数据。我不知道发生了什么,但问题似乎出在 sp 包的 over 命令上。
任何建议都非常感谢。
请注意,这个问题与 “Relocating Alaska and Hawaii on thematic map of the USA with ggplot2” 和 “How do you create a 50 state map (instead of just lower-48)” 这两个问题不同,
它们与本问题并不相关,因此这不是重复问题。第一个问题是关于夏威夷和阿拉斯加多边形本身的位置,正如你从我的地图中看到的那样,我没有这个问题。第二个链接是关于如何获得包含夏威夷和阿拉斯加的地图。同样,我的地图已经包含这两个州,但在我的数据处理工作流程的某个环节(具体来说是 over 叠加这一步),这两个州的数据丢失了。请不要标记为重复。
答案 0 :(得分:3)
你需要做的比之前的答案多一点,因为复合形状文件(composite shapefile)按其定义会把阿拉斯加和夏威夷从它们原来的位置移走,这会使 over()
在尝试将点与多边形匹配时错过它们。这很容易解决:
library(albersusa) # devtools::install_github("hrbrmstr/albersusa")
library(readr)
library(dplyr)
library(rgeos)
library(rgdal)
library(maptools)
library(ggplot2)
library(ggalt)
library(ggthemes)
library(viridis)
df <- read_csv("data.dem2.csv")

# need this for the composite map & no need to subset
usa <- counties_composite()

# need this for the "over" since the composite map totally
# messes with the lon/lat positions of alaska & hawaii
URL <- "http://eric.clst.org/wupl/Stuff/gz_2010_us_050_00_500k.json"
fil <- basename(URL)
if (!file.exists(fil)) download.file(URL, fil)

# Ask the file for its layer name instead of hard-coding "OGRGeoJSON":
# the GeoJSON layer name depends on the GDAL version rgdal was built against
# (older builds report "OGRGeoJSON", newer ones use the file's base name).
orig_counties <- readOGR(fil, ogrListLayers(fil)[1], stringsAsFactors = FALSE)

# select the coordinate columns by name (the formula below requires exactly
# `long` and `lat`), so an extra leading column in the csv cannot shift them
pts <- as.data.frame(df[, c("long", "lat")])
coordinates(pts) <- ~long+lat
proj4string(pts) <- CRS(proj4string(orig_counties))

# don't need to select out the duplicate col name anymore
# but we do need to create the FIPS code
df <- bind_cols(df, over(pts, orig_counties)) %>%
  # over() yields NA attributes for points that fall in no county; drop them
  # so they are not aggregated under a bogus "NANA" FIPS code
  filter(!is.na(STATE), !is.na(COUNTY)) %>%
  mutate(fips = sprintf("%s%s", STATE, COUNTY)) %>%
  count(fips, wt = count)

# polygons of the composite map, keyed by FIPS to match `map_id = fips`
usa_map <- fortify(usa, region = "fips")

gg <- ggplot()
# base layer: every county outline, filled white
gg <- gg + geom_map(data = usa_map, map = usa_map,
                    aes(long, lat, map_id = id),
                    color = "#b2b2b2", size = 0.05, fill = "white")
# data layer: counties with at least one matched point, filled by count
gg <- gg + geom_map(data = df, map = usa_map,
                    aes(fill = n, map_id = fips),
                    color = "#b2b2b2", size = 0.05)
gg <- gg + scale_fill_viridis(name = "Count", trans = "log10")
gg <- gg + coord_proj(us_aeqd_proj)
gg <- gg + theme_map()
gg <- gg + theme(legend.position = c(0.85, 0.2))
gg