我正在尝试使用 R 中的 ggplot2 为美国各州创建热图。在此过程中,我遇到了这篇帖子和这篇帖子中详述的同样问题,但其中给出的解决方案——使用 merge 并指定左连接,或者使用 dplyr::left_join 或 dplyr::inner_join——似乎都不起作用。
以下是我使用 my_data 中的数据绘制下图所用的代码;图中华盛顿州、新泽西州和纽约州的部分区域缺失或颜色错误(图位于代码下方):
library(ggplot2)
library(ggthemes)
str(my_data)
> str(data)
'data.frame': 50 obs. of 2 variables:
$ region : chr "alaska" "alabama" "arkansas" "arizona" ...
$ number_of_tweets: int 0 10967 5130 462 26489 267 1464 2431 9777 7359 ...
# State outline vertices (one row per polygon vertex).
states <- map_data("state")

# Attach each state's tweet count to its outline vertices, matching on the
# lower-case state name in `region`.
choro <- dplyr::left_join(states, my_data, by = "region")

ggplot() +
  geom_polygon(
    data = choro,
    # Group by `group`, not `region`: a state that consists of several
    # separate polygons (e.g. mainland plus islands) has one `group` id per
    # polygon. Grouping by `region` joins all of a state's pieces into a
    # single path, which is what left parts of Washington, New Jersey and
    # New York missing or wrongly coloured.
    aes(x = long, y = lat, group = group, fill = number_of_tweets)
  ) +
  scale_fill_gradient(low = "#deebf7", high = "#3182bd") +
  theme_map()
以下是 my_data 的内容,以便复现该问题:
region,number_of_tweets
alabama,10929
alaska,0
arkansas,5107
arizona,452
california,26299
colorado,265
connecticut,1459
delaware,2418
florida,9666
georgia,7306
hawaii,2486
iowa,29229
idaho,7607
illinois,10221
indiana,20700
kansas,32252
kentucky,11098
louisiana,938
massachusetts,10764
maryland,4091
maine,5770
michigan,47335
minnesota,1079
missouri,1079
mississippi,1273
montana,11606
north carolina,22354
north dakota,6294
nebraska,7319
new hampshire,7185
new jersey,26850
new mexico,0
nevada,7918
new york,16007
ohio,8284
oklahoma,63551
oregon,1120
pennsylvania,908
rhode island,10240
south carolina,6296
south dakota,3559
tennessee,4765
texas,30235
utah,15019
virginia,5541
vermont,16444
washington,7506
wisconsin,7817
wyoming,10496
west virginia,0
答案 0 :(得分:5)
无需合并(join)数据,也不必进行麻烦的投影处理。另请注意,您的数据实际上并不适合作为连续变量来处理,您应该考虑如何对其进行归一化(例如,查找每个州的推特用户估计数并据此归一化,或者换算为每 10 万人的推文数)。
library(ggplot2)
library(ggthemes)
library(viridis)
# Tweet counts per state: 50 rows, one per state, keyed by the lower-case
# state name in `region`. Counts are stored as integers.
my_data <- data.frame(
  region = c(
    "alabama", "alaska", "arkansas", "arizona", "california",
    "colorado", "connecticut", "delaware", "florida", "georgia",
    "hawaii", "iowa", "idaho", "illinois", "indiana",
    "kansas", "kentucky", "louisiana", "massachusetts", "maryland",
    "maine", "michigan", "minnesota", "missouri", "mississippi",
    "montana", "north carolina", "north dakota", "nebraska", "new hampshire",
    "new jersey", "new mexico", "nevada", "new york", "ohio",
    "oklahoma", "oregon", "pennsylvania", "rhode island", "south carolina",
    "south dakota", "tennessee", "texas", "utah", "virginia",
    "vermont", "washington", "wisconsin", "wyoming", "west virginia"
  ),
  number_of_tweets = c(
    10929L, 0L, 5107L, 452L, 26299L,
    265L, 1459L, 2418L, 9666L, 7306L,
    2486L, 29229L, 7607L, 10221L, 20700L,
    32252L, 11098L, 938L, 10764L, 4091L,
    5770L, 47335L, 1079L, 1079L, 1273L,
    11606L, 22354L, 6294L, 7319L, 7185L,
    26850L, 0L, 7918L, 16007L, 8284L,
    63551L, 1120L, 908L, 10240L, 6296L,
    3559L, 4765L, 30235L, 15019L, 5541L,
    16444L, 7506L, 7817L, 10496L, 0L
  ),
  # Keep `region` as character regardless of the R version's default.
  stringsAsFactors = FALSE
)
# State outline vertices; passed as the `map` argument to geom_map().
states <- map_data("state")

# Bin the tweet counts at "pretty" break points and label every bin by its
# upper bound.
tweet_breaks <- pretty(x = my_data$number_of_tweets, n = 7)
bin_label <- as.character(cut(my_data$number_of_tweets,
                              breaks = tweet_breaks,
                              labels = tweet_breaks[-1]))

# cut() returns NA for values that fall in no (lower, upper] interval, which
# includes counts equal to the lowest break; those get their own "0" class.
bin_label[is.na(bin_label)] <- "0"

# Store the classes as a factor whose levels follow the break order. With
# plain character labels a discrete fill scale sorts the legend
# alphabetically, which puts e.g. "5000" after "10000".
my_data$cut <- factor(bin_label,
                      levels = union("0", as.character(tweet_breaks[-1])))
# Assemble the choropleth as a single layered expression.
gg <- ggplot() +
  # Layer 1: every state outline from `states`, drawn unfilled (fill = NA)
  # with thin white borders.
  geom_map(
    data = states, map = states,
    aes(x = long, y = lat, map_id = region),
    color = "white", size = 0.1, fill = NA
  ) +
  # Layer 2: one entry per row of `my_data`, matched to the map by `region`
  # and filled according to its tweet-count class in `cut`.
  geom_map(
    data = my_data, map = states,
    aes(fill = cut, map_id = region),
    color = "white", size = 0.1
  ) +
  # Discrete viridis palette, restricted to the 0.1-0.9 part of its range.
  scale_fill_viridis(name = "# Tweets", discrete = TRUE, begin = 0.1, end = 0.9) +
  coord_map("polyconic") +
  theme_map() +
  theme(legend.position = "right")

# Display the finished plot.
gg