我有以下数据框,其中的数据按美国各县的收入十分位数以及2016年大选的获胜者分组:
# A tibble: 1,188 x 5
# Groups: day_month_year, deciles_income [270]
day_month_year deciles_income winner2016 key mean_spend_cases
<date> <int> <chr> <chr> <dbl>
1 2020-01-12 1 Donald Trump mean_spend_all 0.00108
2 2020-01-12 1 Hillary Clinton mean_spend_all 0.0196
3 2020-01-12 2 Donald Trump mean_spend_all -0.000334
4 2020-01-12 2 Hillary Clinton mean_spend_all 0.00664
5 2020-01-12 3 Donald Trump mean_spend_all 0.00807
6 2020-01-12 3 Hillary Clinton mean_spend_all 0.0257
7 2020-01-12 4 Donald Trump mean_spend_all -0.00491
8 2020-01-12 4 Hillary Clinton mean_spend_all -0.0119
9 2020-01-12 5 Donald Trump mean_spend_all 0.000497
10 2020-01-12 5 Hillary Clinton mean_spend_all 0.00001
# … with 1,178 more rows
在 key 列中,我把 mean_spend_all 和 new_case_rate_07da 两个变量融合(melt)成了长格式。我想画一张图:x 轴为日期,用两条线显示新增病例率的变化(根据获胜者是特朗普还是克林顿,每条线颜色不同),并用点表示支出的变化(颜色同样取决于 winner2016 列)。
然后我想使用分面(facet_wrap),按县的收入十分位数得到十张图。最后,我想用 stat_smooth() 函数为支出变化添加一条最佳拟合线。
理想情况下,该图看起来与此类似,但是增加了案例率:
# Original attempt from the question, kept verbatim because it is the code
# that triggers the aesthetics-length error discussed in the text.
ggplot(data = group_by(afc, winner2016),
# x is taken from the full afc column, i.e. one value per row of afc.
aes(x = afc$day_month_year)) +
# y comes from a filter()ed copy of afc, so it has fewer values than the plot
# data; the question attributes the length error for y to this.
geom_point(aes(color = winner2016, y = filter(afc, key == "mean_spend_all")$mean_spend_cases *100)) +
# Same pattern for the case-rate line: y is a filtered subset, x is not.
geom_line(aes(color = winner2016, y = filter(afc, key == "new_case_rate_07da")$mean_spend_cases)) +
# One panel per value of deciles_income.
facet_wrap(afc$deciles_income)+
# NOTE(review): the x label mentions income deciles although x is mapped to
# day_month_year -- confirm the intended label.
labs(title = "Change in spending for counties grouped by decile of income",
x = "Decile of a County by income",
y = "Change in consumer spending relative to January 14")+
# No y aesthetic is supplied to the smoother here.
stat_smooth(aes(color = (afc$winner2016))) +
scale_y_continuous(limits = c(-30,15))
但是,我得到的错误是“Aesthetics must be either length 1 or the same as the data (1188): y”(美学映射的长度必须为1,或与数据(1188行)的长度相同:y),我认为这是因为使用了filter()。
这是结构:
# Sample of afc, presumably produced with dput(): 30 rows with the columns
# day_month_year, deciles_income, winner2016, key and mean_spend_cases, stored
# as a grouped_df grouped by day_month_year and deciles_income.
# NOTE(review): the expression is not assigned here; prefix it with
# `afc <- ` to use it with the plotting snippets.
structure(list(day_month_year = structure(c(18301, 18434, 18406,
18301, 18287, 18406, 18350, 18399, 18329, 18308, 18343, 18413,
18308, 18434, 18280, 18273, 18371, 18434, 18273, 18448, 18287,
18434, 18350, 18343, 18427, 18273, 18399, 18273, 18294, 18427
), tzone = "Europe/Prague", class = "Date"), deciles_income = c(9L,
5L, 4L, 6L, 8L, 8L, 2L, 10L, 8L, 2L, 1L, 4L, 8L, 2L, 7L, 6L,
5L, 9L, 8L, 3L, 5L, 8L, 8L, 8L, 9L, 7L, 9L, 6L, 9L, 8L), winner2016 = c("Hillary Clinton",
"Hillary Clinton", "Hillary Clinton", "Donald Trump", "Donald Trump",
"Hillary Clinton", "Donald Trump", "Donald Trump", "Hillary Clinton",
"Donald Trump", "Donald Trump", "Donald Trump", "Donald Trump",
"Hillary Clinton", "Hillary Clinton", "Hillary Clinton", "Hillary Clinton",
"Hillary Clinton", "Hillary Clinton", "Hillary Clinton", "Donald Trump",
"Donald Trump", "Hillary Clinton", "Donald Trump", NA, "Donald Trump",
"Donald Trump", "Donald Trump", NA, "Hillary Clinton"), key = c("new_case_rate_07da",
"new_case_rate_07da", "mean_spend_all", "new_case_rate_07da",
"mean_spend_all", "new_case_rate_07da", "new_case_rate_07da",
"new_case_rate_07da", "new_case_rate_07da", "mean_spend_all",
"mean_spend_all", "new_case_rate_07da", "mean_spend_all", "new_case_rate_07da",
"new_case_rate_07da", "mean_spend_all", "new_case_rate_07da",
"new_case_rate_07da", "new_case_rate_07da", "mean_spend_all",
"mean_spend_all", "new_case_rate_07da", "new_case_rate_07da",
"new_case_rate_07da", "mean_spend_all", "new_case_rate_07da",
"new_case_rate_07da", "new_case_rate_07da", "mean_spend_all",
"mean_spend_all"), mean_spend_cases = c(NA, 7.15300714285714,
-0.0640216666666667, 0, 0.0156585338983051, 4.90477891156463,
1.04001215805471, 4.98906868131868, NA, -0.0116506382978723,
-0.0940805, 3.22004958592133, 0.0157676779661017, 10.4577329192547,
NA, -0.0137643636363636, 3.87815714285714, 5.65400529100529,
NA, 0.00507125, 0.0140480451612903, 5.29207102502018, 3.33591666666667,
0.280013559322034, 0.0406, NA, 4.06433752775722, NA, 0.00533333333333333,
-0.109501666666667)), row.names = c(NA, -30L), groups = structure(list(
day_month_year = structure(c(18273, 18273, 18273, 18280,
18287, 18287, 18294, 18301, 18301, 18308, 18308, 18329, 18343,
18343, 18350, 18350, 18371, 18399, 18399, 18406, 18406, 18413,
18427, 18427, 18434, 18434, 18434, 18434, 18448), tzone = "Europe/Prague", class = "Date"),
deciles_income = c(6L, 7L, 8L, 7L, 5L, 8L, 9L, 6L, 9L, 2L,
8L, 8L, 1L, 8L, 2L, 8L, 5L, 9L, 10L, 4L, 8L, 4L, 8L, 9L,
2L, 5L, 8L, 9L, 3L), .rows = structure(list(c(16L, 28L),
26L, 19L, 15L, 21L, 5L, 29L, 4L, 1L, 10L, 13L, 9L, 11L,
24L, 7L, 23L, 17L, 27L, 8L, 3L, 6L, 12L, 30L, 25L, 14L,
2L, 22L, 18L, 20L), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, 29L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
您将如何解决这个问题?
答案 0 :(得分:2)
请试试下面的代码。示例数据很少,但我相信足以勾勒出您想要的效果:
library(ggplot2)

# Split the long-format data by `key` once, so that every layer receives rows
# whose x, y and colour columns all have the same length. This avoids the
# "Aesthetics must be either length 1 or the same as the data" error.
spend <- subset(afc, key == "mean_spend_all")
cases <- subset(afc, key == "new_case_rate_07da")

# x and colour are shared by all layers; each layer supplies its own data
# and y aesthetic.
ggplot(mapping = aes(x = day_month_year, color = winner2016)) +
  # Spending change as points, scaled by 100 as in the question.
  geom_point(data = spend, aes(y = mean_spend_cases * 100)) +
  # Fitted trend for the spending points; se = FALSE hides the confidence band.
  stat_smooth(
    data = spend,
    aes(y = mean_spend_cases * 100),
    formula = y ~ as.numeric(x),
    method = "gam",
    se = FALSE
  ) +
  # New-case rate as lines. This layer must use the case-rate rows, not the
  # spending rows again.
  geom_line(data = cases, aes(y = mean_spend_cases)) +
  # One panel per income decile, each with its own axis ranges.
  facet_wrap(. ~ deciles_income, scales = "free") +
  theme(legend.position = "top") +
  ylab("")
这将产生下面的输出(数据点很少):
数据更多时,结果会有所不同。至于 stat_smooth,我不确定您具体想要什么,所以按上面代码中的写法添加了它。不使用分面时它也能正常工作,如下所示:
# Points plus fitted trend for the spending series only, without facetting.
spend <- subset(afc, key == "mean_spend_all")

# Both layers share the same data and aesthetics, so they are declared once in
# ggplot() and inherited by the layers.
ggplot(
  spend,
  aes(x = day_month_year, y = mean_spend_cases * 100, color = winner2016)
) +
  geom_point() +
  # se = FALSE (spelled out rather than the reassignable `F`) hides the
  # confidence band around the fitted curve.
  stat_smooth(formula = y ~ as.numeric(x), method = "gam", se = FALSE)
输出:
有了更多数据,您应该能得到合适的曲线。我使用了 gam,但您应该换成自己想要的拟合方法。
答案 1 :(得分:0)
在您的 ggplot() 调用中,您把 x 美学映射定义为 afc$day_month_year,即该列中的全部数据。然后,在两个 geom_ 图层中,您把 y 定义为 afc$mean_spend_cases 的子集,其点数与 x 不同。您还需要在各个 geom_ 图层中定义 x 美学映射,并对其做同样的子集化:
ggplot(data = group_by(afc, winner2016)) +
geom_point(aes(x = filter(afc, key == "mean_spend_all")$day_month_year,
color = winner2016,
y = filter(afc, key == "mean_spend_all")$mean_spend_cases *100)) +
geom_line(aes(x = filter(afc, key == "new_case_rate_07da")$day_month_year
color = winner2016,
y = filter(afc, key == "new_case_rate_07da")$mean_spend_cases)) +