我正在尝试开发一个 Shiny 仪表板应用程序,它能够针对用户选择的不同结果变量生成条形图。为此,我需要以响应式(reactive)方式对数据取子集,并生成聚合后的数据框。下面的代码已经能够以响应式方式成功过滤数据,但当我尝试以响应式方式使用 dplyr::summarise() 时遇到了麻烦。
这是我当前的代码(R 版本 3.6)。
dput(head(df))
structure(list(geoid = c("01001020200", "01001020300", "01001020700",
"01001020802", "01001021000", "01001021100"), state = c("Alabama",
"Alabama", "Alabama", "Alabama", "Alabama", "Alabama"), county = c("Autauga County",
"Autauga County", "Autauga County", "Autauga County", "Autauga County",
"Autauga County"), ozzone = structure(c(1L, 1L, 2L, 1L, 1L, 1L
), .Label = c("non.oz", "oz"), class = "factor"), tract_type = c("LICs",
"Contiguous", "LICs", "Contiguous", "Contiguous", "LICs"), investment_score_1_low_10_high = c(4,
6, 9, 10, 5, 6), socioeconomic_change_flag_1_yes_blank_no = c(0,
0, 0, 0, 0, 0), fips_county = c("01001", "01001", "01001", "01001",
"01001", "01001"), total_empl = c(51809L, 51809L, 51809L, 51809L,
51809L, 51809L), total_payroll = c(338395L, 338395L, 338395L,
338395L, 338395L, 338395L), total_establishments = c(5090L, 5090L,
5090L, 5090L, 5090L, 5090L), largest_employer = c(72L, 72L, 72L,
72L, 72L, 72L), largest_employer_bypayroll = c(44L, 44L, 44L,
44L, 44L, 44L), trend_employee_change = c(2735.60000000046, 2735.60000000046,
2735.60000000046, 2735.60000000046, 2735.60000000046, 2735.60000000046
), trend_payroll_change = c(23074.8000000037, 23074.8000000037,
23074.8000000037, 23074.8000000037, 23074.8000000037, 23074.8000000037
), trend_establishment_change = c(53.4000000000084, 53.4000000000084,
53.4000000000084, 53.4000000000084, 53.4000000000084, 53.4000000000084
), damage_cost_weather_total = c(20000, 20000, 20000, 20000,
20000, 20000), deaths_weather_total = c(0L, 0L, 0L, 0L, 0L, 0L
), medianrent = c(537, 633, 525, 680, 409, 303), vacancyrate = c(0.108200455580866,
0.113652113652114, 0.0436681222707424, 0.0512166859791425, 0.229962546816479,
0.21030303030303), total_pop = c(503, 827, 900, 2989, 740, 813
), undertwo_percent = c(0.391650099403579, 0.351874244256348,
0.397777777777778, 0.17096018735363, 0.301351351351351, 0.263222632226322
), mobility_rate = c(0.133702166897188, 0.0737753882915173, 0.196514423076923,
0.172716680111141, 0.0641304347826087, 0.0681084570690769), unemploy_rate = c(0.0176991150442478,
0.0273203592814371, 0.109881724532621, 0.0127906976744186, 0.0344982078853047,
0.0281910728269381), median_income = c(41287, 46806, 41250, 64439,
46607, 36450), renter_percent = c(0.337653478854025, 0.310596310596311,
0.331877729257642, 0.268110942458949, 0.328686327077748, 0.365986394557823
), blackaa_percent = c(0.5451197053407, 0.264697193500739, 0.145906432748538,
0.152916262243007, 0.258583690987124, 0.530922930542341), hispanic_percent = c(0.0105893186003683,
0.0803545051698671, 0.0400584795321637, 0.0137651107385511, 0.00822603719599428,
0.00666032350142721), transit_score_mean = c(0, 0, 0, 0, 0, 0
), life_expectancy = c(75.67, 75.67, 75.67, 75.67, 75.67, 75.67
), trend_life_expectancy = c(5.1, 5.1, 5.1, 5.1, 5.1, 5.1), median_monthly_housing_costs = c(885,
885, 885, 885, 885, 885), pestilence_2018 = c(2, 2, 2, 2, 2,
2), total_pop_county = c(6772, 6772, 6772, 6772, 6772, 6772),
deaths_weather_pop = c(0, 0, 0, 0, 0, 0), cost_weather_pop = c(2.95333727111636,
2.95333727111636, 2.95333727111636, 2.95333727111636, 2.95333727111636,
2.95333727111636), Male_HSgrad = c(75, 68, 211, 189, 97,
42), Male_SomeCollege = c(28, 18, 51, 111, 74, 38), Male_AssocDeg = c(4,
6, 0, 63, 0, 21), Male_BachDeg = c(7, 9, 0, 11, 0, 9), Male_GradDeg = c(0,
0, 0, 29, 6, 0), MaleEduAboveHS = c(114, 101, 262, 403, 177,
110), Total_Male18.24 = c(145, 123, 285, 455, 202, 110),
MaleEduHSAbove_pop = c(0.786206896551724, 0.821138211382114,
0.919298245614035, 0.885714285714286, 0.876237623762376,
1), Female_HSgrad = c(11, 60, 87, 156, 23, 83), Female_SomeCollege = c(22,
25, 13, 47, 54, 65), Female_AssocDeg = c(0, 0, 20, 82, 0,
0), Female_BachDeg = c(5, 26, 0, 19, 0, 11), Female_GradDeg = c(5,
16, 0, 0, 0, 0), FemaleEduAboveHS = c(43, 127, 120, 304,
77, 159), Total_Female18.24 = c(53, 127, 192, 581, 92, 198
), FemaleEduHSAbove_pop = c(0.811320754716981, 1, 0.625,
0.523235800344234, 0.83695652173913, 0.803030303030303)), na.action = structure(c(`1` = 1L,
`41` = 41L, `43` = 43L, `45` = 45L, `47` = 47L, `111` = 111L,
`135` = 135L, `251` = 251L, `275` = 275L, `276` = 276L, `278` = 278L,
`378` = 378L, `382` = 382L, `412` = 412L, `418` = 418L, `445` = 445L,
`477` = 477L, `478` = 478L, `479` = 479L, `480` = 480L, `481` = 481L,
`482` = 482L, `483` = 483L, `484` = 484L, `485` = 485L, `486` = 486L,
`487` = 487L, `488` = 488L, `528` = 528L, `553` = 553L, `626` = 626L,
`656` = 656L, `675` = 675L, `698` = 698L, `699` = 699L, `700` = 700L,
`701` = 701L, `737` = 737L, `738` = 738L, `774` = 774L, `785` = 785L,
`822` = 822L, `825` = 825L, `837` = 837L, `838` = 838L, `858` = 858L,
`859` = 859L, `860` = 860L, `861` = 861L, `862` = 862L, `863` = 863L,
`864` = 864L, `865` = 865L, `866` = 866L, `867` = 867L, `868` = 868L,
), class = "omit"), row.names = c(NA,
6L), class = "data.frame")
# State names offered in the state selector: the 50 U.S. states in
# alphabetical order, taken from R's built-in `state.name` vector.
states <- datasets::state.name
# Outcome variables the user can choose to plot.
# Each entry must match a column name of `df` exactly, because the selected
# string is later used to look the column up. The male education share is
# stored as `MaleEduHSAbove_pop` (not `MaleEduAboveHS_pop`).
variables <- c(
  "total_empl", "total_payroll", "total_establishments",
  "largest_employer", "largest_employer_bypayroll",
  "trend_employee_change", "trend_payroll_change",
  "trend_establishment_change", "damage_cost_weather_total",
  "deaths_weather_total", "medianrent", "vacancyrate", "total_pop",
  "undertwo_percent", "mobility_rate", "unemploy_rate", "median_income",
  "renter_percent", "blackaa_percent", "hispanic_percent",
  "median_monthly_housing_costs", "MaleEduHSAbove_pop",
  "FemaleEduHSAbove_pop"
)
# Define inputs
# The state selector takes its choices from `states`, the vector of state
# names defined earlier in this script; the outcome selector takes its choices
# from `variables`. The chosen values are read as input$state_name and
# input$DV.
selectInput("state_name", label = "Select a state", choices = states)
selectInput("DV", label = "Outcome Measure", choices = variables)
# Filter data based on the state and outcome measure the user would like to investigate.
# `bar` is a reactive expression: it reads input$state_name and input$DV and
# returns one row per tract_type with a single summary column, `Outcome`.
bar <- reactive({
# Keep only the rows of df whose state matches the selected state.
st <- df %>%
filter(state == input$state_name)
bp <- st %>%
group_by(tract_type) %>%
# NOTE(review): st[, input$DV] indexes the whole filtered data frame `st`,
# not the rows of the current group, so every tract_type receives the same
# overall mean. This is the problem the question is asking about.
summarise(Outcome = mean(st[,input$DV]))
return(bp)
})
bar
更新
现在,这段代码已经能够按 input$state_name 成功过滤数据,但均值的计算存在问题。结果如下:
# A tibble: 2 x 2
tract_type Outcome
<chr> <dbl>
1 Contiguous 468296.
2 LICs 468296.
如您所见,两组计算出的均值完全相同。实际上,这个值等于 input$DV 所选变量在整个已过滤数据上的总体均值。也就是说,已过滤的 st 数据并没有按 tract_type 的两个水平成功分组。
答案 0 :(得分:0)
我明白您想做什么。问题在于,在您的 reactive 代码中,您试图对一个字符串求均值,这是行不通的。您真正需要的是通过列名来汇总 df 中的某一列。
在下面的示例中,我手动指定了要汇总的变量。请注意,investment_score_1_low_10_high 没有加引号:它在 R 中是一个符号(symbol)。
# Hard-coded illustration: the state and the summarised column are written
# directly into the pipeline. The column is referenced as a bare symbol
# (no quotes), so mean() is evaluated on that column within each tract_type.
st <- df %>%
  filter(state == "Alabama") %>%
  group_by(tract_type) %>%
  summarise(Outcome = mean(investment_score_1_low_10_high))
不过,我认为下面的写法应该可行:
# Reactive summary: mean of the user-selected outcome per tract_type for the
# user-selected state. Returns one row per tract_type with an `Outcome` column.
bar <- reactive({
  # Create a symbol from string.
  # input$DV holds the column name as text; sym() converts it to a symbol so
  # that `!!` can splice it into summarise() as a column reference.
  mean_variable <- sym(input$DV)
  bp <- df %>%
    filter(state == input$state_name) %>%
    group_by(tract_type) %>%
    summarise(Outcome = mean(!!mean_variable, na.rm = TRUE))
  return(bp)
})
有关 !! 运算符的用法及其用途的更多信息,可以在这里找到:Here
使用示例:Here
更好答案 1 :(得分:0)
# Reactive expression yielding, for the selected state, the per-tract_type
# mean (missing values dropped) of the outcome column chosen by the user.
bar <- reactive({
  # The selected column name arrives as a string; convert it to a symbol so
  # it can be unquoted with `!!` inside summarise().
  outcome_col <- sym(input$DV)

  df %>%
    filter(state == input$state_name) %>%
    group_by(tract_type) %>%
    summarise(Outcome = mean(!!outcome_col, na.rm = TRUE))
})