@dylanvanw派生的解决方案

Question

我正在尝试开发一个 Shiny 仪表板应用程序，该应用程序能够为用户选择的不同结果变量生成条形图。为此，我需要以响应式（reactive）方式对数据取子集，以生成聚合后的数据框。下面的代码已经能够以响应式方式成功过滤数据，但是当我尝试以响应式方式使用 dplyr::summarise() 时遇到了麻烦。

这是我当前的代码。 R版本3.6

dput(head(df))

# Output of dput(head(df)): the first six census-tract rows of `df`.
# The `na.action` attribute is only a truncated excerpt of the omitted-row
# index; the dangling trailing comma after its last element was removed so
# the literal parses.
structure(list(geoid = c("01001020200", "01001020300", "01001020700", 
"01001020802", "01001021000", "01001021100"), state = c("Alabama", 
"Alabama", "Alabama", "Alabama", "Alabama", "Alabama"), county = c("Autauga County", 
"Autauga County", "Autauga County", "Autauga County", "Autauga County", 
"Autauga County"), ozzone = structure(c(1L, 1L, 2L, 1L, 1L, 1L
), .Label = c("non.oz", "oz"), class = "factor"), tract_type = c("LICs", 
"Contiguous", "LICs", "Contiguous", "Contiguous", "LICs"), investment_score_1_low_10_high = c(4, 
6, 9, 10, 5, 6), socioeconomic_change_flag_1_yes_blank_no = c(0, 
0, 0, 0, 0, 0), fips_county = c("01001", "01001", "01001", "01001", 
"01001", "01001"), total_empl = c(51809L, 51809L, 51809L, 51809L, 
51809L, 51809L), total_payroll = c(338395L, 338395L, 338395L, 
338395L, 338395L, 338395L), total_establishments = c(5090L, 5090L, 
5090L, 5090L, 5090L, 5090L), largest_employer = c(72L, 72L, 72L, 
72L, 72L, 72L), largest_employer_bypayroll = c(44L, 44L, 44L, 
44L, 44L, 44L), trend_employee_change = c(2735.60000000046, 2735.60000000046, 
2735.60000000046, 2735.60000000046, 2735.60000000046, 2735.60000000046
), trend_payroll_change = c(23074.8000000037, 23074.8000000037, 
23074.8000000037, 23074.8000000037, 23074.8000000037, 23074.8000000037
), trend_establishment_change = c(53.4000000000084, 53.4000000000084, 
53.4000000000084, 53.4000000000084, 53.4000000000084, 53.4000000000084
), damage_cost_weather_total = c(20000, 20000, 20000, 20000, 
20000, 20000), deaths_weather_total = c(0L, 0L, 0L, 0L, 0L, 0L
), medianrent = c(537, 633, 525, 680, 409, 303), vacancyrate = c(0.108200455580866, 
0.113652113652114, 0.0436681222707424, 0.0512166859791425, 0.229962546816479, 
0.21030303030303), total_pop = c(503, 827, 900, 2989, 740, 813
), undertwo_percent = c(0.391650099403579, 0.351874244256348, 
0.397777777777778, 0.17096018735363, 0.301351351351351, 0.263222632226322
), mobility_rate = c(0.133702166897188, 0.0737753882915173, 0.196514423076923, 
0.172716680111141, 0.0641304347826087, 0.0681084570690769), unemploy_rate = c(0.0176991150442478, 
0.0273203592814371, 0.109881724532621, 0.0127906976744186, 0.0344982078853047, 
0.0281910728269381), median_income = c(41287, 46806, 41250, 64439, 
46607, 36450), renter_percent = c(0.337653478854025, 0.310596310596311, 
0.331877729257642, 0.268110942458949, 0.328686327077748, 0.365986394557823
), blackaa_percent = c(0.5451197053407, 0.264697193500739, 0.145906432748538, 
0.152916262243007, 0.258583690987124, 0.530922930542341), hispanic_percent = c(0.0105893186003683, 
0.0803545051698671, 0.0400584795321637, 0.0137651107385511, 0.00822603719599428, 
0.00666032350142721), transit_score_mean = c(0, 0, 0, 0, 0, 0
), life_expectancy = c(75.67, 75.67, 75.67, 75.67, 75.67, 75.67
), trend_life_expectancy = c(5.1, 5.1, 5.1, 5.1, 5.1, 5.1), median_monthly_housing_costs = c(885, 
885, 885, 885, 885, 885), pestilence_2018 = c(2, 2, 2, 2, 2, 
2), total_pop_county = c(6772, 6772, 6772, 6772, 6772, 6772), 
    deaths_weather_pop = c(0, 0, 0, 0, 0, 0), cost_weather_pop = c(2.95333727111636, 
    2.95333727111636, 2.95333727111636, 2.95333727111636, 2.95333727111636, 
    2.95333727111636), Male_HSgrad = c(75, 68, 211, 189, 97, 
    42), Male_SomeCollege = c(28, 18, 51, 111, 74, 38), Male_AssocDeg = c(4, 
    6, 0, 63, 0, 21), Male_BachDeg = c(7, 9, 0, 11, 0, 9), Male_GradDeg = c(0, 
    0, 0, 29, 6, 0), MaleEduAboveHS = c(114, 101, 262, 403, 177, 
    110), Total_Male18.24 = c(145, 123, 285, 455, 202, 110), 
    MaleEduHSAbove_pop = c(0.786206896551724, 0.821138211382114, 
    0.919298245614035, 0.885714285714286, 0.876237623762376, 
    1), Female_HSgrad = c(11, 60, 87, 156, 23, 83), Female_SomeCollege = c(22, 
    25, 13, 47, 54, 65), Female_AssocDeg = c(0, 0, 20, 82, 0, 
    0), Female_BachDeg = c(5, 26, 0, 19, 0, 11), Female_GradDeg = c(5, 
    16, 0, 0, 0, 0), FemaleEduAboveHS = c(43, 127, 120, 304, 
    77, 159), Total_Female18.24 = c(53, 127, 192, 581, 92, 198
    ), FemaleEduHSAbove_pop = c(0.811320754716981, 1, 0.625, 
    0.523235800344234, 0.83695652173913, 0.803030303030303)), na.action = structure(c(`1` = 1L, 
`41` = 41L, `43` = 43L, `45` = 45L, `47` = 47L, `111` = 111L, 
`135` = 135L, `251` = 251L, `275` = 275L, `276` = 276L, `278` = 278L, 
`378` = 378L, `382` = 382L, `412` = 412L, `418` = 418L, `445` = 445L, 
`477` = 477L, `478` = 478L, `479` = 479L, `480` = 480L, `481` = 481L, 
`482` = 482L, `483` = 483L, `484` = 484L, `485` = 485L, `486` = 486L, 
`487` = 487L, `488` = 488L, `528` = 528L, `553` = 553L, `626` = 626L, 
`656` = 656L, `675` = 675L, `698` = 698L, `699` = 699L, `700` = 700L, 
`701` = 701L, `737` = 737L, `738` = 738L, `774` = 774L, `785` = 785L, 
`822` = 822L, `825` = 825L, `837` = 837L, `838` = 838L, `858` = 858L, 
`859` = 859L, `860` = 860L, `861` = 861L, `862` = 862L, `863` = 863L, 
`864` = 864L, `865` = 865L, `866` = 866L, `867` = 867L, `868` = 868L
), class = "omit"), row.names = c(NA, 
6L), class = "data.frame")

# List of states to choose from: the 50 US state names in alphabetical
# order, taken from the built-in `datasets` package.
states <- datasets::state.name

# List of potential outcome variables to be plotted.
# Every entry must be an existing column name of `df`, because the selected
# value is used to look the column up when summarising.
variables <- c(
  "total_empl", "total_payroll", "total_establishments",
  "largest_employer", "largest_employer_bypayroll",
  "trend_employee_change", "trend_payroll_change",
  "trend_establishment_change",
  "damage_cost_weather_total", "deaths_weather_total",
  "medianrent", "vacancyrate", "total_pop", "undertwo_percent",
  "mobility_rate", "unemploy_rate", "median_income", "renter_percent",
  "blackaa_percent", "hispanic_percent", "median_monthly_housing_costs",
  # Was "MaleEduAboveHS_pop", which is not a column of `df`; the data uses
  # the same "HSAbove" spelling as the female counterpart.
  "MaleEduHSAbove_pop", "FemaleEduHSAbove_pop"
)

# Define inputs
# State selector: its value is read as `input$state_name`.
# NOTE(review): the original passed `choices = lookup`, a name not defined in
# the code shown here; the `states` vector defined above is used instead.
# Confirm there is no separate `lookup` object elsewhere.
selectInput("state_name", label = "Select a state", choices = states)

# Outcome selector: its value is read as `input$DV` and must be a column
# name of `df`.
selectInput("DV", label = "Outcome Measure", choices = variables)

#Filter data based on the State and outcome measure the user would like to investigate.

# Reactive expression: for the selected state, build one summary row per
# tract_type holding the mean of the user-selected outcome column.
bar <- reactive({

  # Keep only the rows for the state chosen in the `state_name` input.
  st <- df %>%
        filter(state == input$state_name) 

  # NOTE(review): `st[, input$DV]` indexes the whole filtered frame `st`, not
  # the current group, so mean() sees every row and each tract_type receives
  # the same value. `.data[[input$DV]]` (or `!!sym(input$DV)`) would be
  # evaluated per group instead.
  bp <- st %>%
        group_by(tract_type) %>%
        summarise(Outcome = mean(st[,input$DV]))

  return(bp)
})

bar

更新：现在，此代码已能成功根据 input$state_name 过滤数据，但是均值的计算存在问题。结果如下：

# A tibble: 2 x 2
  tract_type Outcome
  <chr>        <dbl>
1 Contiguous   468296.
2 LICs         468296.

如您所见，计算的均值是相同的。实际上，这些值对应于为input$DV选择的任何变量的总体平均值。因此，已过滤的st数据未成功分组为tract_type的两个级别。

Answer 1

我明白您想做什么。问题在于，在您的 reactive 代码块中，您实际上是在尝试计算一个字符串的均值，这是行不通的。您需要做的是通过提供列名来汇总 df 中的某一列。

在下面的示例中，我手动指定汇总变量。请注意，investment_score_1_low_10_high没有引号。 investment_score_1_low_10_high是R中的符号。

# Static example: mean investment score per tract_type, for Alabama only.
# The column is passed to mean() as a bare symbol (no quotes), so it is
# looked up inside the grouped data.
st <- df %>%
  filter(state == "Alabama") %>% 
  group_by(tract_type) %>%
  summarise(Outcome = mean(investment_score_1_low_10_high))

但是我认为这应该起作用：

# Reactive expression: mean of the user-selected outcome column per
# tract_type, restricted to the state chosen in the UI.
bar <- reactive({
  # Create a symbol from string.
  mean_variable <- sym(input$DV)
  bp <- df %>%
        filter(state == input$state_name) %>%
        group_by(tract_type) %>%
        # `!!` unquotes the symbol so mean() runs on that column per group.
        summarise(Outcome = mean(!! mean_variable, na.rm = TRUE))

  return(bp)
})

有关!!的使用及其用途的更多信息，可以在这里找到：Here

使用示例Here

更好

Answer 2

@dylanvanw派生的解决方案

# Reactive summary: one row per tract_type with the NA-ignoring mean of the
# column named by `input$DV`, for the state named by `input$state_name`.
bar <- reactive({
  # Turn the selected column name (a string) into a symbol for tidy eval.
  outcome_col <- sym(input$DV)

  selected_state <- filter(df, state == input$state_name)
  by_tract <- group_by(selected_state, tract_type)

  # `!!` unquotes the symbol, so mean() is computed on that column per group.
  summarise(by_tract, Outcome = mean(!!outcome_col, na.rm = TRUE))
})

如何在Flexdashboard / Shiny中以反应方式汇总（dplyr）用户指定的变量？

2 个答案:

@dylanvanw派生的解决方案