整理“基准”列

时间:2019-02-18 18:05:04

标签: r tidyverse

我有一个数据框,该数据框通常很整洁,但是2列包含基准,而不是将基准作为观察值。我该如何整理,以便为每个唯一的FYQ和Metric组合在“ Facility_label”下添加“ Facility_score”和“ TTP” col_name作为观察值?

输入数据:

library(zoo)

dd <- structure(list(Facility_label = structure(c(1L, 1L, 1L, 1L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L), .Label = c("A", "B", "C", 
"D", "Nashville"), class = "factor"), FYQ = structure(c(2017.75, 
2018, 2018.25, 2018.5, 2017.75, 2018, 2018.25, 2018.5, 2018.75, 
2017.75, 2018, 2018.25, 2018.5, 2018.75, 2017.75, 2018, 2018.25, 
2018.5, 2018.75, 2017.75, 2018, 2018.25, 2018.5, 2018.75, 2017.75, 
2018, 2018.25, 2018.5, 2018.75, 2017.75), class = "yearqtr"), 
    Metric = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 
    1L, 1L, 1L, 1L, 1L, 2L), .Label = c("Safety Recall", "Turnaround days", 
    "Consult Active <= 30d", "Consult Pending <- 7d", "Consult Scheduling <- 90d", 
    "ICB Compliance Rate", "FCA Assessment", "Minor construction execution", 
    "NRM funding execution", "Deficincies", "%Deficienceis corrected among corrected or action plan", 
    "%Deficienceis corrected or action plan", "Ratio of Hospital Staff to HR FTE", 
    "Turnover Rate", "GEMS no Action", "Lost time claims", "RTMS risk score", 
    "DOC Control", "Loaner deficiencies", "Pretreatment", "RME rate", 
    "SPS staff vacany rate", "Stock Inactive", "MSPV-NG", "Days to close prosthetis consult", 
    "%Prosthetic PO using national contracts"), class = "factor"), 
    Facility_score = c(84.78802993, 95.59659091, 100, 100, 77.61732852, 
    57.87671233, 81.28898129, 33.33333333, 31.57894737, 10.2, 
    7.902356902, 8.62, 11.71, 13.15, 30.98236776, 33.26086957, 
    31.19584055, 54.54545455, 27.27272727, 11, 17.19132653, 26.02008197, 
    22.29, 30.41, 89.09090909, 93.47826087, 82.10735586, 91.66666667, 
    87.5, 3.2), `Facility mean` = c(85.35550152, 87.31899147, 
    93.11498231, 100, 85.35550152, 87.31899147, 93.11498231, 
    100, 100, 12, 13.06073298, 12.2, 11.51, 10.56, 85.35550152, 
    87.31899147, 93.11498231, 100, 100, 12, 13.06073298, 12.2, 
    11.51, 10.56, 85.35550152, 87.31899147, 93.11498231, 100, 
    100, 12), TTP_score = c(100, 100, 100, 100, 100, 100, 100, 
    100, 100, 5.65, 5.063953488, 4.779310345, 4.47, 4.545, 100, 
    100, 100, 100, 100, 5.65, 5.063953488, 4.779310345, 4.47, 
    4.545, 100, 100, 100, 100, 100, 5.65)), row.names = c(NA, 
-30L), class = c("tbl_df", "tbl", "data.frame"))

所需的输出:

dd_output <- structure(list(Facility_label = c("A", "Facility mean", "TTP score", 
"A", "Facility mean", "TTP score", "A", "Facility mean", "TTP score", 
"A", "Facility mean", "TTP score", "B", "B", "B", "B", "B", "B", 
"Facility mean", "TTP score", "B", "Facility mean", "TTP score", 
"B", "Facility mean", "TTP score", "B", "Facility mean", "TTP score", 
"B", "Facility mean", "TTP score", "C", "C", "C", "C", "C", "C", 
"C", "C", "C", "C", "D", "D", "D", "D", "D", "D"), FYQ = c("2017 Q4", 
"2017 Q4", "2017 Q4", "2018 Q1", "2018 Q1", "2018 Q1", "2018 Q2", 
"2018 Q2", "2018 Q2", "2018 Q3", "2018 Q3", "2018 Q3", "2017 Q4", 
"2018 Q1", "2018 Q2", "2018 Q3", "2018 Q4", "2017 Q4", "2017 Q4", 
"2017 Q4", "2018 Q1", "2018 Q1", "2018 Q1", "2018 Q2", "2018 Q2", 
"2018 Q2", "2018 Q3", "2018 Q3", "2018 Q3", "2018 Q4", "2018 Q4", 
"2018 Q4", "2017 Q4", "2018 Q1", "2018 Q2", "2018 Q3", "2018 Q4", 
"2017 Q4", "2018 Q1", "2018 Q2", "2018 Q3", "2018 Q4", "2017 Q4", 
"2018 Q1", "2018 Q2", "2018 Q3", "2018 Q4", "2017 Q4"), Metric = c("Safety Recall", 
"Safety Recall", "safety Recall", "Safety Recall", "Safety Recall", 
"Safety Recall", "Safety Recall", "Safety Recall", "Safety Recall", 
"Safety Recall", "Safety Recall", "Safety Recall", "Safety Recall", 
"Safety Recall", "Safety Recall", "Safety Recall", "Safety Recall", 
"Turnaround days", "Turnaround days", "Turnaround days", "Turnaround days", 
"Turnaround days", "Turnaround days", "Turnaround days", "Turnaround days", 
"Turnaround days", "Turnaround days", "Turnaround days", "Turnaround days", 
"Turnaround days", "Turnaround days", "Turnaround days", "Safety Recall", 
"Safety Recall", "Safety Recall", "Safety Recall", "Safety Recall", 
"Turnaround days", "Turnaround days", "Turnaround days", "Turnaround days", 
"Turnaround days", "Safety Recall", "Safety Recall", "Safety Recall", 
"Safety Recall", "Safety Recall", "Turnaround days"), Facility_score = c(84.78802993, 
85.35550152, 100, 95.59659091, 87.31899147, 100, 100, 93.11498231, 
100, 100, 100, 100, 77.61732852, 57.87671233, 81.28898129, 33.33333333, 
31.57894737, 10.2, 12, 5.65, 7.902356902, 13.06073298, 5.063953488, 
8.62, 12.2, 4.779310345, 11.71, 11.51, 4.47, 13.15, 10.56, 4.545, 
30.98236776, 33.26086957, 31.19584055, 54.54545455, 27.27272727, 
11, 17.19132653, 26.02008197, 22.29, 30.41, 89.09090909, 93.47826087, 
82.10735586, 91.66666667, 87.5, 3.2)), class = c("spec_tbl_df", 
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -48L), spec = structure(list(
    cols = list(X1 = structure(list(), class = c("collector_skip", 
    "collector")), Facility_label = structure(list(), class = c("collector_character", 
    "collector")), FYQ = structure(list(), class = c("collector_character", 
    "collector")), Metric = structure(list(), class = c("collector_character", 
    "collector")), Facility_score = structure(list(), class = c("collector_double", 
    "collector"))), default = structure(list(), class = c("collector_guess", 
    "collector")), skip = 1), class = "col_spec"))

1 个答案:

答案 0 :(得分:1)

我们可以通过tidyr::gather和一些dplyr::mutate操作来做到这一点:

library(tidyverse)

dd %>%
  mutate(ID = row_number()) %>%
  gather(var, Facility_score, Facility_score:TTP_score) %>%
  group_by(FYQ, Metric,
           temp_ID = case_when(var == "Facility mean" ~ 1, 
                               var == "TTP_score" ~ 2, 
                               TRUE ~ 0)) %>%
  slice(if(any(temp_ID == 0)) row_number() else 1) %>%
  mutate(Facility_label = if_else(var == "Facility_score", as.character(Facility_label), var)) %>%
  ungroup() %>%
  arrange(ID, temp_ID) %>% 
  select(ID, everything(), -var, -temp_ID) 

请注意,我已经添加了ID列以指示原始行号。这样一来,将同一行中的所有分数相加时,就不会造成混淆。

注释:

  1. gather将表从宽格式重整为长格式,将Facility_scoreFacility meanTTP_score的条目视为新的Facility_score 。创建变量var来临时存储值标签。

  2. 然后我们group_by FYQMetric和一个临时ID变量(temp_ID)通过将Facility mean设置为{{1} },1TTP_score,以及2var中的所有其他内容。

  3. 基于0,如果temp_IDslice,则使用0来捕获所有行,否则使用第一行。这样会有效地返回与Facility_score相对应的所有行,但是在每个Facility mean + TTP_score组合中,只有FYQMetric之一。

  4. 接下来,我们用Facility_label中的相应标签替换var

  5. 最后,ungrouparrange分别IDtemp_ID,并在删除vartemp_ID的同时重新排列列顺序({everything很有用,当我们想在不影响其他变量的情况下将一个或多个变量放在前面时。)

输出:

# A tibble: 50 x 5
      ID Facility_label FYQ           Metric          Facility_score
   <int> <chr>          <S3: yearqtr> <fct>                    <dbl>
 1     1 A              2017 Q4       Safety Recall             84.8
 2     1 Facility mean  2017 Q4       Safety Recall             85.4
 3     1 TTP_score      2017 Q4       Safety Recall            100  
 4     2 A              2018 Q1       Safety Recall             95.6
 5     2 Facility mean  2018 Q1       Safety Recall             87.3
 6     2 TTP_score      2018 Q1       Safety Recall            100  
 7     3 A              2018 Q2       Safety Recall            100  
 8     3 Facility mean  2018 Q2       Safety Recall             93.1
 9     3 TTP_score      2018 Q2       Safety Recall            100  
10     4 A              2018 Q3       Safety Recall            100  
11     4 Facility mean  2018 Q3       Safety Recall            100  
12     4 TTP_score      2018 Q3       Safety Recall            100  
13     5 B              2017 Q4       Safety Recall             77.6
14     6 B              2018 Q1       Safety Recall             57.9
15     7 B              2018 Q2       Safety Recall             81.3
16     8 B              2018 Q3       Safety Recall             33.3
17     9 B              2018 Q4       Safety Recall             31.6
18     9 Facility mean  2018 Q4       Safety Recall            100  
19     9 TTP_score      2018 Q4       Safety Recall            100  
20    10 B              2017 Q4       Turnaround days           10.2
# ... with 30 more rows