R - 将data.frame中多列的值与查找表进行匹配

时间:2018-02-02 19:29:36

标签: r dplyr purrr

目标:我想把数据框 data 中 t1 和 t2 列的值替换为查找表 lookup 中对应的值(PWT 列)。我已经看过一些针对单个列执行此操作的教程,但我希望以编程方式对 data 中任意数量的 t 列(例如 t1、t2、t3、t4、t5……)执行此操作。

Lookup

 # A tibble: 6 x 4 (HEAD)
      Response `Final Fil.` Adjustment   PWT
      <chr>           <dbl>      <dbl> <dbl>
    1 00000            9.00      0.500  9.50
    2 00001            9.00     -0.500  8.50
    3 00010            7.00      0.500  7.50
    4 00011            7.00     -0.500  6.50
    5 00100            7.00      0.500  7.50
    6 00101            7.00     -0.500  6.50

lookup(dput 输出)

    # dput() output of the full lookup table: 32 rows with the columns
    # Response (character code), `Final Fil.`, Adjustment and PWT.
    # In every row PWT equals `Final Fil.` + Adjustment.
    # NOTE(review): the last Response value is "1111" (four characters) while
    # all other codes have five characters -- confirm whether "11111" was meant.
    structure(list(Response = c("00000", "00001", "00010", "00011", 
"00100", "00101", "00110", "00111", "01000", "01001", "01010", 
"01011", "01100", "01101", "01110", "01111", "10000", "10001", 
"10010", "10011", "10100", "10101", "10110", "10111", "11000", 
"11001", "11010", "11011", "11100", "11101", "11110", "1111"), 
    `Final Fil.` = c(9, 9, 7, 7, 7, 7, 5, 5, 7, 7, 5, 5, 5, 5, 
    3, 3, 7, 7, 5, 5, 5, 5, 3, 3, 5, 5, 3, 3, 3, 3, 1, 2), Adjustment = c(0.5, 
    -0.5, 0.5, -0.5, 0.5, -0.5, 0.5, -0.5, 0.5, -0.5, 0.5, -0.5, 
    0.5, -0.5, 0.5, -0.5, 0.5, -0.5, 0.5, -0.5, 0.5, -0.5, 0.5, 
    -0.5, 0.5, -0.5, 0.5, -0.5, 0.5, -0.5, 0.5, -0.5), PWT = c(9.5, 
    8.5, 7.5, 6.5, 7.5, 6.5, 5.5, 4.5, 7.5, 6.5, 5.5, 4.5, 5.5, 
    4.5, 3.5, 2.5, 7.5, 6.5, 5.5, 4.5, 5.5, 4.5, 3.5, 2.5, 5.5, 
    4.5, 3.5, 2.5, 3.5, 2.5, 1.5, 1.5)), .Names = c("Response", 
"Final Fil.", "Adjustment", "PWT"), row.names = c(NA, -32L), class = c("tbl_df", 
"tbl", "data.frame"))

Data

# A tibble: 6 x 4 (HEAD)
  Mouse Group t1    t2   
  <dbl> <chr> <chr> <chr>
1  1.00 SNI   00011 00000
2  2.00 Sham  00011 00001
3  3.00 SNI   00000 00001
4  4.00 Sham  00110 00000
5  5.00 SNI   00001 00001
6  6.00 Sham  00010 00101

data(dput 输出)

# dput() output of the example data: 8 rows with the columns Mouse (numeric),
# Group, t1 and t2. t1 and t2 hold the response codes as character strings,
# which are the values to be looked up in the lookup table's Response column.
structure(list(Mouse = c(1, 2, 3, 4, 5, 6, 7, 8), Group = c("SNI", 
"Sham", "SNI", "Sham", "SNI", "Sham", "SNI", "Sham"), t1 = c("00011", 
"00011", "00000", "00110", "00001", "00010", "01001", "00110"
), t2 = c("00000", "00001", "00001", "00000", "00001", "00101", 
"00100", "00010")), .Names = c("Mouse", "Group", "t1", "t2"), row.names = c(NA, 
-8L), class = c("tbl_df", "tbl", "data.frame"))

我可以用以下代码对 data 的 t1 列完成此操作:

# Position of each t1 code within the lookup table's Response column
# (NA where a code does not occur in the table).
indices <- match(data[["t1"]], table = lookup[["Response"]])
# Overwrite t1 with the PWT value found at those positions.
response <- mutate(data, t1 = lookup[["PWT"]][indices])

新表 response 中的输出:
 Mouse Group    t1 t2    t3    t4    t5    t6   
  <dbl> <chr> <dbl> <chr> <lgl> <lgl> <lgl> <lgl>
1  1.00 SNI    6.50 00000 NA    NA    NA    NA   
2  2.00 Sham   6.50 00001 NA    NA    NA    NA   
3  3.00 SNI    9.50 00001 NA    NA    NA    NA   
4  4.00 Sham   5.50 00000 NA    NA    NA    NA   
5  5.00 SNI    8.50 00001 NA    NA    NA    NA   
6  6.00 Sham   7.50 00101

我现在想找一种更程序化的方式,对我拥有的所有 t 列都执行此操作。

1 个答案:

答案 0 :(得分:1)

我认为最简单的方法是把这个问题重新表述为表连接,而不是匹配。我用 dplyr、purrr 和 base::merge()(用于处理字符串)写了一个解决方案。这应该适合你!


library(dplyr)
library(purrr)
# Example input: 8 rows; t1 and t2 hold the response codes as strings.
# The list is already named, so no separate names attribute is passed.
data <- structure(
  list(
    Mouse = c(1, 2, 3, 4, 5, 6, 7, 8),
    Group = c("SNI", "Sham", "SNI", "Sham", "SNI", "Sham", "SNI", "Sham"),
    t1 = c(
      "00011", "00011", "00000", "00110", "00001", "00010", "01001", "00110"
    ),
    t2 = c(
      "00000", "00001", "00001", "00000", "00001", "00101", "00100", "00010"
    )
  ),
  row.names = c(NA, -8L),
  class = c("tbl_df", "tbl", "data.frame")
)

# Lookup table: 32 response codes, each with its `Final Fil.` value, an
# Adjustment of +0.5 / -0.5 (alternating), and PWT = `Final Fil.` + Adjustment.
# Values are laid out eight per line so the four columns line up row by row.
# NOTE(review): the last Response code is "1111" (four characters) while all
# others have five -- kept exactly as given; confirm whether "11111" was meant.
lookup <- structure(
  list(
    Response = c(
      "00000", "00001", "00010", "00011", "00100", "00101", "00110", "00111",
      "01000", "01001", "01010", "01011", "01100", "01101", "01110", "01111",
      "10000", "10001", "10010", "10011", "10100", "10101", "10110", "10111",
      "11000", "11001", "11010", "11011", "11100", "11101", "11110", "1111"
    ),
    `Final Fil.` = c(
      9, 9, 7, 7, 7, 7, 5, 5,
      7, 7, 5, 5, 5, 5, 3, 3,
      7, 7, 5, 5, 5, 5, 3, 3,
      5, 5, 3, 3, 3, 3, 1, 2
    ),
    Adjustment = rep(c(0.5, -0.5), times = 16),
    PWT = c(
      9.5, 8.5, 7.5, 6.5, 7.5, 6.5, 5.5, 4.5,
      7.5, 6.5, 5.5, 4.5, 5.5, 4.5, 3.5, 2.5,
      7.5, 6.5, 5.5, 4.5, 5.5, 4.5, 3.5, 2.5,
      5.5, 4.5, 3.5, 2.5, 3.5, 2.5, 1.5, 1.5
    )
  ),
  row.names = c(NA, -32L),
  class = c("tbl_df", "tbl", "data.frame")
)

# Columns that are carried through unchanged.
not_matched <- c("Mouse", "Group")
# Every remaining column of `data` is one whose codes get recoded.
to_match <- setdiff(colnames(data), not_matched)
to_match
#> [1] "t1" "t2"

# Keep only the key column (Response) and the value to substitute (PWT).
lookup_subset <- lookup[, c("Response", "PWT")]
lookup_subset
#> # A tibble: 32 x 2
#>    Response   PWT
#>       <chr> <dbl>
#>  1    00000   9.5
#>  2    00001   8.5
#>  3    00010   7.5
#>  4    00011   6.5
#>  5    00100   7.5
#>  6    00101   6.5
#>  7    00110   5.5
#>  8    00111   4.5
#>  9    01000   7.5
#> 10    01001   6.5
#> # ... with 22 more rows

# Recode every column named in `to_match` by looking its values up in
# `lookup_subset` (Response -> PWT), then re-attach the untouched columns.
#
# dplyr::left_join() is used rather than base::merge() because merge() sorts
# its result by the key column by default. That reorders the rows, so the
# recoded values would no longer line up with `data[not_matched]` when the
# columns are bound back together. left_join() keeps the rows of its
# left-hand table in their original order.
# Codes with no entry in the lookup table become NA.

# A duplicated key would make the join return extra rows and break alignment.
stopifnot(anyDuplicated(lookup_subset$Response) == 0L)

to_match %>%
  map_dfc(function(col) {
    data[col] %>%
      # Join this column (named by the string in `col`) to the Response key.
      left_join(lookup_subset, by = set_names("Response", col)) %>%
      # Drop the original code column, leaving only PWT ...
      select(-1) %>%
      # ... and name it after the column it replaces.
      set_names(col)
  }) %>%
  bind_cols(data[not_matched], .)
#> # A tibble: 8 x 4
#>   Mouse Group    t1    t2
#>   <dbl> <chr> <dbl> <dbl>
#> 1     1   SNI   6.5   9.5
#> 2     2  Sham   6.5   8.5
#> 3     3   SNI   9.5   8.5
#> 4     4  Sham   5.5   9.5
#> 5     5   SNI   8.5   8.5
#> 6     6  Sham   7.5   6.5
#> 7     7   SNI   6.5   7.5
#> 8     8  Sham   5.5   7.5