R:列数未知的行的最小值和最大值

时间:2018-08-25 06:58:21

标签: r dplyr

对于数据帧,我需要按行查找从第2列开始的未知列数的最小值和最大值。这是一个示例:

library(tidyverse)

# Example data: an id column followed by three columns of uniform random
# draws. The outer parentheses make the assignment print its value.
(test_data <- tibble(
  id = seq_len(9),
  x  = runif(9),
  x2 = runif(9),
  x3 = runif(9)
))
# NOTE(review): presumably the number of value columns in the real data set
samples <- 100

# With the column names spelled out, rowwise() makes min()/max() work on one
# row at a time, so the per-row minimum and maximum come out correctly.
(test_1 <- mutate(
  rowwise(test_data),
  min_val = min(x, x2, x3),
  max_val = max(x, x2, x3)
))

# This example does not: inside mutate(), `x:x3` is not a column range.
# Under rowwise(), x and x3 are the current row's scalar values, so `:` builds
# a numeric sequence starting at x and moving in steps of 1 toward x3;
# min()/max() are then taken over that sequence rather than over the
# columns x, x2 and x3.
(test_2 <- test_data %>% 
    rowwise() %>%
    mutate(min_val = min(x:x3), max_val = max(x:x3)))

我实际上想做的是

# Pseudo-code only (not valid R): the intent is "min/max over all columns
# from position 2 onward".
# NOTE(review): `2:samples+1` parses as `(2:samples) + 1`, i.e. 3:(samples + 1);
# columns 2 through samples + 1 would be written `2:(samples + 1)`.
mutate(min_val = min([,2:samples+1]), max_val = max([,2:samples+1])))

因为(1)我希望保留 id 列(以便之后与另一个数据框连接),并且(2)按列位置指定似乎是实现此目的最直接的方法,因为我并不关心列名,而且 samples(列数)很大。

谢谢!

编辑后的示例

下面这个(建议的)方案

# Suggested approach: nest all non-id columns per id, take min/max over each
# nested tibble, then unnest. Nesting groups rows by id, so this relies on
# every id occurring exactly once.
test_data %>%
  nest(-id) %>%                         # nest every column except id into a list-column `data`
  mutate(min_val = map(data, min),      # min over all values of each nested tibble (list-column)
         max_val = map(data, max)) %>%  # max over all values of each nested tibble (list-column)
  unnest()                              # expand the list-columns back into regular columns

可以正确处理原始测试数据。但是,现实世界中的数据具有重复的 id,例如

# Same layout as the original test data, but every id occurs twice.
(test_data <- tibble(
  id = rep(1:9, times = 2),
  x  = runif(18),
  x2 = runif(18),
  x3 = runif(18)
))

并导致错误:“Error: All nested columns must have the same number of elements.”

3 个答案:

答案 0 :(得分:1)

一种可能的 tidyverse 解决方案是:对 id 以外的所有列执行 nest,然后使用 map 获得 min 和 max。您无需指定任何列名:

在具有多个ID的情况下,您可以使用以下方法:

library(tidyverse)

# Test data: an id column followed by an arbitrary number of numeric columns.
(test_data <- tibble(id = 1:9,
                     x = runif(9),
                     x2 = runif(9),
                     x3 = runif(9)))
samples <- 100

# Row-wise min/max over every column except id, without naming the columns.
# A per-row key (row_id) is kept out of the nest together with id, so each
# nested tibble holds exactly one row. Nesting by id alone puts all rows that
# share an id into one nested tibble, which breaks the row-by-row result as
# soon as ids repeat.
test_data %>%
  mutate(row_id = row_number()) %>%         # unique key per original row
  nest(-id, -row_id) %>%                    # one single-row tibble per original row
  mutate(min_val = map_dbl(data, min),      # plain numeric columns, not list-columns
         max_val = map_dbl(data, max)) %>%
  unnest(data) %>%                          # expand the nested value columns again
  select(-row_id)                           # drop the helper key

# # A tibble: 9 x 6
#      id min_val max_val      x     x2    x3
#   <int>   <dbl>   <dbl>  <dbl>  <dbl> <dbl>
# 1     1  0.0217   0.239 0.130  0.0217 0.239
# 2     2  0.125    0.814 0.625  0.814  0.125
# 3     3  0.281    0.770 0.331  0.770  0.281
# 4     4  0.123    0.868 0.123  0.644  0.868
# 5     5  0.149    0.340 0.149  0.340  0.337
# 6     6  0.496    0.865 0.596  0.865  0.496
# 7     7  0.0766   0.984 0.0766 0.656  0.984
# 8     8  0.272    0.926 0.702  0.926  0.272
# 9     9  0.433    0.912 0.912  0.433  0.590

答案 1 :(得分:0)

如果要根据列名选择列:

library(tidyverse)

# Example data: an id column plus three columns of uniform random draws.
df <- tibble(
  id = seq_len(9),
  x  = runif(9),
  x2 = runif(9),
  x3 = runif(9)
)

# Give every row its own key so that each group below is exactly one row.
df <- tibble::rowid_to_column(df, var = "id_uni")

# For each single-row group, take min/max over the columns whose name
# contains "x". do() returns the results as list-columns, which unnest()
# flattens into ordinary columns.
df2 <- df %>%
  group_by(id_uni) %>%
  do(
    min_val = min(select(., grep(pattern = "x", x = names(df), value = TRUE))),
    max_val = max(select(., grep(pattern = "x", x = names(df), value = TRUE)))
  ) %>%
  unnest()

# Attach the per-row results to the original columns via the row key.
df <- df %>% left_join(df2, by = "id_uni")

# A tibble: 9 x 7
  id_uni    id     x     x2      x3 min_val max_val
   <int> <int> <dbl>  <dbl>   <dbl>   <dbl>   <dbl>
1      1     1 0.714 0.879  0.943   0.714     0.943
2      2     2 0.240 0.409  0.375   0.240     0.409
3      3     3 0.896 0.205  0.00804 0.00804   0.896
4      4     4 0.483 0.471  0.981   0.471     0.981
5      5     5 0.263 0.379  0.378   0.263     0.379
6      6     6 0.320 0.248  0.986   0.248     0.986
7      7     7 0.664 0.925  0.140   0.140     0.925
8      8     8 0.728 0.466  0.562   0.466     0.728
9      9     9 0.571 0.0139 0.794   0.0139    0.794

或者:

# Same computation, but the value columns are picked with starts_with("x")
# instead of a grep() on the column names.
df2 <- df %>%
  group_by(id_uni) %>%
  do(
    min_val = min(select(., starts_with("x"))),
    max_val = max(select(., starts_with("x")))
  ) %>%
  unnest()

# Attach the per-row results to the original columns via the row key.
df <- df %>% left_join(df2, by = "id_uni")

# A tibble: 9 x 7
  id_uni    id     x     x2     x3 min_val max_val
   <int> <int> <dbl>  <dbl>  <dbl>   <dbl>   <dbl>
1      1     1 0.101 0.606  0.743   0.101    0.743
2      2     2 0.312 0.496  0.0684  0.0684   0.496
3      3     3 0.434 0.109  0.532   0.109    0.532
4      4     4 0.616 0.846  0.123   0.123    0.846
5      5     5 0.339 0.446  0.676   0.339    0.676
6      6     6 0.580 0.750  0.0560  0.0560   0.750
7      7     7 0.830 0.796  0.798   0.796    0.830
8      8     8 0.335 0.391  0.663   0.335    0.663
9      9     9 0.382 0.0148 0.244   0.0148   0.382

答案 2 :(得分:0)

这是使用 pmin/pmax 的一个选项:

library(tidyverse)
# Turn every column name except the first (id) into a symbol and splice the
# symbols into pmin()/pmax(), which compare those columns element-wise and so
# yield one minimum and one maximum per row.
mutate(
  test_data,
  min_val = pmin(!!!rlang::syms(names(test_data)[-1])),
  max_val = pmax(!!!rlang::syms(names(test_data)[-1]))
)
# A tibble: 9 x 6
#     id     x     x2     x3 min_val max_val
#  <int> <dbl>  <dbl>  <dbl>   <dbl>   <dbl>
#1     1 0.293 0.255  0.501   0.255    0.501
#2     2 0.225 0.605  0.139   0.139    0.605
#3     3 0.704 0.371  0.0939  0.0939   0.704
#4     4 0.519 0.672  0.552   0.519    0.672
#5     5 0.663 0.673  0.725   0.663    0.725
#6     6 0.920 0.320  0.138   0.138    0.920
#7     7 0.280 0.904  0.223   0.223    0.904
#8     8 0.764 0.198  0.688   0.198    0.764
#9     9 0.802 0.0442 0.0765  0.0442   0.802

数据

# Fix the RNG seed so the random columns are reproducible.
set.seed(24)
# The three runif() calls stay in this order so the draws are unchanged.
test_data <- tibble(
  id = seq_len(9),
  x  = runif(9),
  x2 = runif(9),
  x3 = runif(9)
)