Question

我想在R中按分组变量同时应用多个条件：在每个分组（ID）内找出Index的最小值和最大值，并取出它们各自对应的价格（Price）。示例数据和期望的输出如下所示。

# Sample data: each row is one observation of an ID with its Price and Index.
df <- data.frame(
  ID = rep(c("ABC", "BCD", "DEF"), times = c(2, 3, 2)),
  Price = c(31, 81, 100, 84, 15, 31, 42),
  Index = c(3, 6, 2, 9, 5, 12, 18)
)

df
   ID Price Index
1 ABC    31     3
2 ABC    81     6
3 BCD   100     2
4 BCD    84     9
5 BCD    15     5
6 DEF    31    12
7 DEF    42    18

例如，ID = "BCD" 共有3个条目。按索引来看，min(Index) = 2 对应的价格为100，max(Index) = 9 对应的价格为84。

下面的尝试并没有按分组进行计算：

# Asker's attempt (does not work as intended).
# `df$Order` extracts the full column from the original, ungrouped data frame,
# so min()/max() are taken over all rows instead of per ID; they also return
# the index value itself rather than the matching Price.
# NOTE(review): the sample data frame above names this column `Index`, while
# this call and the printed output use `Order` -- presumably the same column.
df %>% group_by(ID) %>% mutate(firstPrice = min(df$Order), lastPrice = max(df$Order)) 

  ID    Price Order firstPrice lastPrice
  <fct> <dbl> <dbl>      <dbl>     <dbl>
1 ABC      31     3          2        18
2 ABC      81     6          2        18
3 BCD     100     2          2        18
4 BCD      84     9          2        18
5 BCD      15     5          2        18
6 DEF      31    12          2        18
7 DEF      42    18          2        18

预期输出

ID    Price     Order firstPrice lastPrice
1 ABC      31     3        31       81
2 ABC      81     6        31       81
3 BCD     100     2        100      84
4 BCD      84     9        100      84
5 BCD      15     5        100      84
6 DEF      31    12        31       42
7 DEF      42    18        31       42

Answer 1

我们可以group_by ID并使用which.min和which.max来获取索引，然后从中获取相应的Price

library(dplyr)

# Per ID: which.min()/which.max() give the position of the smallest/largest
# Index inside the group; that position is then used to pick the Price.
df %>%
  group_by(ID) %>%
  mutate(
    firstPrice = Price[which.min(Index)],
    lastPrice = Price[which.max(Index)]
  )



#   ID    Price Index firstPrice lastPrice
#  <fct> <dbl> <dbl>      <dbl>     <dbl>
#1 ABC      31     3         31        81
#2 ABC      81     6         31        81
#3 BCD     100     2        100        84
#4 BCD      84     9        100        84
#5 BCD      15     5        100        84
#6 DEF      31    12         31        42
#7 DEF      42    18         31        42

Answer 2

您也可以这样做：

# Per ID: take the Price on the row whose Index equals the group minimum /
# maximum. The logical subset can return several values when the extreme
# Index is tied within a group, which mutate() cannot recycle to the group
# size; first() reduces the match to a single value so the result is always
# one price per group.
df %>%
  group_by(ID) %>%
  mutate(firstprice = first(Price[Index == min(Index)]),
         lastprice = first(Price[Index == max(Index)]))

  ID    Price Index firstprice lastprice
  <fct> <dbl> <dbl>      <dbl>     <dbl>
1 ABC     31.    3.        31.       81.
2 ABC     81.    6.        31.       81.
3 BCD    100.    2.       100.       84.
4 BCD     84.    9.       100.       84.
5 BCD     15.    5.       100.       84.
6 DEF     31.   12.        31.       42.
7 DEF     42.   18.        31.       42.

或者：

# Sort rows by Index inside each ID; afterwards the first and last row of a
# group are the ones with the smallest and largest Index, so their Price can
# be read directly with first()/last(). This avoids re-matching on Index,
# which would yield more than one value if the extreme Index were tied.
df %>%
  group_by(ID) %>%
  arrange(Index, .by_group = TRUE) %>%
  mutate(firstprice = first(Price),
         lastprice = last(Price))

  ID    Price Index firstprice lastprice
  <fct> <dbl> <dbl>      <dbl>     <dbl>
1 ABC     31.    3.        31.       81.
2 ABC     81.    6.        31.       81.
3 BCD    100.    2.       100.       84.
4 BCD     15.    5.       100.       84.
5 BCD     84.    9.       100.       84.
6 DEF     31.   12.        31.       42.
7 DEF     42.   18.        31.       42.

Answer 3

使用data.table的选项：

library(data.table)
# Convert df to a data.table by reference, then compute one summary row per ID
# holding the Price at the smallest and at the largest Index.
setDT(df)
df[, list(firstprice = Price[which.min(Index)],
          lastprice = Price[which.max(Index)]),
   by = ID]
#     ID firstprice lastprice
#1: ABC         31        81
#2: BCD        100        84
#3: DEF         31        42

或者使用tidyverse的另一个选项：

library(tidyverse)
# Keep only the rows at the minimum / maximum Index of each ID, label them,
# and reshape to one row per ID with firstprice / lastprice columns.
# The label is derived from Index rather than assigned by row position, so the
# result does not depend on the rows already being sorted by Index, and a
# single-row group no longer errors (it is labelled "firstprice" only).
# spread() is kept as in the original; tidyr >= 1.0 offers pivot_wider() as
# its replacement.
df %>%
  group_by(ID) %>%
  filter(Index %in% range(Index)) %>%
  mutate(newCol = if_else(Index == min(Index), "firstprice", "lastprice")) %>%
  select(-Index) %>%
  spread(newCol, Price)
# A tibble: 3 x 3
# Groups:   ID [3]
#  ID    firstprice lastprice
#  <fct>      <dbl>     <dbl>
#1 ABC           31        81
#2 BCD          100        84
#3 DEF           31        42

使用dplyr通过分组变量解析多个条件

3 个答案: