Question

我有一个数据集，其中每一行代表一个包含多个商品的订单，并记录了每个商品的订购数量。我想找出每一行中订购数量最多的商品的名称及其数量。

我的数据如下：

Item1   Qty1    Item2   Qty2    Item3   Qty3    Item4   Qty4
SUV1     4       SUV2    5       SUV3    5       SUV4    3
SUV4     7       PLV4    3       PNC5    6        NA    NA
SUV3     5       PNC3    5        NA     NA       NA    NA

当我尝试以下代码时，我能够得到每行中最大值所在的列名，但得不到对应的商品名称：

## 
library(tidyverse)

# Asker's attempt: stack the quantity columns into long form and keep, for each
# order, the single row holding the largest quantity.
# NOTE(review): `sodf2` and the range SKU1_Qty:SKU10_Qty do not match the
# Item*/Qty* sample shown above; presumably they come from the asker's real
# data. Confirm the column names before running.
sodf_rank<- sodf2 %>% 
  rownames_to_column('id') %>%  # turns the row names into an `id` column
  # dept receives the name of the source column, cnt the value it held.
  gather(dept, cnt, SKU1_Qty:SKU10_Qty) %>% 
  group_by(id) %>% 
  # Keeps the first maximum per id. Only the quantity column name is carried
  # along, which is why the matching item name is missing from the result.
  slice(which.max(cnt))
##

我期望得到如下结果：

RowID   Item    Qty
1       SUV2    5
2       SUV4    7
3       SUV3    5

Answer 1

library(tidyverse)

# Reshape to one row per (order, slot) by pairing each ItemN column with its
# QtyN column, then keep the item(s) with the largest quantity in each order.
# Pairing the columns directly, instead of pasting them together with "_" and
# splitting them again, keeps item names that contain "_" intact and leaves
# empty slots as real NA values rather than the string "NA".
df1 %>% 
  rowid_to_column() %>% 
  pivot_longer(
    -rowid,
    names_to = c(".value", "slot"),
    names_pattern = "^(Item|Qty)(\\d+)$"
  ) %>% 
  select(rowid, Item, Qty) %>% 
  # Keep Qty as double so the result prints like the output shown below.
  mutate(Qty = as.numeric(Qty)) %>% 
  group_by(rowid) %>% 
  # Ties are all kept; empty slots (NA) never satisfy the comparison and drop.
  filter(Qty == max(Qty, na.rm = TRUE))

#> # A tibble: 5 x 3
#> # Groups:   rowid [3]
#>   rowid Item    Qty
#>   <int> <chr> <dbl>
#> 1     1 SUV2      5
#> 2     1 SUV3      5
#> 3     2 SUV4      7
#> 4     3 SUV3      5
#> 5     3 PNC3      5

或者，将最后一行的 filter(Qty == max(Qty, na.rm = TRUE)) 替换为以下代码（每行只保留一条记录，出现并列时取第一个）：

              ... %>% 
  # Sort by descending quantity, then keep the first row of every rowid group;
  # when quantities tie, the item that appeared first in the order wins.
  arrange(desc(Qty)) %>% 
  slice(1)

获得：

# # A tibble: 3 x 3
# # Groups:   rowid [3]
#   rowid Item    Qty
#   <int> <chr> <dbl>
# 1     1 SUV2      5
# 2     2 SUV4      7
# 3     3 SUV3      5
# Warning message:
# NAs introduced by coercion

数据：

# Sample orders used by the examples: one row per order with up to four
# Item/Qty pairs; unused slots are NA.
df1 <- read.table(text = "Item1   Qty1    Item2   Qty2    Item3   Qty3    Item4   Qty4
                        SUV1       4    SUV2       5    SUV3       5    SUV4       3
                        SUV4       7    PLV4       3    PNC5       6    NA        NA
                        SUV3       5    PNC3       5    NA        NA    NA        NA", 
                   header = TRUE,
                   # Keep item names as character vectors on R < 4.0 as well.
                   stringsAsFactors = FALSE)

Answer 2

在 base R 中，我们可以将 "Item" 列和 "Qty" 列分开处理：先用 max.col 在 qty_cols 中找出每行最大值所在的列，再从 item_cols 中取出对应位置的值

# NOTE(review): `df` is expected to be the orders data frame; the sample data
# elsewhere in this file is named `df1`. Confirm which object to use.

# Names of the item and quantity columns. They are assumed to appear in the
# same order, so that position k in one set pairs with position k in the other.
item_cols <- grep("^Item", names(df), value = TRUE)
qty_cols <- grep("^Qty", names(df), value = TRUE)

# max.col() has no way to ignore NA, so empty slots are filled with -Inf.
# Unlike 0, that can never tie with (or beat) a real quantity, even a zero or
# negative one.
qty_filled <- replace(df[qty_cols], is.na(df[qty_cols]), -Inf)

# One (row, column) index pair per order, pointing at its first largest
# quantity.
inds <- cbind(seq_len(nrow(df)), max.col(qty_filled, ties.method = "first"))

# Indexing with the two-column matrix picks one cell per row from each set.
data.frame(RowID = seq_len(nrow(df)), Item = df[item_cols][inds], 
           Qty = df[qty_cols][inds])


#  RowID Item Qty
#1     1 SUV2   5
#2     2 SUV4   7
#3     3 SUV3   5

另一种做法是借助 apply 系列函数逐行处理：

# Row-by-row alternative. Rows are visited by index rather than with
# apply(df, 1, ...), because apply() first converts the mixed data frame to a
# character matrix, so the quantities would come back as strings.
# Relies on `item_cols` and `qty_cols` as defined for the max.col() approach.
do.call(rbind, lapply(seq_len(nrow(df)), function(i) {
  qty <- unlist(df[i, qty_cols], use.names = FALSE)
  # which.max() skips NA and returns the first maximum; it is empty when the
  # row holds no quantity at all.
  best <- which.max(qty)
  if (length(best) == 0L) {
    return(data.frame(RowID = i, Item = NA_character_, Qty = NA_real_))
  }
  data.frame(
    RowID = i,
    Item = as.character(df[[item_cols[best]]][i]),
    Qty = qty[best]
  )
}))

查找每一行的最大值以及该最大值对应的商品

2 个答案: