Question

我需要根据另一个数据框 Dataframe2 的各列，对下面的 Dataframe1 进行排序：

Dataframe1：

LC       Item     Rolledfcst
MW92    1358576     2125
RM11    1358576     3955
WK14    1358576     307 
SW92    1358576     190
MW92    1267890     200
SW92    1267890     670
RM11    1267890     890
WK14    1267890     245

Dataframe2：

      Item   LC1    LC2     LC3
    1358576 RM11    MW92    SW92
    1358576 RM11    WK14    NA
    1267890 MW92    SW92    NA
    1267890 RM11    WK14    NA

现在，对于 Dataframe1 中的每个 Item，都应按照 Dataframe2 各列的顺序对 LC 进行排序：先是 LC1 列中的元素，然后是 LC2 列中的元素，最后是 LC3 列中的元素。

注意：对于每个 Item，如果 LC1、LC2 或 LC3 中的某一列包含 2 个 LC，那么这两个 LC 之间的先后顺序无关紧要。

输出数据框：

LC       Item     Rolledfcst
RM11    1358576     3955
MW92    1358576     2125
WK14    1358576     307 
SW92    1358576     190
MW92    1267890     200
RM11    1267890     890
SW92    1267890     670
WK14    1267890     245

Answer 1

一种tidyverse解决方案：

# Example input to be sorted: one row per (LC, Item) pair with its forecast.
# stringsAsFactors = FALSE keeps LC a character column on every R version.
df1 <- data.frame(
  LC = c("MW92", "RM11", "WK14", "SW92", "MW92", "SW92", "RM11", "WK14"),
  Item = rep(c(1358576L, 1267890L), each = 4L),
  Rolledfcst = c(2125L, 3955L, 307L, 190L, 200L, 670L, 890L, 245L),
  stringsAsFactors = FALSE
)

# Lookup table giving, per Item, the LC codes in priority columns LC1..LC3.
# Unused cells are NA; stringsAsFactors = FALSE keeps the codes as character.
df2 <- data.frame(
  Item = rep(c(1358576L, 1267890L), each = 2L),
  LC1 = c("RM11", "RM11", "MW92", "RM11"),
  LC2 = c("MW92", "WK14", "SW92", "WK14"),
  LC3 = c("SW92", NA, NA, NA),
  stringsAsFactors = FALSE
)

首先将df2转换为长格式，并由此确定排序顺序：

library(tidyverse)
# Build the per-Item ranking of LC codes from df2.
# 1. Stack LC1..LC3 into long format (one row per Item / LCn / value).
# 2. Sort by Item, then by LCn name, so LC1 values precede LC2, then LC3.
# 3. Within each Item, number the distinct values in order of first
#    appearance; NA cells get an NA rank and are dropped.
# 4. Keep one row per (Item, value). Deduplicating on these two columns rather
#    than on whole rows means a code listed under two different LCn columns
#    still yields a single row, so the later join cannot duplicate df1 rows.
# The result stays grouped by Item; the outer parentheses print it.
(ord <- df2 %>%
    pivot_longer(-Item, names_to = "LC", values_to = "value") %>%
    arrange(Item, LC) %>%
    group_by(Item) %>%
    mutate(order = as.numeric(factor(value, unique(value)))) %>%
    filter(!is.na(order)) %>%
    distinct(Item, value, .keep_all = TRUE))

# # A tibble: 8 x 4
# # Groups:   Item [2]
#      Item LC    value order
#     <int> <chr> <chr> <dbl>
# 1 1267890 LC1   MW92      1
# 2 1267890 LC1   RM11      2
# 3 1267890 LC2   SW92      3
# 4 1267890 LC2   WK14      4
# 5 1358576 LC1   RM11      1
# 6 1358576 LC2   MW92      2
# 7 1358576 LC2   WK14      3
# 8 1358576 LC3   SW92      4

现在，将df1与ord连接（join），并按Item和order排序：

# Look up each df1 row's rank by (Item, LC), sort by Item descending and then
# by that rank, and finally drop the helper column.
df1 %>%
  left_join(select(ord, -LC), by = c("Item", "LC" = "value")) %>%
  arrange(desc(Item), order) %>%
  select(-order)

#     LC    Item Rolledfcst
# 1 RM11 1358576       3955
# 2 MW92 1358576       2125
# 3 WK14 1358576        307
# 4 SW92 1358576        190
# 5 MW92 1267890        200
# 6 RM11 1267890        890
# 7 SW92 1267890        670
# 8 WK14 1267890        245

根据此处的评论，下面是一个 base R 解决方案：

## Reshape df2 to long format: one row per (Item, LCn) cell. `time` is the
## index of the source column (1 = LC1, 2 = LC2, 3 = LC3) and `id` the source
## row of df2.
ord <- reshape(df2, varying = names(df2)[-1], direction = "long",
               v.names = "LC")
## Sort by Item (descending) and then by source column.
ord <- ord[order(-ord$Item, ord$time), ]
## Remove the NA padding cells.
ord <- ord[!is.na(ord$LC), ]
## Keep only the first occurrence of each (Item, LC) pair. Columns are
## selected by name so the code does not depend on their position.
ord <- ord[!duplicated(ord[, c("Item", "LC")]), ]
## Overwrite `id` with the running position of each row within its Item.
ord$id <- ave(ord$id, ord$Item, FUN = seq_along)
## Attach the position to df1. all.x = TRUE keeps df1 rows whose LC is not
## listed in df2 (their id is NA) instead of silently dropping them.
df.res <- merge(df1, ord[, c("Item", "LC", "id")], by = c("Item", "LC"),
                all.x = TRUE)
## Sort by Item (descending) and position, then drop the helper column by
## name; order() sorts NA positions last.
df.res[order(-df.res$Item, df.res$id), names(df.res) != "id"]
#      Item   LC Rolledfcst
# 6 1358576 RM11       3955
# 5 1358576 MW92       2125
# 8 1358576 WK14        307
# 7 1358576 SW92        190
# 1 1267890 MW92        200
# 2 1267890 RM11        890
# 3 1267890 SW92        670
# 4 1267890 WK14        245

Answer 2

读入数据：

# Parse the question's first table from inline text; the first line supplies
# the column names. TRUE is spelled out because `T` is an ordinary variable
# that can be reassigned.
df1 <- read.table(text = 'LC       Item     Rolledfcst
MW92    1358576     2125
RM11    1358576     3955
WK14    1358576     307 
SW92    1358576     190
MW92    1267890     200
SW92    1267890     670
RM11    1267890     890
WK14    1267890     245', header = TRUE)


# Parse the question's lookup table from inline text; the first line supplies
# the column names and the literal NA cells are read as missing values.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
df2 <- read.table(text = ' Item   LC1    LC2     LC3
    1358576 RM11    MW92    SW92
    1358576 RM11    WK14    NA
    1267890 MW92    SW92    NA
    1267890 RM11    WK14    NA', header = TRUE)

将LC列收集到单个列中，并创建一个包含排序顺序的ID列

library(tidyr)
library(dplyr)

# Stack columns 2 to 4 (LC1..LC3) into a key column `LC` and a value column
# `value`. gather() stacks column by column (all LC1 rows, then LC2, then
# LC3), and that row order is what the later factor levels are taken from.
df2 <- gather(df2, LC, value, 2:4)
# Composite key "<Item><LC code>"; an NA code is pasted as the text "NA".
df2$sort_id <- paste0(df2$Item, df2$value)

#     Item  LC value     sort_id
#1  1358576 LC1  RM11 1358576RM11
#2  1358576 LC1  RM11 1358576RM11
#3  1267890 LC1  MW92 1267890MW92
#4  1267890 LC1  RM11 1267890RM11
#5  1358576 LC2  MW92 1358576MW92
#6  1358576 LC2  WK14 1358576WK14
#7  1267890 LC2  SW92 1267890SW92
#8  1267890 LC2  WK14 1267890WK14
#9  1358576 LC3  SW92 1358576SW92
#10 1358576 LC3  <NA>   1358576NA
#11 1267890 LC3  <NA>   1267890NA
#12 1267890 LC3  <NA>   1267890NA

以相同的方式为df1创建一个sort_id列，并以df2中sort_id的出现顺序作为其因子水平。使用arrange时，因子水平的顺序将决定排序顺序。

# Sort df1 using the order recorded in the long-format df2.
# sort_id and Item become factors whose levels follow the order in which the
# values appear in df2, so arrange() sorts by that order instead of
# alphabetically. TRUE is spelled out because `T` can be reassigned, and the
# result is ungrouped so later steps do not inherit the grouping.
df1 %>%
  mutate(
    sort_id = factor(paste0(Item, LC), levels = unique(df2$sort_id)),
    Item = factor(Item, levels = unique(df2$Item))
  ) %>%
  group_by(Item) %>%
  arrange(sort_id, .by_group = TRUE) %>%
  ungroup() %>%
  select(-sort_id)

#LC    Item    Rolledfcst
#  <fct> <fct>        <int>
#1 RM11  1358576       3955
#2 MW92  1358576       2125
#3 WK14  1358576        307
#4 SW92  1358576        190
#5 MW92  1267890        200
#6 RM11  1267890        890
#7 SW92  1267890        670
#8 WK14  1267890        245

Answer 3

使用 split 和 mapply 的 base R 方法。先按照 Item 在 df1 中出现的顺序将 Item 转换为 factor，然后按 Item 列进行 split，并用 match 将这些 df1 值与 unlist 之后的 df2 值进行匹配，从而按排好的顺序把原始行号分配给各个子集。

match

根据列条件对数据框进行排序

3 个答案: