根据列条件对数据框进行排序

时间:2019-09-03 06:39:45

标签: r

我需要根据另一个dataframe2列对以下dataframe1进行排序:

Dataframe1:

LC       Item     Rolledfcst
MW92    1358576     2125
RM11    1358576     3955
WK14    1358576     307 
SW92    1358576     190
MW92    1267890     200
SW92    1267890     670
RM11    1267890     890
WK14    1267890     245

数据框2:

      Item   LC1    LC2     LC3
    1358576 RM11    MW92    SW92
    1358576 RM11    WK14    NA
    1267890 MW92    SW92    NA
    1267890 RM11    WK14    NA

现在,对于Dataframe1中的每个Item,应按Dataframe2中各列的顺序对LC进行排序:先是LC1列中的元素,然后是LC2列中的元素,最后是LC3列中的元素。

注意:对于每个Item,如果LC1、LC2或LC3中的某一列里有2个LC,则这两个LC之间的先后顺序无关紧要。

输出数据框:

LC       Item     Rolledfcst
RM11    1358576     3955
MW92    1358576     2125
WK14    1358576     307 
SW92    1358576     190
MW92    1267890     200
RM11    1267890     890
SW92    1267890     670
WK14    1267890     245

3 个答案:

答案 0 :(得分:3)

一种tidyverse解决方案:

# Example input from the question: one row per (LC, Item) pair together with
# its rolled forecast. Strings are kept as character vectors, not factors.
df1 <- data.frame(
  LC = c("MW92", "RM11", "WK14", "SW92", "MW92", "SW92", "RM11", "WK14"),
  Item = rep(c(1358576L, 1267890L), each = 4L),
  Rolledfcst = c(2125L, 3955L, 307L, 190L, 200L, 670L, 890L, 245L),
  stringsAsFactors = FALSE
)

# Example lookup table from the question: for each Item, the LC codes listed
# under LC1, LC2 and LC3. Unused slots are NA. Strings stay character vectors.
df2 <- data.frame(
  Item = c(1358576L, 1358576L, 1267890L, 1267890L),
  LC1 = c("RM11", "RM11", "MW92", "RM11"),
  LC2 = c("MW92", "WK14", "SW92", "WK14"),
  LC3 = c("SW92", NA_character_, NA_character_, NA_character_),
  stringsAsFactors = FALSE
)

首先将df2转换为长格式,以确定排序顺序:

library(tidyverse)
# Build the per-Item ordering lookup from df2.
#  1. Reshape df2 to long format: one row per (Item, LC column name, value).
#  2. Sort by Item and LC column name so LC1 values come before LC2, then LC3.
#  3. Within each Item, number the values by first appearance. NA is never a
#     factor level, so NA values get an NA order and are filtered out.
#  4. Keep a single row per (Item, value). Deduplicating on these two keys
#     (rather than on whole rows) prevents the later join from duplicating
#     df1 rows when the same value is listed under more than one LC column.
# The outer parentheses print the result in addition to assigning it.
(ord <- df2 %>%
    pivot_longer(-Item, names_to = "LC", values_to = "value") %>%
    arrange(Item, LC) %>%
    group_by(Item) %>%
    mutate(order = as.numeric(factor(value, unique(value)))) %>%
    filter(!is.na(order)) %>%
    distinct(Item, value, .keep_all = TRUE))

# # A tibble: 8 x 4
# # Groups:   Item [2]
#      Item LC    value order
#     <int> <chr> <chr> <dbl>
# 1 1267890 LC1   MW92      1
# 2 1267890 LC1   RM11      2
# 3 1267890 LC2   SW92      3
# 4 1267890 LC2   WK14      4
# 5 1358576 LC1   RM11      1
# 6 1358576 LC2   MW92      2
# 7 1358576 LC2   WK14      3
# 8 1358576 LC3   SW92      4

现在,将df1与ord连接(join),并按Item和order排序:

# Look up each df1 row's `order` in `ord` (df1$LC is matched against
# ord$value), sort by Item descending and then by that order, and finally
# drop the helper column. The result is printed, not assigned.
df1 %>%
  left_join(select(ord, -LC), by = c("Item", "LC" = "value")) %>%
  arrange(desc(Item), order) %>%
  select(-order)

#     LC    Item Rolledfcst
# 1 RM11 1358576       3955
# 2 MW92 1358576       2125
# 3 WK14 1358576        307
# 4 SW92 1358576        190
# 5 MW92 1267890        200
# 6 RM11 1267890        890
# 7 SW92 1267890        670
# 8 WK14 1267890        245

根据此处的评论,这是一个base R(不依赖额外包)的解决方案:

## reshape df2 to long format: one row per (Item, time), where `time` is the
## position of the LC column (1 for the first LC column, 2 for the second, ...)
ord <- reshape(
  df2,
  direction = "long",
  varying = names(df2)[-1],
  v.names = "LC"
)
## order by Item (descending), then by LC column position
ord <- ord[order(-ord$Item, ord$time), ]
## drop the empty (NA) slots
ord <- ord[!is.na(ord$LC), ]
## keep only the first occurrence of each (Item, LC) pair
ord <- ord[!duplicated(ord[, c("Item", "LC")]), ]
## overwrite `id` with a running number 1, 2, ... within each Item
ord$id <- ave(ord$id, ord$Item, FUN = seq_along)
## attach that running number to df1 (the `time` column is not needed)
df.res <- merge(df1, ord[, c("Item", "LC", "id")], by = c("Item", "LC"))
## sort by Item (descending) and running number, then hide the helper column
df.res[with(df.res, order(-Item, id)), setdiff(names(df.res), "id")]
#      Item   LC Rolledfcst
# 6 1358576 RM11       3955
# 5 1358576 MW92       2125
# 8 1358576 WK14        307
# 7 1358576 SW92        190
# 1 1267890 MW92        200
# 2 1267890 RM11        890
# 3 1267890 SW92        670
# 4 1267890 WK14        245

答案 1 :(得分:1)

读入数据:

# Read the question's first data frame from inline, whitespace-separated text.
# `header = TRUE` is spelled out in full: `T` is an ordinary variable that can
# be reassigned, whereas `TRUE` is a reserved word.
df1 <- read.table(text = 'LC       Item     Rolledfcst
MW92    1358576     2125
RM11    1358576     3955
WK14    1358576     307 
SW92    1358576     190
MW92    1267890     200
SW92    1267890     670
RM11    1267890     890
WK14    1267890     245', header = TRUE)


# Read the question's second data frame from inline, whitespace-separated
# text. The literal NA fields are parsed as missing values.
# `header = TRUE` is spelled out in full: `T` is an ordinary variable that can
# be reassigned, whereas `TRUE` is a reserved word.
df2 <- read.table(text = ' Item   LC1    LC2     LC3
    1358576 RM11    MW92    SW92
    1358576 RM11    WK14    NA
    1267890 MW92    SW92    NA
    1267890 RM11    WK14    NA', header = TRUE)

将LC列收集到单个列中,并创建一个包含排序顺序的ID列

library(tidyr)
library(dplyr)

# Stack the columns at positions 2 to 4 of df2 (the LC columns) into LC/value
# pairs, then tag every row with a combined Item + value key. gather() emits
# all LC1 rows first, then LC2, then LC3, so the first-appearance order of
# `sort_id` follows the LC column order. Note that df2 itself is overwritten.
df2 <- mutate(
  gather(df2, key = LC, value = value, 2:4),
  sort_id = paste0(Item, value)
)

#     Item  LC value     sort_id
#1  1358576 LC1  RM11 1358576RM11
#2  1358576 LC1  RM11 1358576RM11
#3  1267890 LC1  MW92 1267890MW92
#4  1267890 LC1  RM11 1267890RM11
#5  1358576 LC2  MW92 1358576MW92
#6  1358576 LC2  WK14 1358576WK14
#7  1267890 LC2  SW92 1267890SW92
#8  1267890 LC2  WK14 1267890WK14
#9  1358576 LC3  SW92 1358576SW92
#10 1358576 LC3  <NA>   1358576NA
#11 1267890 LC3  <NA>   1267890NA
#12 1267890 LC3  <NA>   1267890NA

以相同的方式为df1创建一个sort_id列,并应用df2中的因子水平。使用arrange时,因子级别将控制排序顺序。

# Sort df1 within each Item using the order encoded in df2$sort_id.
# Both keys are turned into factors whose levels follow their first
# appearance in df2, so arrange() sorts by level position instead of
# alphabetically. The result is printed, not assigned.
df1 %>%
  mutate(
    sort_id = factor(paste0(Item, LC), levels = unique(df2$sort_id)),
    Item = factor(Item, levels = unique(df2$Item))
  ) %>%
  group_by(Item) %>%
  # `TRUE` rather than `T`: `T` is a plain variable that can be reassigned.
  arrange(sort_id, .by_group = TRUE) %>%
  # Drop the grouping so the returned tibble does not stay grouped by Item.
  ungroup() %>%
  select(-sort_id)

#LC    Item    Rolledfcst
#  <fct> <fct>        <int>
#1 RM11  1358576       3955
#2 MW92  1358576       2125
#3 WK14  1358576        307
#4 SW92  1358576        190
#5 MW92  1267890        200
#6 RM11  1267890        890
#7 SW92  1267890        670
#8 WK14  1267890        245

答案 2 :(得分:1)

使用split和mapply的base R方法。先根据Item在df1中出现的顺序将其转换为factor,然后按Item列对df1进行split,并将各组的LC值与df2中对应行展开(unlist)后的值进行match,从而按排序后的顺序得到原始行号,用于取子集。

match
相关问题