Question

我需要根据因子水平为现有数据框创建一个新列。我有两个数据框，分别叫 dat_group 和 dat_price，如下所示。

dat_group

dat_price

   A  B  C
1  21 45 24
2  21 45 24
3  21 45 24
4  21 45 24
5  15 11 10
6  15 11 10
7  15 11 10
8  20 13 55
9  20 13 55
10 20 13 55

我需要根据 dat_group 中 Group 列的因子水平，把 dat_price 中 A、B、C 列的值填入新列，并且各组内的行顺序要与 dat_price 中的顺序一致。假设我先在 dat_group 中创建一个名为 “Price” 的新列：

# Add an empty placeholder column; NA is recycled to every row of dat_group.
dat_group[["Price"]] <- NA

那么最终的数据框应该如下所示：

   Group Price
1      A    21
2      A    21
3      A    21
4      A    21
5      A    15
6      A    15
7      A    15
8      A    20
9      A    20
10     A    20
11     C    24
12     C    24
13     C    24
14     C    24
15     C    10
16     C    10
17     C    10
18     C    55
19     C    55
20     C    55
21     B    45
22     B    45
23     B    45
24     B    45
25     B    11
26     B    11
27     B    11
28     B    13
29     B    13
30     B    13

我参考了一些现有的示例（e.g.1、e.g.2）尝试实现，但都没有成功。

希望有人能帮帮我。可以用以下代码创建这两个示例数据框。我的实际数据集有几千行。

      # Example grouping frame: 30 rows in three contiguous runs of ten
      # (A, then C, then B), stored as a factor with levels ordered A, B, C.
      dat_group <- data.frame(
        Group = factor(
          rep(c("A", "C", "B"), each = 10L),
          levels = c("A", "B", "C")
        )
      )

    # Example price table: one integer column per group (A, B, C), ten rows,
    # each column holding three distinct prices in runs of 4, 3 and 3 rows.
    dat_price <- data.frame(
      A = rep(c(21L, 15L, 20L), times = c(4L, 3L, 3L)),
      B = rep(c(45L, 11L, 13L), times = c(4L, 3L, 3L)),
      C = rep(c(24L, 10L, 55L), times = c(4L, 3L, 3L))
    )

Answer 1

下面是针对这个问题的一个更具防御性的解决方案。希望即使各因子水平的行数不都是同一倍数，它也能正常工作。

library(dplyr); library(purrr); library(magrittr)

# Remember the incoming row order so it can be restored after the
# split / bind_rows round trip.
dat_group$original_order <- seq_len(nrow(dat_group))

dat_group <- dat_group %>%
  # drop = TRUE skips factor levels that have no rows, so every piece handed
  # to the function below is non-empty.
  split(.$Group, drop = TRUE) %>%
  map(function(grp) {
    # Look the price column up by *name*. Indexing with the factor itself
    # would use its integer codes, which only picks the right column when the
    # level order happens to equal the column order of dat_price.
    key <- as.character(grp$Group[1])
    if (!key %in% names(dat_price)) {
      stop("No price column named '", key, "' in dat_price", call. = FALSE)
    }
    prices <- dat_price[[key]]
    prices <- prices[!is.na(prices)]
    # length.out recycles the prices to exactly the group size, so a group
    # whose row count is not a whole multiple of the column length still works.
    mutate(grp, Price = rep(prices, length.out = n()))
  }) %>%
  bind_rows() %>%
  arrange(original_order) %>%
  select(-original_order)

dat_group

   Group Price
1      A    21
2      A    21
3      A    21
4      A    21
5      A    15
6      A    15
7      A    15
8      A    20
9      A    20
10     A    20
11     C    24
12     C    24
13     C    24
14     C    24
15     C    10
16     C    10
17     C    10
18     C    55
19     C    55
20     C    55
21     B    45
22     B    45
23     B    45
24     B    45
25     B    11
26     B    11
27     B    11
28     B    13
29     B    13
30     B    13

原始（懒惰）解决方案：

# Fill Price by pairing every row with the dat_price column named after its
# group. Simply unlisting dat_price would emit the columns in A, B, C order,
# whereas the groups in dat_group run A, C, B, so B and C prices would be
# swapped.
dat_group$Price <- local({
  group_name <- as.character(dat_group$Group)
  # Position of each row inside its own group (1, 2, ... restarting per group).
  within_group <- ave(seq_along(group_name), group_name, FUN = seq_along)
  # Wrap around so a group with more rows than dat_price recycles the prices.
  price_row <- (within_group - 1L) %% nrow(dat_price) + 1L
  # Column of dat_price for each row; NA (and hence an NA price) when a group
  # has no column of that name.
  price_col <- match(group_name, names(dat_price))
  as.matrix(dat_price)[cbind(price_row, price_col)]
})

Answer 2

library(data.table)
# Convert the price table to a data.table so it can be subset by column name
# with `with = FALSE` and reshaped with data.table's melt().
dat_price <- as.data.table(dat_price)
# Put the price columns in the order in which the groups first appear in
# dat_group (A, C, B for the example data), selecting by name instead of by
# hard-coded position.
# NOTE(review): assumes each group is one contiguous run of nrow(dat_price)
# rows in dat_group, as in the example data - confirm for the real data set.
group_order <- as.character(unique(dat_group$Group))
dat_price_new <- dat_price[, group_order, with = FALSE]
# Name the measure columns explicitly so melt() does not have to guess them,
# and label the long-format result with the column names the question asks for.
melt(dat_price_new, measure.vars = group_order,
     variable.name = "Group", value.name = "Price")

使用r

2 个答案: