为缺少值的每个组添加行

时间:2017-08-01 19:28:09

标签: r dataframe data.table

每个篮子可容纳的水果总数为 10。对于每个篮子,如果其计数总和已为 10 但缺少某种水果,我想为该篮子添加一行,并将该水果的计数设为 0。以下是生成数据框的代码。

# Example data: one row per fruit type present in each basket.
Basket <- c("A", "A", "B", "B", "C", "C", "C")
# Basket B holds Apple and Guava (no Orange), matching the printed table below.
Fruit <- c("Apple", "Orange", "Apple", "Guava", "Orange", "Apple", "Guava")
# Counts are kept as strings, as in the original example; convert them to
# numbers before doing any arithmetic on them.
count <- c("5", "5", "7", "3", "2", "6", "4")
data <- data.frame(Basket, Fruit, count)

  Basket  Fruit count
1      A  Apple     5
2      A Orange     5
3      B  Apple     7
4      B  Guava     3
5      C Orange     2
6      C  Apple     6
7      C  Guava     4

我希望结果大致如下:

  Basket  Fruit count
1      A  Apple     5
2      A Orange     5
3      A  Guava     0
4      B  Apple     7
5      B Orange     0
6      B  Guava     3
7      C Orange     2
8      C  Apple     6
9      C  Guava     4

不完全确定循环是否是一种有效的方法,但对建议持开放态度。目标是为每种水果获得组间的准确平均值。

4 个答案:

答案 0 :(得分:1)

我知道 tidyr 包中的 spread 和 gather 函数可以做到这一点

library(tidyr)

# Widen to one column per fruit so that every Basket/Fruit combination gets a
# cell (absent ones are filled with 0), then lengthen back to one row per pair.
# pivot_wider()/pivot_longer() are the current replacements for the superseded
# spread()/gather(). `count` must already be numeric so that it can be combined
# with the numeric fill value.
data <- data %>%
  pivot_wider(names_from = Fruit, values_from = count, values_fill = 0) %>%
  pivot_longer(cols = -Basket, names_to = "Fruit", values_to = "count")

要用 0 填充,count 列必须是整数,而不是因子。您可以使用以下代码进行转换:

# Go through as.character() first: if `count` is a factor, as.integer() alone
# would return the internal level codes rather than the printed numbers.
data$count <- as.integer(as.character(data$count))

答案 1 :(得分:1)

将您的 data.frame 转换为宽格式,用 0 而不是 NA 填充缺失的单元格,然后再将其转换回长格式:

# Counts are stored as numbers, not strings, so they can be filled and averaged.
count <- c(5, 5, 7, 3, 2, 6, 4)
data <- data.frame(Basket, Fruit, count)

# Wide format: one row per basket, one column per fruit; absent pairs become 0.
# pivot_wider()/pivot_longer() replace the superseded spread()/gather().
d1 <- tidyr::pivot_wider(
  data,
  names_from = Fruit,
  values_from = count,
  values_fill = 0
)
# Back to long format: one row per Basket/Fruit pair, zeros included.
d2 <- tidyr::pivot_longer(
  d1,
  cols = -Basket,
  names_to = "Fruit",
  values_to = "count"
)

答案 2 :(得分:0)

data <- data.frame(Basket, Fruit, count, stringsAsFactors = FALSE)

# Every Basket/Fruit pair that should exist. unique() is required here: passing
# the raw columns would repeat each pair once per occurrence and produce
# duplicated rows in the merge below.
all_pairs <- expand.grid(
  Basket = unique(data$Basket),
  Fruit = unique(data$Fruit),
  stringsAsFactors = FALSE
)

# all.y = TRUE keeps every pair from the grid; pairs that are absent from
# `data` come back with an NA count, which is then replaced by 0.
full <- merge(data, all_pairs, by = c("Basket", "Fruit"), all.y = TRUE)
full$count[is.na(full$count)] <- 0

答案 3 :(得分:0)

注意:只有当某个篮子恰好缺少一种水果时,填入的计数才是正确的;除此之外,代码应能按预期工作

# Add a row for every fruit type a basket lacks, using base R only (the
# previous version called dplyr verbs without ever loading dplyr).
Basket <- c("A", "A", "B", "B", "C", "C", "C")
Fruit <- c("Apple", "Orange", "Apple", "Orange", "Orange", "Apple", "Guava")
count <- c(5, 5, 7, 3, 2, 6, 4)
data <- data.frame(Basket, Fruit, count, stringsAsFactors = FALSE)

# Total number of fruit a basket can hold.
basket_capacity <- 10
# All fruit types seen anywhere in the data, in sorted order.
fruit_levels <- sort(unique(data$Fruit))

# Build the rows to add for each basket in one pass instead of growing a data
# frame inside a loop. Baskets that already contain every fruit yield NULL.
missing_rows <- lapply(split(data, data$Basket), function(basket_df) {
  absent_fruit <- setdiff(fruit_levels, basket_df$Fruit)
  if (length(absent_fruit) == 0L) {
    return(NULL)
  }
  data.frame(
    Basket = basket_df$Basket[1],
    Fruit = absent_fruit,
    # Each absent fruit receives the basket's remaining capacity (0 for a full
    # basket). Caveat: if several fruits are absent from a basket that is not
    # full, every one of them gets the whole remainder.
    count = basket_capacity - sum(basket_df$count),
    stringsAsFactors = FALSE
  )
})

# rbind() drops the NULL entries; unname() keeps basket names out of row names.
data <- rbind(data, do.call(rbind, unname(missing_rows)))
# order() is stable, so rows keep their relative order within each basket.
data <- data[order(data$Basket), ]
rownames(data) <- NULL