将一个数据集中的0替换为另一个数据集中的值

时间:2013-12-01 22:22:20

标签: r dataset subset

我的数据中,当回归无法收敛时,对应 User 生成的 Rating 会是 0。

我想将这些 0 替换为另一个表(usermean)中对应 User 的 Mean 值。

我用下面的代码实现了这一点,但效率很低。有没有更高效的写法?

# Example data: eight ratings, the last three of which (IDs 6-8) are 0.
pred <- data.frame(1:8, c(1,2,3,4,5,0,0,0), c(1,1,2,2,3,4,4,5))
names(pred) <- c("ID", "Rating", "User")
# Lookup table holding one Mean value per User.
usermean <- data.frame(c(1,2,3,4,5), c(3,3,4,9,7))
names(usermean) <- c("User", "Mean")
# Split pred into the rows whose Rating is 0 (temp) and all remaining rows (temp2).
temp <- subset(pred, pred$Rating ==0)
temp2 <- subset(pred, !pred$Rating == 0)
# Keep only the usermean rows for users that appear among the zero-rated rows.
temp3 <- subset(usermean, User %in% temp$User)
# NOTE(review): join() is not a base R function; this looks like plyr::join,
# which would require library(plyr) to be loaded first -- confirm.
temp4 <- join(temp, temp3, by = "User", type = "left")
# Overwrite column 2 (Rating) with column 4 (presumably the joined Mean --
# depends on join()'s column order), then drop column 4 again.
temp4[,2] <- temp4[,4]
temp4 <- temp4[,1:3]
names(temp4) <- c("ID", "Rating", "User")
# Stack the untouched rows on top of the repaired rows; the repaired rows
# therefore end up at the bottom of pred.
pred <- rbind(temp2, temp4)
# Overwrite the temporaries with NA (the variables themselves still exist).
temp <- NA; temp2 <- NA; temp3 <- NA; temp4 <- NA

2 个答案:

答案 0 :(得分:2)

# Replace every zero Rating with the Mean of the matching user.
# match() looks each User up by its key in usermean$User, so the result does
# not depend on usermean's row order or on User ids being exactly 1..n
# (indexing usermean$Mean directly by the User id only works in that case).
# A User that is missing from usermean gets NA as its Rating.
pred$Rating[pred$Rating == 0] <-
  usermean$Mean[match(pred$User[pred$Rating == 0], usermean$User)]

#> pred
#  ID Rating User
#1  1      1    1
#2  2      2    1
#3  3      3    2
#4  4      4    2
#5  5      5    3
#6  6      9    4
#7  7      9    4
#8  8      7    5

答案 1 :(得分:0)

# Return a copy of pred in which every zero Rating is replaced by the Mean of
# the matching user (looked up by key via match()).
# ifelse() leaves non-zero ratings untouched even when the user is missing
# from usermean; the arithmetic form `Rating + (Rating == 0) * Mean` would
# turn those rows into NA because FALSE * NA is NA.
transform(pred, Rating = ifelse(Rating == 0,
                                usermean$Mean[match(User, usermean$User)],
                                Rating))

  ID Rating User
1  1      1    1
2  2      2    1
3  3      3    2
4  4      4    2
5  5      5    3
6  6      9    4
7  7      9    4
8  8      7    5