dat <- data.frame(A=c("name1", "name2", "name3"),
B=c(0,1,0), C=c(0,0,5), D= c(4,4,0), E=c(1,0,0), F=c(4,0,0) )
desiredresult <- data.frame(A=c("name1", "name2", "name3"),
B=c(NA,1,NA), C=c(NA,0,5), D= c(4,4,0), E=c(1,0,NA), F=c(4,NA,NA))
我想在每一行中用NA替换0值,直到遇到正值(数据集中没有负值)。除此之外我想要替换所有值,如果它们的结尾都是零,在最后一个正值之后留下第一个0。等5,0,0,0 - &gt; 5,0,NA,NA
提供了具有所需结果的示例数据。我正试图接近这样的事情,但需要有5个以上的条件来覆盖这一切。有一个更好的方法吗?也许有data.table?
dat$B[dat$B == 0 & (dat$C!=0 | dat$D!=0)] <- NA
dat$C[dat$C == 0 & dat$D!=0 & is.na(dat$B)] <- NA
答案 0 :(得分:2)
这应该有效:
#Apply the first rule: convert 0 to NA until we find a non negative
res1<-t(apply(dat[,-1], 1, function(x) {
xc <- cumsum(x) #cumulative sum
x[xc==0]<-NA #NA where cumulative sum iz 0
x
}))
# Apply the second rule
res2<-t(apply(res1, 1, function(x) {
xc <- cumsum(rev(x)) #reverse the sum
xc<-c(tail(xc,-1),1) # shift the sum
res<-rev(x) #reverse the vector
res[xc==0]<-NA
rev(res)
}))
#Reconstruct the data frame
cbind(data.frame(name=dat[,1]),res2)
# name B C D E F
#1 name1 NA NA 4 1 4
#2 name2 1 0 4 0 NA
#3 name3 NA 5 0 NA NA
答案 1 :(得分:2)
使用data.table
- 包,您可以按如下方式处理:
cols <- names(dat)[2:6]
library(data.table)
setDT(dat)[, (cols) := {x <- unlist(.SD);
x[cumsum(x)==0] <- NA;
l <- c(tail(cumsum(rev(x)),-1),1) == 0;
x[rev(l)] <- NA;
names(x) <- cols;
as.list(x)},
by = A]
你得到:
> dat
A B C D E F
1: name1 NA NA 4 1 4
2: name2 1 0 4 0 NA
3: name3 NA 5 0 NA NA
同样的想法,但后来用基础R:
dl <- as.data.frame(t(dat[,-1]))
idx1 <- cumsum(dl) == 0
idx2 <- sapply(dl, function(x) {
l <- c(tail(cumsum(rev(x)),-1),1) == 0
l[is.na(l)] <- FALSE
rev(l)
})
dl[idx1 | idx2] <- NA
dat[,-1] <- t(dl)
会得到相同的结果:
> dat
A B C D E F
1 name1 NA NA 4 1 4
2 name2 1 0 0 4 0
3 name3 NA 5 0 NA NA
新示例数据:
dat <- data.frame(A=c("name1", "name2", "name3"),
B=c(0,1,0), C=c(0,0,5), D=c(4,0,0), E=c(1,4,0), F=c(4,0,0) )