我有一个0/1虚拟变量的数据帧。每个虚拟变量只取值1一次。对于每一列,我希望将从观察值中计算的n个前/后观察值替换为特定值(比如1)。
因此对于单个向量,n = 1:
c(0, 0, 1, 0, 0)
我想要
c(0, 1, 1, 1, 0)
对于n列并且允许替换不同数量的先前/后续观察(例如在之前和之后的n-1),什么是良好的通用方法?
感谢您的帮助!
答案 0 :(得分:3)
x<-c(0,0,1,0,0)
ind<-which(x==1)
x[(ind-1):(ind+x)]<-1
答案 1 :(得分:1)
您可以做的是:
vec <- c(0, 0, 1, 0, 0)
sapply(1:length(vec), function(i) {
minval <- max(0, i - 1)
maxval <- min(i + 1, length(vec))
return(sum(vec[minval:maxval]))
})
# [1] 0 1 1 1 0
或者把它放在一个函数中(相同的代码,但更紧凑)
f <- function(vec){
sapply(1:length(vec), function(i)
sum(vec[max(0, i-1):min(i+1, length(vec))]))
}
f(vec)
# [1] 0 1 1 1 0
为了比较两种不同的解决方案,我使用microbenchmark
快速运行基准测试,获胜者是:显然@Shenglin的代码....总是很高兴看到简单的解决方案(以及看看有多复杂一些(我的)解决方案可以)。
fDavid <- function(vec){
sapply(1:length(vec), function(i)
sum(vec[max(0, i-1):min(i+1, length(vec))]))
}
fHeroka <- function(vec){
res <- vec
test <- which(vec==1)
#create indices to be replaced
n=1 #variable n
replace_indices <- c(test+(1:n),test-(1:n))
#filter out negatives (may happen with larger n)
replace_indices <- replace_indices[replace_indices>0]
#replace items in 'res' that need to be replaced with 1
res[replace_indices] <- 1
}
fShenglin <- function(vec){
ind<-which(vec==1)
vec[(ind-1):(ind+x)]<-1
}
vect <- sample(0:1, size = 1000, replace = T)
library(microbenchmark)
microbenchmark(fHeroka(vect), fDavid(vect), fShenglin)
# # Unit: nanoseconds
# expr min lq mean median uq max
# fHeroka(vect) 38929 42999 54422.57 49546 61755.5 145451
# fDavid(vect) 2463805 2577935 2875024.99 2696844 2849548.5 5994596
# fShenglin 0 0 138.63 1 355.0 1063
# neval cld
# 100 a
# 100 b
# 100 a
# Warning message:
# In microbenchmark(fHeroka(vect), fDavid(vect), fShenglin) :
# Could not measure a positive execution time for 30 evaluations.
答案 2 :(得分:1)
另一种选择:
f <- function(x, pre, post) {
idx <- which.max(x)
x[max(1, (idx-pre)):min(length(x), (idx+post))] <- 1
x
}
示例数据:
df <- data.frame(x = c(0, 0, 1, 0, 0), y = c(0, 1, 0, 0, 0))
应用:
df[] <- lapply(df, f, pre=2, post=1)
#df
# x y
#1 1 1
#2 1 1
#3 1 1
#4 1 0
#5 0 0
答案 3 :(得分:0)
这可能是一个开始:
myv <- c(0, 0, 1, 0, 0)
#make a copy
res <- myv
#check where the ones are
test <- which(myv==1)
#create indices to be replaced
n=1 #variable n
replace_indices <- c(test+(1:n),test-(1:n))
#filter out negatives (may happen with larger n)
replace_indices <- replace_indices[replace_indices>0]
#replace items in 'res' that need to be replaced with 1
res[replace_indices] <- 1
res
> res
[1] 0 1 1 1 0
答案 4 :(得分:0)
这可能是一个解决方案:
dat<-data.frame(x=c(0,0,1,0,0,0),y=c(0,0,0,1,0,0),z=c(0,1,0,0,0,0))
which_to_change<-data.frame(prev=c(2,2,1),foll=c(1,1,3))
for(i in 1:nrow(which_to_change)){
dat[(which(dat[,i]==1)-which_to_change[i,1]):(which(dat[,i]==1)+which_to_change[i,2]),i]<-1
}