我有以下数据框:
# Example data: one row per ID and five factor columns V1..V5 holding
# drug-name values (NA where nothing is recorded). Levels are spelled out
# explicitly so that unused levels -- including the literal string "NA" in
# V3 and V4 -- are kept exactly as they appear in the data.
df <- structure(
  list(
    ID = c(1, 2, 3, 4, 5, 6),
    V1 = factor(rep(NA_character_, 6), levels = character(0)),
    V2 = factor(
      c("LISINOPRIL", NA, "VALSARTAN", NA, NA, NA),
      levels = c("LISINOPRIL", "VALSARTAN")
    ),
    V3 = factor(rep(NA_character_, 6), levels = c("LISINOPRIL", "NA")),
    V4 = factor(
      c(NA, NA, "LISINOPRIL", "LISINOPRIL", "VALSARTAN", NA),
      levels = c("LISINOPRIL", "VALSARTAN", "NA")
    ),
    V5 = factor(
      c("LISINOPRIL", "LISINOPRIL", "LOSARTAN", NA, "VALSARTAN", "LOSARTAN"),
      levels = c("LISINOPRIL", "LOSARTAN", "VALSARTAN")
    )
  ),
  row.names = c(NA, 6L),
  class = "data.frame"
)
> df
ID V1 V2 V3 V4 V5
1 1 <NA> LISINOPRIL <NA> <NA> LISINOPRIL
2 2 <NA> <NA> <NA> <NA> LISINOPRIL
3 3 <NA> VALSARTAN <NA> LISINOPRIL LOSARTAN
4 4 <NA> <NA> <NA> LISINOPRIL <NA>
5 5 <NA> <NA> <NA> VALSARTAN VALSARTAN
6 6 <NA> <NA> <NA> <NA> LOSARTAN
我想新增一列 User:如果某一行中至少有两个相邻(连续)的值不是 NA,则 User 取 1,否则取 0。期望的结果如下:
> df
ID V1 V2 V3 V4 V5 User
1 1 <NA> LISINOPRIL <NA> <NA> LISINOPRIL 0
2 2 <NA> <NA> <NA> <NA> LISINOPRIL 0
3 3 <NA> VALSARTAN <NA> LISINOPRIL LOSARTAN 1
4 4 <NA> <NA> <NA> LISINOPRIL <NA> 0
5 5 <NA> <NA> <NA> VALSARTAN VALSARTAN 1
6 6 <NA> <NA> <NA> <NA> LOSARTAN 0
我非常感谢你的帮助。
答案 0 :(得分:3)
你可以尝试
# Flag each row that contains at least two consecutive non-NA values.
# The data columns are selected by name rather than with `df[-1]`, so that
# re-running this snippet (when `User` already exists) does not treat the
# `User` column itself as a non-NA data value.
drug_cols <- setdiff(names(df), c("ID", "User"))
present <- !is.na(df[drug_cols])  # logical matrix: TRUE where a value exists

# Per row: run-length encode the TRUE/FALSE pattern and test whether any run
# of TRUE values is longer than one. `+ 0` turns TRUE/FALSE into 1/0.
df$User <- apply(present, 1, function(x) {
  runs <- rle(x)
  any(runs$lengths[runs$values] > 1)
}) + 0
df
# ID V1 V2 V3 V4 V5 User
#1 1 <NA> LISINOPRIL <NA> <NA> LISINOPRIL 0
#2 2 <NA> <NA> <NA> <NA> LISINOPRIL 0
#3 3 <NA> VALSARTAN <NA> LISINOPRIL LOSARTAN 1
#4 4 <NA> <NA> <NA> LISINOPRIL <NA> 0
#5 5 <NA> <NA> <NA> VALSARTAN VALSARTAN 1
#6 6 <NA> <NA> <NA> <NA> LOSARTAN 0
或者你也可以这样做(向量化的写法):
# Vectorised alternative: a row qualifies when some column and its right-hand
# neighbour are both non-NA.
# The data columns are picked by name rather than with `df[-1]`: once a `User`
# column has been added to `df`, `df[-1]` would include it and every row with
# a non-NA last data column would be flagged.
indx <- !is.na(df[setdiff(names(df), c("ID", "User"))])
# Shift the matrix one column to the left and pad with FALSE on the right.
# `drop = FALSE` keeps the matrix shape even for a single-row data frame.
shifted <- cbind(indx[, -1, drop = FALSE], FALSE)
# `indx & shifted` is TRUE exactly where two neighbouring values are present;
# a row with at least one such pair gets 1, all others 0.
unname(rowSums(indx & shifted) > 0) + 0
#[1] 0 0 1 0 1 0