我有一个数据框如下
head(sip1)
ID INTERVIEWDATE SS1 SS2 SS2b SS3 SS4_1 SS4_2 SS4_3 SS4_4 SS4_5 SS4_6 SS4_7 SS4_8 SS4_9 SS4_10 SS4_11 SS4_12 SS4_13 SS4_14 SS4_15 SS4_16 SS4_17 SS4_18 SS4_19 SS4_20 SS4_21 SS4_22
1 1 2014-03-19 4 1 1 2 1 1 4 1 1 1 1 1 1 1 1 1 1 1 1 1 3 1 1 1 1 1
2 2 2014-03-19 2 1 5 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 4 1 1 1 1 1 1 1
3 3 2014-03-19 3 1 8 2 1 1 1 4 1 1 4 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
4 4 2014-03-18 3 1 5 1 1 1 1 1 1 1 1 1 3 1 1 1 1 1 1 1 1 1 1 1 1 1
5 5 2014-03-18 3 1 8 1 1 1 1 1 1 1 1 1 3 1 1 1 1 2 1 1 1 1 1 1 1 1
6 6 2014-03-19 3 1 1 2 1 1 1 1 1 1 1 1 3 1 1 3 1 3 1 1 1 1 1 1 1 1
SS4_23 SS4_24 SS4_25 SS4_26 SS4_27 SS4_28 SS4_29 SS4_30 SS5 SS6 SS7 HS1_C1_A HS1_C1_B HS1_C1_C HS1_C1_D HS1C1
1 1 1 1 1 1 1 1 1 1 2 2 1 2 2 2 1
2 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 0
3 1 1 1 1 1 1 1 1 1 2 2 1 2 2 2 1
4 1 1 1 1 1 1 1 1 2 2 2 1 2 2 2 1
5 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 0
6 1 1 1 1 1 1 1 1 1 2 2 1 2 2 2 1
在这里，我想基于 SS4_1 … SS4_30 创建一系列新变量（例如 newSS4_1、newSS4_2、newSS4_3 … newSS4_30），使用如下条件：newSS4_1[SS4_1==1] <- 0; newSS4_1[SS4_1!=1] <- 1。
我希望按此条件把这 30 个变量变成二值变量：如果旧值 = 1，则新值 = 0；如果旧值 != 1，则新值 = 1，并将结果保存为新变量。
有人可以帮忙吗?我不知道如何在循环中执行此操作来编写30个新变量,以及如何使用if条件?
谢谢,奇
答案 0 :(得分:2)
这是一种方法。这里使用了您所描述的 ifelse 语句；contains("SS4_") 表示 R 会把该 ifelse 语句应用于名称中包含 "SS4_" 的所有列。在这个建议中，我正在覆盖现有的列。
# Small example data set: an id, an interview date kept as character, and two
# SS4_ items that the answers below recode.
foo <- data.frame(
  id = seq_len(5),
  date = rep(c("2014-03-19", "2014-03-18"), times = c(3, 2)),
  SS4_1 = c(1, 0, 2, 1, 0),
  SS4_2 = c(3, 1, 0, 7, 2),
  stringsAsFactors = FALSE
)
# id date SS4_1 SS4_2
#1 1 2014-03-19 1 3
#2 2 2014-03-19 0 1
#3 3 2014-03-19 2 0
#4 4 2014-03-18 1 7
#5 5 2014-03-18 0 2
library(dplyr)
# Recode every column whose name contains "SS4_": 1 becomes 0, any other value
# becomes 1. `.names = "new{.col}"` stores each result in a new column
# (newSS4_1, newSS4_2, ...) next to the original instead of overwriting it.
# across() replaces the deprecated mutate_each()/funs() pair, which overwrote
# the SS4_ columns and left the gsub("vars", ...) rename with nothing to match.
ana <- foo %>%
  mutate(across(contains("SS4_"),
                ~ ifelse(.x == 1, 0, 1),
                .names = "new{.col}"))
# id date SS4_1 SS4_2 newSS4_1 newSS4_2
#1 1 2014-03-19 1 3 0 1
#2 2 2014-03-19 0 1 1 0
#3 3 2014-03-19 2 0 1 1
#4 4 2014-03-18 1 7 0 1
#5 5 2014-03-18 0 2 1 1
答案 1 :(得分:2)
使用 data.table。数据 sip1 以及列索引的做法取自 @user20650 的帖子。
library(data.table)
# Names of the columns that start with "SS4_".
nm1 <- grep("^SS4_", names(sip1), value = TRUE)
# Turn sip1 into a data.table and add one "new<name>" column per SS4_ column:
# 1 where the value differs from 1, otherwise 0.
setDT(sip1)[, (paste0("new", nm1)) := lapply(.SD, function(col) 1 * (col != 1)),
            .SDcols = nm1]
# List every column name containing "SS4" (originals and the new ones).
grep("SS4", names(sip1), value = TRUE)
# [1] "SS4_1" "SS4_2" "SS4_3" "SS4_4" "SS4_5" "SS4_6"
# [7] "SS4_7" "SS4_8" "SS4_9" "SS4_10" "SS4_11" "SS4_12"
# [13] "SS4_13" "SS4_14" "SS4_15" "SS4_16" "SS4_17" "SS4_18"
# [19] "SS4_19" "SS4_20" "SS4_21" "SS4_22" "SS4_23" "SS4_24"
# [25] "SS4_25" "SS4_26" "SS4_27" "SS4_28" "SS4_29" "SS4_30"
# [31] "newSS4_1" "newSS4_2" "newSS4_3" "newSS4_4" "newSS4_5" "newSS4_6"
# [37] "newSS4_7" "newSS4_8" "newSS4_9" "newSS4_10" "newSS4_11" "newSS4_12"
# [43] "newSS4_13" "newSS4_14" "newSS4_15" "newSS4_16" "newSS4_17" "newSS4_18"
# [49] "newSS4_19" "newSS4_20" "newSS4_21" "newSS4_22" "newSS4_23" "newSS4_24"
# [55] "newSS4_25" "newSS4_26" "newSS4_27" "newSS4_28" "newSS4_29" "newSS4_30"
答案 2 :(得分:1)
使用Jazzurro的数据
# Compare the two SS4_ columns with 1 (TRUE where the value is not 1) and turn
# the logical result into numeric 1/0 by multiplying with 1.
1 * (foo[c("SS4_1", "SS4_2")] != 1)
# SS4_1 SS4_2
#[1,] 0 1
#[2,] 1 0
#[3,] 1 1
#[4,] 0 1
#[5,] 1 1
foo[paste0("SS4_", 1:2)] !=1 检验各个值是否不等于 1，结果为 TRUE 或 FALSE；再加上 0，就可以把它们转换为 1 和 0。
编辑:添加到现有数据集
# 0/1 indicator matrix for the two SS4_ columns: 1 where the value is not 1.
newvars <- 1 * (foo[c("SS4_1", "SS4_2")] != 1)
# Prefix the column names with "new" so they do not clash with the originals.
dimnames(newvars)[[2]] <- paste0("new", dimnames(newvars)[[2]])
# Show the original data with the indicator columns appended.
cbind(foo, newvars)
EDIT2：对于更大的数据集 sip1，可以使用 grep 为以 SS4_ 开头的列创建索引。
# Positions of the columns whose names start with "SS4_". The "^" anchor keeps
# already-derived columns such as "newSS4_1" out of the selection, which the
# unanchored pattern "SS4_" would also match.
indx <- grep("^SS4_", colnames(sip1))
# (sip1[indx] != 1) is a logical matrix; adding 0 turns TRUE/FALSE into 1/0.
# The recoded columns get a "new" prefix and are appended to the original data.
res <- cbind(sip1, setNames(as.data.frame((sip1[indx] != 1) + 0),
                            paste0("new", colnames(sip1)[indx])))
# Reproducible copy of the example data (looks like dput() output): a
# data.frame with 6 rows holding ID, INTERVIEWDATE (character), SS1, SS2, SS2b,
# SS3, the 30 items SS4_1 .. SS4_30, SS5, SS6, SS7, HS1_C1_A .. HS1_C1_D and
# HS1C1. All columns except INTERVIEWDATE are integer.
sip1 <- structure(list(ID = 1:6, INTERVIEWDATE = c("2014-03-19", "2014-03-19",
"2014-03-19", "2014-03-18", "2014-03-18", "2014-03-19"), SS1 = c(4L,
2L, 3L, 3L, 3L, 3L), SS2 = c(1L, 1L, 1L, 1L, 1L, 1L), SS2b = c(1L,
5L, 8L, 5L, 8L, 1L), SS3 = c(2L, 1L, 2L, 1L, 1L, 2L), SS4_1 = c(1L,
1L, 1L, 1L, 1L, 1L), SS4_2 = c(1L, 1L, 1L, 1L, 1L, 1L), SS4_3 = c(4L,
1L, 1L, 1L, 1L, 1L), SS4_4 = c(1L, 1L, 4L, 1L, 1L, 1L), SS4_5 = c(1L,
1L, 1L, 1L, 1L, 1L), SS4_6 = c(1L, 1L, 1L, 1L, 1L, 1L), SS4_7 = c(1L,
2L, 4L, 1L, 1L, 1L), SS4_8 = c(1L, 1L, 1L, 1L, 1L, 1L), SS4_9 = c(1L,
1L, 1L, 3L, 3L, 3L), SS4_10 = c(1L, 1L, 1L, 1L, 1L, 1L), SS4_11 = c(1L,
1L, 1L, 1L, 1L, 1L), SS4_12 = c(1L, 1L, 1L, 1L, 1L, 3L), SS4_13 = c(1L,
1L, 1L, 1L, 1L, 1L), SS4_14 = c(1L, 1L, 1L, 1L, 2L, 3L), SS4_15 = c(1L,
4L, 1L, 1L, 1L, 1L), SS4_16 = c(1L, 1L, 1L, 1L, 1L, 1L), SS4_17 = c(3L,
1L, 1L, 1L, 1L, 1L), SS4_18 = c(1L, 1L, 1L, 1L, 1L, 1L), SS4_19 = c(1L,
1L, 1L, 1L, 1L, 1L), SS4_20 = c(1L, 1L, 1L, 1L, 1L, 1L), SS4_21 = c(1L,
1L, 1L, 1L, 1L, 1L), SS4_22 = c(1L, 1L, 1L, 1L, 1L, 1L), SS4_23 = c(1L,
1L, 1L, 1L, 1L, 1L), SS4_24 = c(1L, 1L, 1L, 1L, 1L, 1L), SS4_25 = c(1L,
1L, 1L, 1L, 1L, 1L), SS4_26 = c(1L, 1L, 1L, 1L, 1L, 1L), SS4_27 = c(1L,
1L, 1L, 1L, 1L, 1L), SS4_28 = c(1L, 1L, 1L, 1L, 1L, 1L), SS4_29 = c(1L,
1L, 1L, 1L, 1L, 1L), SS4_30 = c(1L, 1L, 1L, 1L, 1L, 1L), SS5 = c(1L,
1L, 1L, 2L, 1L, 1L), SS6 = c(2L, 2L, 2L, 2L, 2L, 2L), SS7 = c(2L,
2L, 2L, 2L, 2L, 2L), HS1_C1_A = c(1L, 2L, 1L, 1L, 2L, 1L), HS1_C1_B = c(2L,
2L, 2L, 2L, 2L, 2L), HS1_C1_C = c(2L, 2L, 2L, 2L, 2L, 2L), HS1_C1_D = c(2L,
2L, 2L, 2L, 2L, 2L), HS1C1 = c(1L, 0L, 1L, 1L, 0L, 1L)), .Names = c("ID",
"INTERVIEWDATE", "SS1", "SS2", "SS2b", "SS3", "SS4_1", "SS4_2",
"SS4_3", "SS4_4", "SS4_5", "SS4_6", "SS4_7", "SS4_8", "SS4_9",
"SS4_10", "SS4_11", "SS4_12", "SS4_13", "SS4_14", "SS4_15", "SS4_16",
"SS4_17", "SS4_18", "SS4_19", "SS4_20", "SS4_21", "SS4_22", "SS4_23",
"SS4_24", "SS4_25", "SS4_26", "SS4_27", "SS4_28", "SS4_29", "SS4_30",
"SS5", "SS6", "SS7", "HS1_C1_A", "HS1_C1_B", "HS1_C1_C", "HS1_C1_D",
"HS1C1"), row.names = c("1", "2", "3", "4", "5", "6"), class = "data.frame")
答案 3 :(得分:0)
尝试:
# Work on a copy so ddf itself is not modified.
nddf <- ddf
# Positions of the columns to recode: everything from the third column onward.
# Negative indexing gives an empty selection when ddf has fewer than three
# columns, whereas 3:length(ddf) would count backwards (3, 2, ...) and fail.
idx <- seq_along(ddf)[-(1:2)]
if (length(idx) > 0) {
  # Append all recoded columns in one step (1 becomes 0, anything else becomes
  # 1) instead of growing the data frame one column per loop iteration.
  new_pos <- length(ddf) + seq_along(idx)
  nddf[new_pos] <- lapply(ddf[idx], function(x) ifelse(x == 1, 0, 1))
  # Name each appended column "new" + the name of its source column.
  names(nddf)[new_pos] <- paste0("new", names(ddf)[idx])
}
nddf
ID INTERVIEWDATE SS1 SS2 SS2b SS3 SS4_1 SS4_2 SS4_3 SS4_4 SS4_5 SS4_6 SS4_7 SS4_8 SS4_9 SS4_10 SS4_11 SS4_12 SS4_13 SS4_14
1 1 2014-03-19 4 1 1 2 1 1 4 1 1 1 1 1 1 1 1 1 1 1
2 2 2014-03-19 2 1 5 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1
3 3 2014-03-19 3 1 8 2 1 1 1 4 1 1 4 1 1 1 1 1 1 1
4 4 2014-03-18 3 1 5 1 1 1 1 1 1 1 1 1 3 1 1 1 1 1
5 5 2014-03-18 3 1 8 1 1 1 1 1 1 1 1 1 3 1 1 1 1 2
6 6 2014-03-19 3 1 1 2 1 1 1 1 1 1 1 1 3 1 1 3 1 3
SS4_15 SS4_16 SS4_17 SS4_18 SS4_19 SS4_20 SS4_21 SS4_22 newSS1 newSS2 newSS2b newSS3 newSS4_1 newSS4_2 newSS4_3 newSS4_4
1 1 1 3 1 1 1 1 1 1 0 0 1 0 0 1 0
2 4 1 1 1 1 1 1 1 1 0 1 0 0 0 0 0
3 1 1 1 1 1 1 1 1 1 0 1 1 0 0 0 1
4 1 1 1 1 1 1 1 1 1 0 1 0 0 0 0 0
5 1 1 1 1 1 1 1 1 1 0 1 0 0 0 0 0
6 1 1 1 1 1 1 1 1 1 0 0 1 0 0 0 0
newSS4_5 newSS4_6 newSS4_7 newSS4_8 newSS4_9 newSS4_10 newSS4_11 newSS4_12 newSS4_13 newSS4_14 newSS4_15 newSS4_16 newSS4_17
1 0 0 0 0 0 0 0 0 0 0 0 0 1
2 0 0 1 0 0 0 0 0 0 0 1 0 0
3 0 0 1 0 0 0 0 0 0 0 0 0 0
4 0 0 0 0 1 0 0 0 0 0 0 0 0
5 0 0 0 0 1 0 0 0 0 1 0 0 0
6 0 0 0 0 1 0 0 1 0 1 0 0 0
newSS4_18 newSS4_19 newSS4_20 newSS4_21 newSS4_22
1 0 0 0 0 0
2 0 0 0 0 0
3 0 0 0 0 0
4 0 0 0 0 0
5 0 0 0 0 0
6 0 0 0 0 0