使用for循环和if条件创建新变量

时间:2014-11-04 04:00:02

标签: r variables if-statement for-loop

我有一个数据框如下

head(sip1)

   ID INTERVIEWDATE SS1 SS2 SS2b SS3 SS4_1 SS4_2 SS4_3 SS4_4 SS4_5 SS4_6 SS4_7 SS4_8 SS4_9 SS4_10 SS4_11 SS4_12 SS4_13 SS4_14 SS4_15 SS4_16 SS4_17 SS4_18 SS4_19 SS4_20 SS4_21 SS4_22
1  1    2014-03-19   4   1    1   2     1     1     4     1     1     1     1     1     1      1      1      1      1      1      1      1      3      1      1      1      1      1
2  2    2014-03-19   2   1    5   1     1     1     1     1     1     1     2     1     1      1      1      1      1      1      4      1      1      1      1      1      1      1
3  3    2014-03-19   3   1    8   2     1     1     1     4     1     1     4     1     1      1      1      1      1      1      1      1      1      1      1      1      1      1
4  4    2014-03-18   3   1    5   1     1     1     1     1     1     1     1     1     3      1      1      1      1      1      1      1      1      1      1      1      1      1
5  5    2014-03-18   3   1    8   1     1     1     1     1     1     1     1     1     3      1      1      1      1      2      1      1      1      1      1      1      1      1
6  6    2014-03-19   3   1    1   2     1     1     1     1     1     1     1     1     3      1      1      3      1      3      1      1      1      1      1      1      1      1

      SS4_23 SS4_24 SS4_25 SS4_26 SS4_27 SS4_28 SS4_29 SS4_30 SS5 SS6 SS7 HS1_C1_A HS1_C1_B HS1_C1_C HS1_C1_D HS1C1
    1      1      1      1      1      1      1      1      1   1   2   2        1        2        2        2     1
    2      1      1      1      1      1      1      1      1   1   2   2        2        2        2        2     0
    3      1      1      1      1      1      1      1      1   1   2   2        1        2        2        2     1
    4      1      1      1      1      1      1      1      1   2   2   2        1        2        2        2     1
    5      1      1      1      1      1      1      1      1   1   2   2        2        2        2        2     0
    6      1      1      1      1      1      1      1      1   1   2   2        1        2        2        2     1

在这里,我想根据SS4_1 ... SS4_30创建一系列新变量(例如newSS4_1、newSS4_2、newSS4_3 ... newSS4_30),使用类似newSS4_1[SS4_1==1] <- 0; newSS4_1[SS4_1!=1] <- 1的条件。我希望按照这个条件把这30个变量转换为二值变量:如果旧值等于1,则新值为0;如果旧值不等于1,则新值为1,并将结果保存为新变量。

有人可以帮忙吗?我不知道如何在循环中执行此操作来编写30个新变量,以及如何使用if条件?

谢谢,奇

4 个答案:

答案 0 :(得分:2)

这是一种方法。这里使用了您所描述的ifelse语句。contains("SS4_")表示R会将此ifelse语句应用于名称中包含"SS4_"的所有列。在这个建议中,我正在覆盖现有的列。

# Toy data set standing in for sip1: five rows, an id, an interview date
# (kept as character, not factor) and two SS4_ columns to recode.
foo <- data.frame(
  id = seq_len(5),
  date = rep(c("2014-03-19", "2014-03-18"), times = c(3, 2)),
  SS4_1 = c(1, 0, 2, 1, 0),
  SS4_2 = c(3, 1, 0, 7, 2),
  stringsAsFactors = FALSE
)

#  id       date SS4_1 SS4_2
#1  1 2014-03-19     1     3
#2  2 2014-03-19     0     1
#3  3 2014-03-19     2     0
#4  4 2014-03-18     1     7
#5  5 2014-03-18     0     2   

library(dplyr)

# Recode every column whose name contains "SS4_" into a 0/1 indicator
# (1 -> 0, anything else -> 1). `.names = "new{.col}"` writes the result to
# new columns (newSS4_1, newSS4_2, ...) and leaves the original SS4_* columns
# untouched. This replaces mutate_each()/funs(), which are deprecated in dplyr
# and overwrote the columns in place, so the follow-up gsub() on "vars" never
# matched any column name.
ana <- foo %>%
  mutate(across(contains("SS4_"), ~ ifelse(.x == 1, 0, 1),
                .names = "new{.col}"))

#  id       date SS4_1 SS4_2 newSS4_1 newSS4_2
#1  1 2014-03-19     1     3        0        1
#2  2 2014-03-19     0     1        1        0
#3  3 2014-03-19     2     0        1        1
#4  4 2014-03-18     1     7        0        1
#5  5 2014-03-18     0     2        1        1

答案 1 :(得分:2)

使用data.table处理sip1,列索引的方法取自@user20650的帖子。

library(data.table)

# Source columns: every column of sip1 whose name starts with "SS4_".
nm1 <- colnames(sip1)[grepl("^SS4_", colnames(sip1))]

# setDT() converts sip1 to a data.table by reference; `:=` then adds one
# "new"-prefixed column per source column in place. (col != 1) + 0 turns the
# logical comparison into numeric 0/1.
setDT(sip1)
sip1[, (paste0("new", nm1)) := lapply(.SD, function(col) (col != 1) + 0),
     .SDcols = nm1]

# List all SS4-related column names, originals followed by the new ones.
grep("SS4", colnames(sip1), value = TRUE)
# [1] "SS4_1"     "SS4_2"     "SS4_3"     "SS4_4"     "SS4_5"     "SS4_6"    
# [7] "SS4_7"     "SS4_8"     "SS4_9"     "SS4_10"    "SS4_11"    "SS4_12"   
# [13] "SS4_13"    "SS4_14"    "SS4_15"    "SS4_16"    "SS4_17"    "SS4_18"   
# [19] "SS4_19"    "SS4_20"    "SS4_21"    "SS4_22"    "SS4_23"    "SS4_24"   
# [25] "SS4_25"    "SS4_26"    "SS4_27"    "SS4_28"    "SS4_29"    "SS4_30"   
# [31] "newSS4_1"  "newSS4_2"  "newSS4_3"  "newSS4_4"  "newSS4_5"  "newSS4_6" 
# [37] "newSS4_7"  "newSS4_8"  "newSS4_9"  "newSS4_10" "newSS4_11" "newSS4_12"
# [43] "newSS4_13" "newSS4_14" "newSS4_15" "newSS4_16" "newSS4_17" "newSS4_18"
# [49] "newSS4_19" "newSS4_20" "newSS4_21" "newSS4_22" "newSS4_23" "newSS4_24"
# [55] "newSS4_25" "newSS4_26" "newSS4_27" "newSS4_28" "newSS4_29" "newSS4_30"

答案 2 :(得分:1)

使用Jazzurro的数据

# Compare the SS4_1 and SS4_2 columns of foo with 1 (TRUE where the value is
# not 1), then add 0 to coerce the logical result to numeric 1/0.
(foo[paste0("SS4_", 1:2)] !=1) + 0
#     SS4_1 SS4_2
#[1,]     0     1
#[2,]     1     0
#[3,]     1     1
#[4,]     0     1
#[5,]     1     1

foo[paste0("SS4_", 1:2)] != 1检验各个值是否不等于1,结果为TRUE或FALSE。加上零是把它们转换为1和0的一种方法。

编辑:添加到现有数据集

# Build the 0/1 indicator matrix for SS4_1 and SS4_2, prefix its column
# names with "new", and bind it onto the original data frame.
newvars <- (foo[paste0("SS4_", seq_len(2))] != 1) + 0
dimnames(newvars)[[2]] <- paste0("new", colnames(newvars))
cbind(foo, newvars)

EDIT2:对于更大的数据集sip1,先为以SS4_开头的列创建索引。您可以使用grep执行此操作:

# Positions of the columns whose names start with "SS4_". The pattern is
# anchored with "^" so that already-recoded "newSS4_*" columns are not picked
# up again if this is run on data that already contains them.
indx <- grep("^SS4_", colnames(sip1))

# 0/1 indicators (1 -> 0, anything else -> 1) for those columns, named with a
# "new" prefix and appended to the original data.
recoded <- as.data.frame((sip1[indx] != 1) + 0)
names(recoded) <- paste0("new", colnames(sip1)[indx])
res <- cbind(sip1, recoded)

数据

# Reproducible copy of the example data (looks like dput() output): a
# 6-row data.frame with ID, INTERVIEWDATE (character), SS1-SS3, SS4_1 ...
# SS4_30, SS5-SS7, the HS1_C1_* columns and HS1C1. All columns other than
# INTERVIEWDATE are integer.
sip1 <- structure(list(ID = 1:6, INTERVIEWDATE = c("2014-03-19", "2014-03-19", 
"2014-03-19", "2014-03-18", "2014-03-18", "2014-03-19"), SS1 = c(4L, 
2L, 3L, 3L, 3L, 3L), SS2 = c(1L, 1L, 1L, 1L, 1L, 1L), SS2b = c(1L, 
5L, 8L, 5L, 8L, 1L), SS3 = c(2L, 1L, 2L, 1L, 1L, 2L), SS4_1 = c(1L, 
1L, 1L, 1L, 1L, 1L), SS4_2 = c(1L, 1L, 1L, 1L, 1L, 1L), SS4_3 = c(4L, 
1L, 1L, 1L, 1L, 1L), SS4_4 = c(1L, 1L, 4L, 1L, 1L, 1L), SS4_5 = c(1L, 
1L, 1L, 1L, 1L, 1L), SS4_6 = c(1L, 1L, 1L, 1L, 1L, 1L), SS4_7 = c(1L, 
2L, 4L, 1L, 1L, 1L), SS4_8 = c(1L, 1L, 1L, 1L, 1L, 1L), SS4_9 = c(1L, 
1L, 1L, 3L, 3L, 3L), SS4_10 = c(1L, 1L, 1L, 1L, 1L, 1L), SS4_11 = c(1L, 
1L, 1L, 1L, 1L, 1L), SS4_12 = c(1L, 1L, 1L, 1L, 1L, 3L), SS4_13 = c(1L, 
1L, 1L, 1L, 1L, 1L), SS4_14 = c(1L, 1L, 1L, 1L, 2L, 3L), SS4_15 = c(1L, 
4L, 1L, 1L, 1L, 1L), SS4_16 = c(1L, 1L, 1L, 1L, 1L, 1L), SS4_17 = c(3L, 
1L, 1L, 1L, 1L, 1L), SS4_18 = c(1L, 1L, 1L, 1L, 1L, 1L), SS4_19 = c(1L, 
1L, 1L, 1L, 1L, 1L), SS4_20 = c(1L, 1L, 1L, 1L, 1L, 1L), SS4_21 = c(1L, 
1L, 1L, 1L, 1L, 1L), SS4_22 = c(1L, 1L, 1L, 1L, 1L, 1L), SS4_23 = c(1L, 
1L, 1L, 1L, 1L, 1L), SS4_24 = c(1L, 1L, 1L, 1L, 1L, 1L), SS4_25 = c(1L, 
1L, 1L, 1L, 1L, 1L), SS4_26 = c(1L, 1L, 1L, 1L, 1L, 1L), SS4_27 = c(1L, 
1L, 1L, 1L, 1L, 1L), SS4_28 = c(1L, 1L, 1L, 1L, 1L, 1L), SS4_29 = c(1L, 
1L, 1L, 1L, 1L, 1L), SS4_30 = c(1L, 1L, 1L, 1L, 1L, 1L), SS5 = c(1L, 
1L, 1L, 2L, 1L, 1L), SS6 = c(2L, 2L, 2L, 2L, 2L, 2L), SS7 = c(2L, 
2L, 2L, 2L, 2L, 2L), HS1_C1_A = c(1L, 2L, 1L, 1L, 2L, 1L), HS1_C1_B = c(2L, 
2L, 2L, 2L, 2L, 2L), HS1_C1_C = c(2L, 2L, 2L, 2L, 2L, 2L), HS1_C1_D = c(2L, 
2L, 2L, 2L, 2L, 2L), HS1C1 = c(1L, 0L, 1L, 1L, 0L, 1L)), .Names = c("ID", 
"INTERVIEWDATE", "SS1", "SS2", "SS2b", "SS3", "SS4_1", "SS4_2", 
"SS4_3", "SS4_4", "SS4_5", "SS4_6", "SS4_7", "SS4_8", "SS4_9", 
"SS4_10", "SS4_11", "SS4_12", "SS4_13", "SS4_14", "SS4_15", "SS4_16", 
"SS4_17", "SS4_18", "SS4_19", "SS4_20", "SS4_21", "SS4_22", "SS4_23", 
"SS4_24", "SS4_25", "SS4_26", "SS4_27", "SS4_28", "SS4_29", "SS4_30", 
"SS5", "SS6", "SS7", "HS1_C1_A", "HS1_C1_B", "HS1_C1_C", "HS1_C1_D", 
"HS1C1"), row.names = c("1", "2", "3", "4", "5", "6"), class = "data.frame")

答案 3 :(得分:0)

尝试:

# Copy ddf and append a 0/1 recode (1 -> 0, anything else -> 1) of every
# column from the third one onwards, named "new<original name>".
nddf <- ddf

# Columns to recode: everything except the first two. Dropping positions 1:2
# from seq_along() yields an empty index when ddf has fewer than three
# columns, whereas 3:length(ddf) would count backwards (3, 2, ...).
src <- seq_along(ddf)[-(1:2)]

if (length(src) > 0) {
  # Recode all source columns in one vectorised step instead of growing the
  # data frame one column at a time inside a loop.
  nddf[paste0("new", names(ddf)[src])] <- lapply(
    ddf[src],
    function(x) ifelse(x == 1, 0, 1)
  )
}
nddf

  ID INTERVIEWDATE SS1 SS2 SS2b SS3 SS4_1 SS4_2 SS4_3 SS4_4 SS4_5 SS4_6 SS4_7 SS4_8 SS4_9 SS4_10 SS4_11 SS4_12 SS4_13 SS4_14
1  1    2014-03-19   4   1    1   2     1     1     4     1     1     1     1     1     1      1      1      1      1      1
2  2    2014-03-19   2   1    5   1     1     1     1     1     1     1     2     1     1      1      1      1      1      1
3  3    2014-03-19   3   1    8   2     1     1     1     4     1     1     4     1     1      1      1      1      1      1
4  4    2014-03-18   3   1    5   1     1     1     1     1     1     1     1     1     3      1      1      1      1      1
5  5    2014-03-18   3   1    8   1     1     1     1     1     1     1     1     1     3      1      1      1      1      2
6  6    2014-03-19   3   1    1   2     1     1     1     1     1     1     1     1     3      1      1      3      1      3
  SS4_15 SS4_16 SS4_17 SS4_18 SS4_19 SS4_20 SS4_21 SS4_22 newSS1 newSS2 newSS2b newSS3 newSS4_1 newSS4_2 newSS4_3 newSS4_4
1      1      1      3      1      1      1      1      1      1      0       0      1        0        0        1        0
2      4      1      1      1      1      1      1      1      1      0       1      0        0        0        0        0
3      1      1      1      1      1      1      1      1      1      0       1      1        0        0        0        1
4      1      1      1      1      1      1      1      1      1      0       1      0        0        0        0        0
5      1      1      1      1      1      1      1      1      1      0       1      0        0        0        0        0
6      1      1      1      1      1      1      1      1      1      0       0      1        0        0        0        0
  newSS4_5 newSS4_6 newSS4_7 newSS4_8 newSS4_9 newSS4_10 newSS4_11 newSS4_12 newSS4_13 newSS4_14 newSS4_15 newSS4_16 newSS4_17
1        0        0        0        0        0         0         0         0         0         0         0         0         1
2        0        0        1        0        0         0         0         0         0         0         1         0         0
3        0        0        1        0        0         0         0         0         0         0         0         0         0
4        0        0        0        0        1         0         0         0         0         0         0         0         0
5        0        0        0        0        1         0         0         0         0         1         0         0         0
6        0        0        0        0        1         0         0         1         0         1         0         0         0
  newSS4_18 newSS4_19 newSS4_20 newSS4_21 newSS4_22
1         0         0         0         0         0
2         0         0         0         0         0
3         0         0         0         0         0
4         0         0         0         0         0
5         0         0         0         0         0
6         0         0         0         0         0