用循环或函数创建新列并根据列表填充

时间:2014-07-18 18:10:07

标签: r list function loops

我想编写一个函数或循环来创建三个新列:如果原始列中的值出现在三个指定列表之一中,就用该列表对应的指定值填充新列;否则就沿用原始列中的值。

例如,以下是数据的样子:

> data
    a1   a2   a3
1    C    C    A
2    A B_20 B_20
3    A    C B_30
4    C    C B_40
5    C    A    A
6 B_60 B_60 B_60
7    A    A    C
8    A    C B_80
9 B_90    C B_90

我想创建三个新列(a1_t、a2_t、a3_t)。如果a1的值在list1中:

# Codes that belong to the first bucket.
list1 <- c("B_10", "B_20", "B_30")

则在a1_t中填入B_00_30;

如果a1的值在list2中:

# Codes that belong to the second bucket.
list2 <- c("B_40", "B_50", "B_60")

则在a1_t中填入B_40_60;

如果a1的值在list3中:

# Codes that belong to the third bucket.
list3 <- c("B_70", "B_80", "B_90")

则在a1_t中填入B_70_90。

如果a1的值不在list1、list2、list3中的任何一个里,则将a1的原值放入a1_t。

然后对a2和a3重复同样的匹配过程,分别生成a2_t和a3_t。

最后我希望输出看起来像这样:

> data
    a1   a2   a3    a1_t    a2_t    a3_t
1    A    A B_10       A       A B_00_30
2 B_20    A    C B_00_30       A       C
3 B_30    A    C B_00_30       A       C
4    C    C    A       C       C       A
5    A B_50 B_50       A B_40_60 B_40_60
6    C    C    A       C       C       A
7    C B_70    A       C B_70_90       A
8 B_80    C B_80 B_70_90       C B_70_90
9 B_90    C    A B_70_90       C       A

创建原始原始数据:

# Original example data: three character columns (a1, a2, a3), nine rows.
data <- data.frame(
  a1 = c("A", "B_20", "B_30", "C", "A", "C", "C", "B_80", "B_90"),
  a2 = c("A", "A", "A", "C", "B_50", "C", "B_70", "C", "C"),
  a3 = c("B_10", "C", "C", "A", "B_50", "A", "A", "B_80", "A"),
  stringsAsFactors = FALSE
)

创建所需的输出数据:

# Desired result: the three original columns plus their recoded "_t"
# counterparts, all stored as factors with explicitly ordered levels.
data <- data.frame(
  a1 = factor(
    c("A", "B_20", "B_30", "C", "A", "C", "C", "B_80", "B_90"),
    levels = c("A", "B_20", "B_30", "B_80", "B_90", "C")
  ),
  a2 = factor(
    c("A", "A", "A", "C", "B_50", "C", "B_70", "C", "C"),
    levels = c("A", "B_50", "B_70", "C")
  ),
  a3 = factor(
    c("B_10", "C", "C", "A", "B_50", "A", "A", "B_80", "A"),
    levels = c("A", "B_10", "B_50", "B_80", "C")
  ),
  a1_t = factor(
    c("A", "B_00_30", "B_00_30", "C", "A", "C", "C", "B_70_90", "B_70_90"),
    levels = c("A", "B_00_30", "B_70_90", "C")
  ),
  a2_t = factor(
    c("A", "A", "A", "C", "B_40_60", "C", "B_70_90", "C", "C"),
    levels = c("A", "B_40_60", "B_70_90", "C")
  ),
  a3_t = factor(
    c("B_00_30", "C", "C", "A", "B_40_60", "A", "A", "B_70_90", "A"),
    levels = c("A", "B_00_30", "B_40_60", "B_70_90", "C")
  )
)

谢谢 -al

结合答案得到的最终可运行代码:

library(dplyr)
# Codes belonging to each bucket.
list1 <- c("B_10", "B_20", "B_30")
list2 <- c("B_40", "B_50", "B_60")
list3 <- c("B_70", "B_80", "B_90")

# Two-column lookup table: "list" holds an original code, "val" the bucket
# label that code is replaced with.
lookup <- rbind(
  cbind(list = list1, val = "B_00_30"),
  cbind(list = list2, val = "B_40_60"),
  cbind(list = list3, val = "B_70_90")
)

# Recode every column of `data`. lapply() always yields one element per
# column; sapply() would collapse a one-row input into a plain vector and
# break the data-frame conversion below.
g <- lapply(data, function(x) {
  # Work on strings so factor columns do not leak their integer codes.
  x <- as.character(x)
  tmp <- lookup[, "val"][match(x, lookup[, "list"])]
  # Values with no lookup entry keep their original value.
  unmatched <- is.na(tmp)
  tmp[unmatched] <- x[unmatched]
  tmp
})

# Recoded columns get the source column name plus a "_t" suffix, whatever
# the columns of `data` are called.
gd <- as.data.frame(g, stringsAsFactors = FALSE)
names(gd) <- paste0(names(data), "_t")

# Original columns followed by their recoded counterparts.
h <- cbind(data, gd)
> h
    a1   a2   a3    a1_t    a2_t    a3_t
1    A    A B_10       A       A B_00_30
2 B_20    A    C B_00_30       A       C
3 B_30    A    C B_00_30       A       C
4    C    C    A       C       C       A
5    A B_50 B_50       A B_40_60 B_40_60
6    C    C    A       C       C       A
7    C B_70    A       C B_70_90       A
8 B_80    C B_80 B_70_90       C B_70_90
9 B_90    C    A B_70_90       C       A

2 个答案:

答案 0 :(得分:1)

一种方式可能是:

# Two-column lookup table: "list" holds an original code, "val" the bucket
# label that code is replaced with.
lookup <- rbind(
  cbind(list = list1, val = "B_00_30"),
  cbind(list = list2, val = "B_40_60"),
  cbind(list = list3, val = "B_70_90")
)

# For each column of `data`, look up the bucket label of every value and
# fall back to the value itself where the lookup has no entry.
sapply(data, function(column) {
  label <- lookup[match(column, lookup[, "list"]), "val"]
  ifelse(is.na(label), column, label)
})
#      a1        a2        a3       
# [1,] "A"       "A"       "B_00_30"
# [2,] "B_00_30" "A"       "C"      
# [3,] "B_00_30" "A"       "C"      
# [4,] "C"       "C"       "A"      
# [5,] "A"       "B_40_60" "B_40_60"
# [6,] "C"       "C"       "A"      
# [7,] "C"       "B_70_90" "A"      
# [8,] "B_70_90" "C"       "B_70_90"
# [9,] "B_70_90" "C"       "A" 

然后你可以将结果cbind到data上,并根据需要强制转换为data.frame。

答案 1 :(得分:0)

另一种方法是使用cut:

 # Strip the "<char>_" prefix from every cell, convert what is left to a
 # number (non-numeric cells become NA with a warning, which is intended)
 # and bin it into (0,30], (30,60], (60,90]. labels = FALSE makes cut()
 # return the integer bin index; FALSE is spelled out because F can be
 # reassigned.
 indx <- cut(
   as.numeric(gsub(".\\_", "", as.matrix(data))),
   breaks = c(0, 30, 60, 90),
   labels = FALSE
 )

(这里会出现一条警告信息,因为as.numeric会把非数字的字符元素强制转换为NA,而这正是我想要的效果。)

或者利用list1到list3中的信息:

 # Derive the upper break of each bucket from the listN vectors themselves:
 # strip the "<char>_" prefix and take the largest number in each list.
 # The anchored pattern only picks up objects named list<digits>; a bare
 # "list" would also match any other object whose name contains "list".
 # vapply() guarantees a numeric result even when nothing matches.
 val <- vapply(
   mget(ls(pattern = "^list[0-9]+$")),
   function(x) max(as.numeric(gsub("._", "", x))),
   numeric(1)
 )
 val
 # list1 list2 list3 
 #    30    60    90 

 # Alternative way to build indx from the breaks computed in `val`:
 # indx <- cut(as.numeric(gsub(".\\_", "", as.matrix(data))),
 #             breaks = c(0, val), labels = FALSE)

 # Record which cells received no bucket before indx is overwritten, so the
 # fill-back below targets exactly those positions. Deriving the source
 # positions from a separate "_" test misaligns the values as soon as a
 # cell contains "_" but falls outside every bucket.
 unmatched <- is.na(indx)
 indx[!unmatched] <- c("B_00_30", "B_40_60", "B_70_90")[indx[!unmatched]]
 indx[unmatched] <- as.matrix(data)[unmatched]

 # Same shape as `data`, filled column by column with the recoded values and
 # with "_t" appended to every column name.
 data1 <- data
 data1[] <- indx
 colnames(data1) <- paste0(colnames(data1), "_t")

更新

要避免出现警告消息,您可以执行以下操作:

 # Warning-free variant: only cells that are purely numeric once the
 # "<char>_" prefix is stripped are handed to as.numeric().
 m1 <- as.matrix(data)
 stripped <- gsub(".\\_", "", m1)
 indx <- grepl("^[0-9]+$", stripped)
 indx1 <- cut(
   as.numeric(stripped[indx]),
   breaks = c(0, 30, 60, 90),
   labels = FALSE
 )
 # Numbers outside (0, 90] get no bucket (NA from cut); leave those cells
 # as they are instead of overwriting them with NA.
 in_range <- !is.na(indx1)
 m1[which(indx)[in_range]] <-
   c("B_00_30", "B_40_60", "B_70_90")[indx1[in_range]]
 # Same shape as `data`, holding the recoded values under "<name>_t".
 data1 <- data
 data1[] <- m1
 colnames(data1) <- paste0(colnames(data1), "_t")
 cbind(data, data1)
 #       a1   a2   a3    a1_t    a2_t    a3_t
 #  1    A    A B_10       A       A B_00_30
 #  2 B_20    A    C B_00_30       A       C
 #  3 B_30    A    C B_00_30       A       C
 #  4    C    C    A       C       C       A
 #  5    A B_50 B_50       A B_40_60 B_40_60
 #  6    C    C    A       C       C       A
 #  7    C B_70    A       C B_70_90       A
 #  8 B_80    C B_80 B_70_90       C B_70_90
 #  9 B_90    C    A B_70_90       C       A