我想编写一个函数或循环来创建三个新列:如果原始列中的值属于三个指定列表之一,就用该列表对应的指定值填充新列;否则沿用原始列中的值。
例如,以下是数据的样子:
> data
a1 a2 a3
1 C C A
2 A B_20 B_20
3 A C B_30
4 C C B_40
5 C A A
6 B_60 B_60 B_60
7 A A C
8 A C B_80
9 B_90 C B_90
我想创建三个新列(a1_t、a2_t、a3_t)。规则是:如果a1的值在list1中:
# Codes that are mapped to the "B_00_30" group.
list1 <- c("B_10", "B_20", "B_30")
则a1_t取值为B_00_30;如果a1的值在list2中:
# Codes that are mapped to the "B_40_60" group.
list2 <- c("B_40", "B_50", "B_60")
则a1_t取值为B_40_60;如果a1的值在list3中:
# Codes that are mapped to the "B_70_90" group.
list3 <- c("B_70", "B_80", "B_90")
则a1_t取值为B_70_90。如果a1的值不在list1、list2或list3中,则将a1的原值放入a1_t。
然后对a2和a3重复相同的匹配过程,分别得到a2_t和a3_t。
最后我希望输出看起来像这样:
> data
a1 a2 a3 a1_t a2_t a3_t
1 A A B_10 A A B_00_30
2 B_20 A C B_00_30 A C
3 B_30 A C B_00_30 A C
4 C C A C C A
5 A B_50 B_50 A B_40_60 B_40_60
6 C C A C C A
7 C B_70 A C B_70_90 A
8 B_80 C B_80 B_70_90 C B_70_90
9 B_90 C A B_70_90 C A
创建原始数据:
# Example input: nine rows and three character columns (a1, a2, a3).
data <- data.frame(
  a1 = c("A", "B_20", "B_30", "C", "A", "C", "C", "B_80", "B_90"),
  a2 = c("A", "A", "A", "C", "B_50", "C", "B_70", "C", "C"),
  a3 = c("B_10", "C", "C", "A", "B_50", "A", "A", "B_80", "A"),
  stringsAsFactors = FALSE  # keep character columns on R < 4.0 as well
)
创建所需的输出数据:
# Desired result: the three input columns plus their recoded "_t"
# counterparts, all stored as factors with explicitly ordered levels.
data <- data.frame(
  a1 = factor(
    c("A", "B_20", "B_30", "C", "A", "C", "C", "B_80", "B_90"),
    levels = c("A", "B_20", "B_30", "B_80", "B_90", "C")
  ),
  a2 = factor(
    c("A", "A", "A", "C", "B_50", "C", "B_70", "C", "C"),
    levels = c("A", "B_50", "B_70", "C")
  ),
  a3 = factor(
    c("B_10", "C", "C", "A", "B_50", "A", "A", "B_80", "A"),
    levels = c("A", "B_10", "B_50", "B_80", "C")
  ),
  a1_t = factor(
    c("A", "B_00_30", "B_00_30", "C", "A", "C", "C", "B_70_90", "B_70_90"),
    levels = c("A", "B_00_30", "B_70_90", "C")
  ),
  a2_t = factor(
    c("A", "A", "A", "C", "B_40_60", "C", "B_70_90", "C", "C"),
    levels = c("A", "B_40_60", "B_70_90", "C")
  ),
  a3_t = factor(
    c("B_00_30", "C", "C", "A", "B_40_60", "A", "A", "B_70_90", "A"),
    levels = c("A", "B_00_30", "B_40_60", "B_70_90", "C")
  )
)
感谢 -al
采纳答案后的最终可用代码:
library(dplyr)

# Source codes of each group; every code listed here is replaced by the
# label of its group in the lookup table below.
list1 <- c("B_10", "B_20", "B_30")
list2 <- c("B_40", "B_50", "B_60")
list3 <- c("B_70", "B_80", "B_90")

# Two-column character matrix: column 1 ("list") holds the original code,
# column 2 ("val") the group label it is mapped to.
lookup <- rbind(
  cbind(list = list1, val = "B_00_30"),
  cbind(list2, "B_40_60"),
  cbind(list3, "B_70_90")
)

# Recode every column of `data`; values without a lookup entry are kept.
g <- vapply(data, function(x) {
  # Work on character values: with a factor column, ifelse() would return
  # the factor's integer codes instead of its labels.
  x <- as.character(x)
  tmp <- lookup[, 2][match(x, lookup[, 1])]
  unmatched <- is.na(tmp)
  tmp[unmatched] <- x[unmatched]
  tmp
}, FUN.VALUE = character(nrow(data)))

gd <- as.data.frame(g)
# Keep only the recoded columns, renamed with a "_t" suffix.
gd <- select(gd, a1_t = a1, a2_t = a2, a3_t = a3)
h <- cbind(data, gd)
> h
a1 a2 a3 a1_t a2_t a3_t
1 A A B_10 A A B_00_30
2 B_20 A C B_00_30 A C
3 B_30 A C B_00_30 A C
4 C C A C C A
5 A B_50 B_50 A B_40_60 B_40_60
6 C C A C C A
7 C B_70 A C B_70_90 A
8 B_80 C B_80 B_70_90 C B_70_90
9 B_90 C A B_70_90 C A
答案 0 :(得分:1)
一种方式可能是:
# Two-column character matrix: column 1 ("list") holds the original code,
# column 2 ("val") the group label it is mapped to.
lookup <- rbind(
  cbind(list = list1, val = "B_00_30"),
  cbind(list2, "B_40_60"),
  cbind(list3, "B_70_90")
)
# Recode every column of `data`; values without a lookup entry are kept.
vapply(data, function(x) {
  # Work on character values: with a factor column, ifelse() would return
  # the factor's integer codes instead of its labels.
  x <- as.character(x)
  tmp <- lookup[, 2][match(x, lookup[, 1])]
  unmatched <- is.na(tmp)
  tmp[unmatched] <- x[unmatched]
  tmp
}, FUN.VALUE = character(nrow(data)))
# a1 a2 a3
# [1,] "A" "A" "B_00_30"
# [2,] "B_00_30" "A" "C"
# [3,] "B_00_30" "A" "C"
# [4,] "C" "C" "A"
# [5,] "A" "B_40_60" "B_40_60"
# [6,] "C" "C" "A"
# [7,] "C" "B_70_90" "A"
# [8,] "B_70_90" "C" "B_70_90"
# [9,] "B_70_90" "C" "A"
然后你可以用cbind将结果合并到data中,并根据需要强制转换为data.frame。
答案 1 :(得分:0)
使用cut
# Remove every "<char>_" pair (e.g. the "B_" prefix), convert the rest to
# numbers and bin them with breaks 0/30/60/90. `indx` holds the bin number,
# or NA where the value is not numeric (as.numeric() warns about those).
indx <- cut(
  as.numeric(gsub(".\\_", "", as.matrix(data))),
  breaks = c(0, 30, 60, 90),
  labels = FALSE  # spelled out: `F` is an ordinary variable and can be reassigned
)
(在这里,你会收到一条警告信息,因为as.numeric会把无法转换为数字的字符元素强制转换为NA,这正是我想要的效果。)
或者利用list1到list3来计算分割点:
# Upper bound of each group: the largest numeric suffix found in list1,
# list2, ... . The pattern is anchored so that unrelated objects whose names
# merely contain "list" are not picked up.
val <- vapply(
  mget(ls(pattern = "^list[0-9]+$")),
  function(x) max(as.numeric(gsub("._", "", x))),
  numeric(1)
)
val
# list1 list2 list3
#    30    60    90
# Alternative way to build `indx` from the computed bounds:
# indx <- cut(as.numeric(gsub(".\\_", "", as.matrix(data))), breaks = c(0, val), labels = FALSE)

# Turn bin numbers into group labels (this makes `indx` a character vector).
binned <- !is.na(indx)
indx[binned] <- c("B_00_30", "B_40_60", "B_70_90")[indx[binned]]
# Unbinned entries keep their original value. Index by position so the values
# stay aligned even if an unbinned value happens to contain "_".
indx[!binned] <- as.matrix(data)[!binned]
data1 <- data
data1[] <- indx
colnames(data1) <- paste(colnames(data1), "t", sep = "_")
要避免出现警告消息,您可以执行以下操作:
m1 <- as.matrix(data)
# Values with every "<char>_" pair removed; computed once and reused below.
stripped <- gsub(".\\_", "", m1)
# TRUE only where the stripped value is a plain number, so as.numeric() below
# cannot produce NAs (and therefore emits no coercion warning).
indx <- grepl("^\\d+(\\.\\d+)?$", stripped)
indx1 <- cut(
  as.numeric(stripped[indx]),
  breaks = c(0, 30, 60, 90),
  labels = FALSE
)
# Replace only the values that fell into a bin; numbers outside the breaks
# keep their original text instead of being overwritten with NA.
in_bin <- !is.na(indx1)
m1[which(indx)[in_bin]] <- c("B_00_30", "B_40_60", "B_70_90")[indx1[in_bin]]
data1 <- data
data1[] <- m1
colnames(data1) <- paste(colnames(data1), "t", sep = "_")
cbind(data, data1)
# a1 a2 a3 a1_t a2_t a3_t
# 1 A A B_10 A A B_00_30
# 2 B_20 A C B_00_30 A C
# 3 B_30 A C B_00_30 A C
# 4 C C A C C A
# 5 A B_50 B_50 A B_40_60 B_40_60
# 6 C C A C C A
# 7 C B_70 A C B_70_90 A
# 8 B_80 C B_80 B_70_90 C B_70_90
# 9 B_90 C A B_70_90 C A