我只是偶尔使用R,这是我第一次在网上(或其他任何地方)提出有关R的问题,所以如果有任何不清楚的地方,我先在此致歉。
我有一个数值型数据框,大约有100列,每一列中都含有同一个特定数值(本例中为数字10),需要把这些值与一个数值向量中对应的值相乘,该向量为每一列各提供一个乘数。我完全卡住了,希望能得到任何帮助。
这是一个简化的例子:
df
V1 V2 V3
1 0 0 2
2 1 0 2
3 0 0 1
4 0 0 2
5 0 0 1
6 10 0 1
7 0 0 1
8 0 0 2
9 0 10 2
10 0 0 2
11 10 0 1
12 0 0 10
13 1 2 1
14 0 0 2
15 0 0 0
16 0 1 2
17 1 0 10
18 1 1 1
19 0 0 1
20 0 0 2
相应的向量如下所示:
V
v1 v2 v3
0.01256117 0.03037231 0.55444079
因此,df 中 V1 列的值“10”需要乘以向量 V 中的 v1,V2 列的值“10”需要乘以向量 V 中的 v2,依此类推。
非常感谢任何帮助!
答案 0 :(得分:3)
这是一个简单的方法:
# sample data
df <- data.frame(v1 = c(1:10, 10), v2 = c(5:13, 10, 14), v3 = 8:18)
vec <- c(0.1, 0.2, 0.3) # sample vector to multiply by (one factor per column)
df
# v1 v2 v3
#1 1 5 8
#2 2 6 9
#3 3 7 10
#4 4 8 11
#5 5 9 12
#6 6 10 13
#7 7 11 14
#8 8 12 15
#9 9 13 16
#10 10 10 17
#11 10 14 18
# Build a matrix holding 10 * vec[j] wherever column j equals 10 and 0 elsewhere.
# The work is done on the transposes: there each row is one column of df, so
# vec (length == ncol(df)) recycles down the rows and lines up with the right
# column. The outer t() restores the original orientation.
df2 <- t(t(df == 10) * vec * t(df))
# Zero the original 10s so that adding df2 replaces them rather than adding to them.
df[df == 10] <- 0
df + df2
# v1 v2 v3
#1 1 5 8
#2 2 6 9
#3 3 7 3
#4 4 8 11
#5 5 9 12
#6 6 2 13
#7 7 11 14
#8 8 12 15
#9 9 13 16
#10 1 2 17
#11 1 14 18
答案 1 :(得分:2)
这是一个变体
# Split df into two masked parts and add them back together: cells equal to 10
# are scaled by their column's entry in vec1 (col(df) gives each cell's column
# index, so vec1[col(df)] supplies one multiplier per cell), while every other
# cell is carried over unchanged.
df1 <- ((df == 10) * df) * vec1[col(df)] +
  (df != 10) * df
df1
# V1 V2 V3
#1 0.0000000 0.0000000 2.000000
#2 1.0000000 0.0000000 2.000000
#3 0.0000000 0.0000000 1.000000
#4 0.0000000 0.0000000 2.000000
#5 0.0000000 0.0000000 1.000000
#6 0.1256117 0.0000000 1.000000
#7 0.0000000 0.0000000 1.000000
#8 0.0000000 0.0000000 2.000000
#9 0.0000000 0.3037231 2.000000
#10 0.0000000 0.0000000 2.000000
#11 0.1256117 0.0000000 1.000000
#12 0.0000000 0.0000000 5.544408
#13 1.0000000 2.0000000 1.000000
#14 0.0000000 0.0000000 2.000000
#15 0.0000000 0.0000000 0.000000
#16 0.0000000 1.0000000 2.000000
#17 1.0000000 0.0000000 5.544408
#18 1.0000000 1.0000000 1.000000
#19 0.0000000 0.0000000 1.000000
#20 0.0000000 0.0000000 2.000000
对于大数据集,可以使用 lapply/Map 等函数:
#' Multiply the entries of a vector that equal a target value by a factor.
#'
#' @param x A numeric vector (e.g. one column of a data frame).
#' @param y The multiplier applied to the matching entries.
#' @param target The value to look for. Defaults to 10, the value used in the
#'   question, so existing two-argument calls behave as before.
#' @return `x` with every entry equal to `target` replaced by that entry
#'   times `y`; all other entries (including `NA`) are left unchanged.
f1 <- function(x, y, target = 10) {
  # which() drops NA comparisons. Indexing with the raw logical vector would
  # carry the NAs into the subassignment, which fails with "NAs are not
  # allowed in subscripted assignments" as soon as `x` has missing values.
  i <- which(x == target)
  x[i] <- x[i] * y
  x
}
# Apply f1 to each column of df paired with the matching entry of vec1, then
# reassemble the resulting list of columns into a data frame.
df2 <- data.frame(mapply(f1, df, vec1, SIMPLIFY = FALSE))
df2
# V1 V2 V3
#1 0.0000000 0.0000000 2.000000
#2 1.0000000 0.0000000 2.000000
#3 0.0000000 0.0000000 1.000000
#4 0.0000000 0.0000000 2.000000
#5 0.0000000 0.0000000 1.000000
#6 0.1256117 0.0000000 1.000000
#7 0.0000000 0.0000000 1.000000
#8 0.0000000 0.0000000 2.000000
#9 0.0000000 0.3037231 2.000000
#10 0.0000000 0.0000000 2.000000
#11 0.1256117 0.0000000 1.000000
#12 0.0000000 0.0000000 5.544408
#13 1.0000000 2.0000000 1.000000
#14 0.0000000 0.0000000 2.000000
#15 0.0000000 0.0000000 0.000000
#16 0.0000000 1.0000000 2.000000
#17 1.0000000 0.0000000 5.544408
#18 1.0000000 1.0000000 1.000000
#19 0.0000000 0.0000000 1.000000
#20 0.0000000 0.0000000 2.000000
# Confirm that the whole-frame arithmetic result (df1) and the column-wise
# Map result (df2) are exactly the same object.
identical(df1, df2)
#[1] TRUE
或data.table
library(data.table) # v1.9.5+
# Turn df into a data.table so the set() calls below can update it in place.
setDT(df)
for (j in seq_along(df)) {
  # Store the column as double before scaling.
  # NOTE(review): presumably needed so the fractional products are not coerced
  # back to the column's original integer type -- confirm against set() docs.
  set(df, i = NULL, j = j, value = as.numeric(df[[j]]))
  # Locate the matching rows once and reuse them for both the row index and
  # the values. which() drops NA comparisons, so the two always have the same
  # length even when the column contains missing values.
  rows <- which(df[[j]] == 10)
  set(df, i = rows, j = j, value = df[[j]][rows] * vec1[j])
}
df
# V1 V2 V3
#1: 0.0000000 0.0000000 2.000000
#2: 1.0000000 0.0000000 2.000000
#3: 0.0000000 0.0000000 1.000000
#4: 0.0000000 0.0000000 2.000000
#5: 0.0000000 0.0000000 1.000000
#6: 0.1256117 0.0000000 1.000000
#7: 0.0000000 0.0000000 1.000000
#8: 0.0000000 0.0000000 2.000000
#9: 0.0000000 0.3037231 2.000000
#10:0.0000000 0.0000000 2.000000
#11:0.1256117 0.0000000 1.000000
#12:0.0000000 0.0000000 5.544408
#13:1.0000000 2.0000000 1.000000
#14:0.0000000 0.0000000 2.000000
#15:0.0000000 0.0000000 0.000000
#16:0.0000000 1.0000000 2.000000
#17:1.0000000 0.0000000 5.544408
#18:1.0000000 1.0000000 1.000000
#19:0.0000000 0.0000000 1.000000
#20:0.0000000 0.0000000 2.000000
# Sample data from the question: a 20-row data frame with three integer
# columns, in which the value 10 marks the cells to be rescaled.
df <- data.frame(
  V1 = c(0L, 1L, 0L, 0L, 0L, 10L, 0L, 0L, 0L, 0L,
         10L, 0L, 1L, 0L, 0L, 0L, 1L, 1L, 0L, 0L),
  V2 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 10L, 0L,
         0L, 0L, 2L, 0L, 0L, 1L, 0L, 1L, 0L, 0L),
  V3 = c(2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L,
         1L, 10L, 1L, 2L, 0L, 2L, 10L, 1L, 1L, 2L)
)
# One named multiplier per column of df.
vec1 <- c(0.01256117, 0.03037231, 0.55444079)
names(vec1) <- c("v1", "v2", "v3")
答案 2 :(得分:1)
这是另一个建议:
# Locate every cell equal to 10 as (row, col) index pairs, then scale each of
# those cells by the entry of v selected by the cell's column index.
arr <- which(df == 10, arr.ind = TRUE)
df[arr] <- v[arr[, "col"]] * df[arr]
#> df
# V1 V2 V3
#1 0.0000000 0.0000000 2.000000
#2 1.0000000 0.0000000 2.000000
#3 0.0000000 0.0000000 1.000000
#4 0.0000000 0.0000000 2.000000
#5 0.0000000 0.0000000 1.000000
#6 0.1256117 0.0000000 1.000000
#7 0.0000000 0.0000000 1.000000
#8 0.0000000 0.0000000 2.000000
#9 0.0000000 0.3037231 2.000000
#10 0.0000000 0.0000000 2.000000
#11 0.1256117 0.0000000 1.000000
#12 0.0000000 0.0000000 5.544408
#13 1.0000000 2.0000000 1.000000
#14 0.0000000 0.0000000 2.000000
#15 0.0000000 0.0000000 0.000000
#16 0.0000000 1.0000000 2.000000
#17 1.0000000 0.0000000 5.544408
#18 1.0000000 1.0000000 1.000000
#19 0.0000000 0.0000000 1.000000
#20 0.0000000 0.0000000 2.000000
# Sample data from the question: three integer columns and explicit character
# row names "1" through "20".
df <- structure(
  list(
    V1 = c(0L, 1L, 0L, 0L, 0L, 10L, 0L, 0L, 0L, 0L,
           10L, 0L, 1L, 0L, 0L, 0L, 1L, 1L, 0L, 0L),
    V2 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 10L, 0L,
           0L, 0L, 2L, 0L, 0L, 1L, 0L, 1L, 0L, 0L),
    V3 = c(2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L,
           1L, 10L, 1L, 2L, 0L, 2L, 10L, 1L, 1L, 2L)
  ),
  class = "data.frame",
  row.names = as.character(1:20)
)
# Unnamed multipliers, one per column of df in column order.
v <- c(0.01256117, 0.03037231, 0.55444079)