考虑到这是我下面的数据集
Sepal.Length Sepal.Width Petal.Length Petal.Width Species
5.7 2.5 5.0 2.0 virginica
7.7 3.0 6.1 2.3 virginica
6.7 3.3 5.7 2.1 virginica
4.8 3.0 1.4 0.1 setosa
5.5 4.2 1.4 0.2 setosa
4.9 3.6 1.4 0.1 setosa
6.3 3.3 4.7 1.6 versicolor
5.6 2.9 3.6 1.3 versicolor
5.9 3.0 4.2 1.5 versicolor
df <- structure(list(Sepal.Length = c(5.7, 7.7, 6.7, 4.8, 5.5, 4.9,
6.3, 5.6, 5.9), Sepal.Width = c(2.5, 3, 3.3, 3, 4.2, 3.6, 3.3,
2.9, 3), Petal.Length = c(5, 6.1, 5.7, 1.4, 1.4, 1.4, 4.7, 3.6,
4.2), Petal.Width = c(2, 2.3, 2.1, 0.1, 0.2, 0.1, 1.6, 1.3, 1.5
), Species = structure(c(3L, 3L, 3L, 1L, 1L, 1L, 2L, 2L, 2L), .Label = c("setosa",
"versicolor", "virginica"), class = "factor")), row.names = c(NA,
-9L), class = "data.frame")
我的目标是
从物种的第一行==“ virginica”中减去Sepal.Length Sepal.Width Petal.Length Petal.Width
的值,并将每一行“ Setosa”相减,
我在下面做什么?
Virginia1_vs_Setosa1a <- df[1:4][df$Species == "virginica",][1,] - df[1:4][df$Species == "setosa",][1,]
Virginia1_vs_Setosa1a
0.9 -0.5 3.6 1.9
Virginia1_vs_Setosa2a <- df[1:4][df$Species == "virginica",][1,] - df[1:4][df$Species == "setosa",][2,]
Virginia1_vs_Setosa2a
0.2 -1.7 3.6 1.8
Virginia1_vs_Setosa3a <- df[1:4][df$Species == "virginica",][1,] - df[1:4][df$Species == "setosa",][3,]
Virginia1_vs_Setosa3a
0.8 -1.1 3.6 1.9
取每个元素的乘积
Virginia1_vs_Setosa1 <- as.numeric(
Virginia1_vs_Setosa1a[1]*Virginia1_vs_Setosa1a[2]*
Virginia1_vs_Setosa1a[3]*Virginia1_vs_Setosa1a[4])
0.9*-0.5*3.6*1.9 = -3.078
Virginia1_vs_Setosa2 <- as.numeric(
Virginia1_vs_Setosa2a[1]*Virginia1_vs_Setosa2a[2]*
Virginia1_vs_Setosa2a[3]*Virginia1_vs_Setosa2a[4])
0.2*-1.7*3.6*1.8 = -2.2032
Virginia1_vs_Setosa3 <- as.numeric(
Virginia1_vs_Setosa3a[1]*Virginia1_vs_Setosa3a[2]*
Virginia1_vs_Setosa3a[3]*Virginia1_vs_Setosa3a[4])
0.8*-1.1*3.6*1.9 = -6.0192
类似于弗吉尼亚州的第二行,setosa中的每一行。
Virginia2_vs_Setosa1a <- df[1:4][df$Species == "virginica",][2,] - df[1:4][df$Species == "setosa",][1,]
Virginia2_vs_Setosa2a <- df[1:4][df$Species == "virginica",][2,] - df[1:4][df$Species == "setosa",][2,]
Virginia2_vs_Setosa3a <- df[1:4][df$Species == "virginica",][2,] - df[1:4][df$Species == "setosa",][3,]
Virginia2_vs_Setosa1 <- as.numeric(
Virginia2_vs_Setosa1a[1]*Virginia2_vs_Setosa1a[2]*
Virginia2_vs_Setosa1a[3]*Virginia2_vs_Setosa1a[4])
Virginia2_vs_Setosa2 <- as.numeric(
Virginia2_vs_Setosa2a[1]*Virginia2_vs_Setosa2a[2]*
Virginia2_vs_Setosa2a[3]*Virginia2_vs_Setosa2a[4])
Virginia2_vs_Setosa3 <- as.numeric(
Virginia2_vs_Setosa3a[1]*Virginia2_vs_Setosa3a[2]*
Virginia2_vs_Setosa3a[3]*Virginia2_vs_Setosa3a[4])
rm(Virginia2_vs_Setosa1a, Virginia2_vs_Setosa2a,
Virginia2_vs_Setosa3a)
与弗吉尼亚州的第三行相似,setosa的每一行
Virginia3_vs_Setosa1a <- df[1:4][df$Species == "virginica",][3,] - df[1:4][df$Species == "setosa",][1,]
Virginia3_vs_Setosa2a <- df[1:4][df$Species == "virginica",][3,] - df[1:4][df$Species == "setosa",][2,]
Virginia3_vs_Setosa3a <- df[1:4][df$Species == "virginica",][3,] - df[1:4][df$Species == "setosa",][3,]
Virginia3_vs_Setosa1 <- as.numeric(
Virginia3_vs_Setosa1a[1]*Virginia3_vs_Setosa1a[2]*
Virginia3_vs_Setosa1a[3]*Virginia3_vs_Setosa1a[4])
Virginia3_vs_Setosa2 <- as.numeric(
Virginia3_vs_Setosa2a[1]*Virginia3_vs_Setosa2a[2]*
Virginia3_vs_Setosa2a[3]*Virginia3_vs_Setosa2a[4])
Virginia3_vs_Setosa3 <- as.numeric(
Virginia3_vs_Setosa3a[1]*Virginia3_vs_Setosa3a[2]*
Virginia3_vs_Setosa3a[3]*Virginia3_vs_Setosa3a[4])
rm(Virginia3_vs_Setosa1a, Virginia3_vs_Setosa2a,
Virginia3_vs_Setosa3a)
最后在下面创建一个3 * 3的矩阵
matrix(c(Virginia1_vs_Setosa1, Virginia1_vs_Setosa2, Virginia1_vs_Setosa3, Virginia2_vs_Setosa1, Virginia2_vs_Setosa2, Virginia2_vs_Setosa3,
Virginia3_vs_Setosa1, Virginia3_vs_Setosa2, Virginia3_vs_Setosa3), nrow=3, ncol=3)
[,1] [,2] [,3]
[1,] -3.0780 0.0000 4.9020
[2,] -2.2032 -26.0568 -8.8236
[3,] -6.0192 -17.3712 -4.6440
如您所见,我的解决方案非常笨拙且效率低下。如果有人可以向我展示一种实现相同结果的有效方法,我将非常感激。
答案 0 :(得分:1)
您可以使用双for
循环来完成此操作。 *apply
函数家族可能有解决方案,但是这一功能可行。
f <- droplevels(df$Species[df$Species != "versicolor"])
sp <- split(df[df$Species != "versicolor", ], f)
res <- matrix(0, 3, 3)
for(i in 1:nrow(sp[[1]])){
for(j in 1:nrow(sp[[2]])){
res[i, j] <- prod(sp[[2]][j, -5] - sp[[1]][i, -5])
}
}
res
# [,1] [,2] [,3]
#[1,] -3.0780 0.0000 4.9020
#[2,] -2.2032 -26.0568 -8.8236
#[3,] -6.0192 -17.3712 -4.6440
答案 1 :(得分:0)
对于这种特殊情况,您可以从outer
借用一些想法
X <- lapply(split(df[df$Species=="virginica", 1:4], 1:3), unlist)
Y <- lapply(split(df[df$Species=="setosa", 1:4], 1:3), unlist)
FUN <- function(l1, l2) mapply(function(v,w) prod(v-w), l1, l2)
Y <- rep(Y, rep.int(length(X), length(Y)))
if (length(X))
X <- rep(X, times = ceiling(length(Y)/length(X)))
matrix(FUN(X, Y), ncol=3L, byrow=TRUE)
在大多数情况下,您将需要生成每对可能的不同行对,然后根据您的公式进行计算。使用data.table
,将类似于:
library(data.table)
setDT(df)
setorder(df, Species)[, numid := rowid(Species)]
parts <- split(df, by=c("Species", "numid"))
combis <- CJ(parts, parts, sorted=FALSE)
combis[, .(
Species1=V1[[1]][,Species],
numid1=V1[[1]][,numid],
Species2=V2[[1]][,Species],
numid2=V2[[1]][,numid],
differ=prod(V1[[1]][, 1:4] - V2[[1]][, 1:4])),
by=seq_len(combis[,.N])][
Species1!=Species2, -1L]