这个问题类似于我问的here。我再次有一个非常大的矩阵,行和列的名称是相同的。这些名称是一个三个字母的字符串,后跟一个数字。三个字母的字符串会重复,只有数字会更改。重复几次后,字符串将更改,并且数字将从1重新开始。
基本上我要寻找的是根据每个元素的行名和列名进行特定的计算。
我将举一个我要寻找的小例子。这是一个矩阵a
:
matrix <- matrix(c(1:36), nrow = 6, byrow = TRUE)
names <- paste(rep(c("aaa" , "bbb", "ccc"), each = 2) , rep(c(1:2) , times = 3))
rownames(matrix) <- names
colnames(matrix) <- names
给出:
aaa 1 aaa 2 bbb 1 bbb 2 ccc 1 ccc 2
aaa 1 1 2 3 4 5 6
aaa 2 7 8 9 10 11 12
bbb 1 13 14 15 16 17 18
bbb 2 19 20 21 22 23 24
ccc 1 25 26 27 28 29 30
ccc 2 31 32 33 34 35 36
对于这个矩阵的每个元素,我想做一个乘法。用言语解释有点困难。
如果矩阵的元素的行名与其列名使用不同的三个字母字符串,则我将匹配字符串后出现的数字,并将“第一个3个字母字符串编号”乘以“第二个三个字母字符串相同的字符串”数字”。
如果"aaa"
与"bbb"
匹配,则:
matrix[aaa (number n), aaa (number m)] * matrix[bbb (number n), bbb (number m)]
如果"aaa"
等于"aaa"
,则
matrix[aaa (number n), aaa (number m)] * matrix[aaa (number n), aaa (number m)]
或基本上是平方的元素。
因此,我将举例说明我要寻找的东西:
在matrix["aaa 1", "aaa 2"]
中,我将matrix["aaa 1", "aaa 2"]
与matrix["aaa 1", "aaa 2"]
(2 * 2)乘以4
在matrix["aaa 1", "bbb 2"]
中,我将matrix["aaa 1", "aaa 2"]
与matrix["bbb 1", "bbb 2"]
(2 * 16)相乘得到32
在matrix["bbb 2", "ccc 1"]
中,我将matrix["bbb 2", "bbb 1"]
与matrix["ccc 2", "ccc 1"]
(21 * 35)乘以735
最后,矩阵(称为d)应为:
aaa 1 aaa 2 bbb 1 bbb 2 ccc 1 ccc 2
aaa 1 1 4 15 32 29 60
aaa 2 49 64 147 176 245 288
bbb 1 15 32 225 256 435 480
bbb 2 147 176 441 484 735 792
ccc 1 29 60 435 480 841 900
ccc 2 245 288 735 792 1225 1296
我通过使用可怕的代码得到了
d <- matrix^2
d[1,3] <- matrix[1,1] * matrix[3,3]
d[1,4] <- matrix[1,2] * matrix[3,4]
d[1,5] <- matrix[1,1] * matrix[5,5]
d[1,6] <- matrix[1,2] * matrix[5,6]
d[2,3] <- matrix[2,1] * matrix[4,3]
d[2,4] <- matrix[2,2] * matrix[4,4]
d[2,5] <- matrix[2,1] * matrix[6,5]
d[2,6] <- matrix[2,2] * matrix[6,6]
d[3,1] <- matrix[3,3] * matrix[1,1]
d[3,2] <- matrix[3,4] * matrix[1,2]
d[3,5] <- matrix[3,3] * matrix[5,5]
d[3,6] <- matrix[3,4] * matrix[5,6]
d[4,1] <- matrix[4,3] * matrix[2,1]
d[4,2] <- matrix[4,4] * matrix[2,2]
d[4,5] <- matrix[4,3] * matrix[6,5]
d[4,6] <- matrix[4,4] * matrix[6,6]
d[5,1] <- matrix[5,5] * matrix[1,1]
d[5,2] <- matrix[5,6] * matrix[1,2]
d[5,3] <- matrix[5,5] * matrix[3,3]
d[5,4] <- matrix[5,6] * matrix[3,4]
d[6,1] <- matrix[6,5] * matrix[2,1]
d[6,2] <- matrix[6,6] * matrix[2,2]
d[6,3] <- matrix[6,5] * matrix[4,3]
d[6,4] <- matrix[6,6] * matrix[4,4]
是否有代码可以使用循环或其他方法更有效地解决此问题?
答案 0 :(得分:3)
肮脏的循环:
d2 <- matrix^2
for (i in rownames(matrix)) {
for (j in colnames(matrix)) {
i1 <- strsplit(i, ' ', fixed = T)[[1]]
j1 <- strsplit(j, ' ', fixed = T)[[1]]
ni <- c(i1[2], j1[2])
n1 <- paste(i1[1], ni)
n2 <- paste(j1[1], ni)
d2[i, j] <- matrix[n1[1], n1[2]] * matrix[n2[1], n2[2]]
}
}
d2
# aaa 1 aaa 2 bbb 1 bbb 2 ccc 1 ccc 2
# aaa 1 1 4 15 32 29 60
# aaa 2 49 64 147 176 245 288
# bbb 1 15 32 225 256 435 480
# bbb 2 147 176 441 484 735 792
# ccc 1 29 60 435 480 841 900
# ccc 2 245 288 735 792 1225 1296
all.equal(d2, d)
# [1] TRUE
这将更快(无循环):
require(data.table)
require(Hmisc)
mat <- matrix # rename matrix variable,
# it is bad practice to name variables the same as internal functions
rn <- rownames(mat)
nn <- data.table(expand.grid(rn, rn, stringsAsFactors = F)) # all combinations of names
# split into parts:
nn[, Cs(v1, s1) := tstrsplit(Var1, ' ', fixed = T)]
nn[, Cs(v2, s2) := tstrsplit(Var2, ' ', fixed = T)]
# make respective new names:
nn[, a1 := paste(v1, s1)]
nn[, a2 := paste(v1, s2)]
nn[, b1 := paste(v2, s1)]
nn[, b2 := paste(v2, s2)]
index <- as.matrix(nn[, lapply(.SD, match, rn),
.SDcols = Cs(a1, a2, b1, b2)]) # get indexes of elements
d3 <- mat[index[, 1:2]] * mat[index[, 3:4]] # selection of elements and multiplication
d3 <- matrix(d3, ncol = ncol(mat)) # convert to matrix
rownames(d3) <- rn
colnames(d3) <- rn
all.equal(d3, d2)
# [1] TRUE
答案 1 :(得分:2)
我们可以在此处使用mapply
#Get all the possible combination of rownames and column names
all_combns <- expand.grid(rownames(matrix), colnames(matrix),
stringsAsFactors = FALSE)
matrix[] <- mapply(function(x, y) {
#Extract first three letters
first_group <- substr(x, 1, 3)
second_group <- substr(y, 1, 3)
#Extract the numeric part which could also be done in this example by
#substr(x, 5, 5)
#I am just extracting the numeric part in the string.
first_num <- sub("[^\\d]+", "", x, perl = TRUE)
second_num <- sub("[^\\d]+", "", y, perl = TRUE)
#Construct element 1 and multiply it by elemnt 2
matrix[paste(first_group, first_num),paste(first_group, second_num)] *
matrix[paste(second_group, first_num),paste(second_group, second_num)]
} , all_combns[, 1], all_combns[, 2])
matrix
# aaa 1 aaa 2 bbb 1 bbb 2 ccc 1 ccc 2
#aaa 1 1 4 15 32 29 60
#aaa 2 49 64 147 176 245 288
#bbb 1 15 32 225 256 435 480
#bbb 2 147 176 441 484 735 792
#ccc 1 29 60 435 480 841 900
#ccc 2 245 288 735 792 1225 1296
答案 2 :(得分:0)
使用tidyr和dplyr的另一种方法:
mat_df <- as.data.frame(matrix)
mat_df <- gather(mat_df, col, Val)
mat_df$rows <-row.names(matrix)
mat_df <- unite(mat_df, "mult", c("rows", "col"), sep = " ", remove=F)
mat_df <- separate(mat_df, col, c("col_let", "col_fig"), remove=F)
mat_df <- separate(mat_df, rows, c("rows_let", "rows_fig"), remove=F)
mat_df <- unite(mat_df, "mult1", c("rows", "rows_let", "col_fig"), sep = " ", remove=F)
mat_df <- unite(mat_df, "mult2", c("col_let", "rows_fig", "col"), sep = " ", remove=F)
mat_df <- mat_df %>%
left_join(mat_df[, c("Val", "mult")], by= c("mult1" = "mult")) %>%
left_join(mat_df[, c("Val", "mult")], by= c("mult2" = "mult")) %>%
mutate(Final = Val*Val.y) %>%
select(rows, col, Final)
matrix_new <- as.matrix(spread(mat_df, col, Final)[, -1])
rownames(matrix_new) <- names