我有一个矩阵:
m <- matrix(c(
1, 1, 1, 0, 0, 0,
0, 0, 0, 0, 0, 0,
3, 0, 0, 0, 0, 0,
3, 0, 0, 0, 0, 2,
3, 0, 0, 0, 0, 0,
3, 0, 0, 0, 2, 2),
ncol = 6, byrow = TRUE)
[,1] [,2] [,3] [,4] [,5] [,6]
[1,] 1 1 1 0 0 0
[2,] 0 0 0 0 0 0
[3,] 3 0 0 0 0 0
[4,] 3 0 0 0 0 2 # <- island 3, value 2
[5,] 3 0 0 0 0 0
[6,] 3 0 0 0 2 2 # <- island 4, also value 2
在此矩阵中,有四个“岛”,即用零分隔的非零值:
(1)由三个1组成的岛,(2)四个3组成的岛,(3)一个2组成的岛,以及(4)两个2组成的岛。
因此,两个岛由值2
组成。我想确定这样的“重复”岛,并将其中一个“岛”的值更改为下一个可用数字(在这种情况下为4
)
[,1] [,2] [,3] [,4] [,5] [,6]
[1,] 1 1 1 0 0 0
[2,] 0 0 0 0 0 0
[3,] 3 0 0 0 0 0
[4,] 3 0 0 0 0 2
[5,] 3 0 0 0 0 0
[6,] 3 0 0 0 4 4
答案 0 :(得分:2)
有趣的问题!让我们来看一个更复杂的案例
(M <- matrix(c(1, 0, 3, 3, 3, 3, 1, 0, 0, 0, 0, 0, 1, 0, 3, 0, 2,
0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 0, 0, 2, 0, 2), 6, 6))
# [,1] [,2] [,3] [,4] [,5] [,6]
# [1,] 1 1 1 0 0 1
# [2,] 0 0 0 0 0 0
# [3,] 3 0 3 3 0 0
# [4,] 3 0 0 0 0 2
# [5,] 3 0 2 0 0 0
# [6,] 3 0 0 0 2 2
这是一个基于图形的解决方案。
library(igraph)
# Indices of nonzero matrix elements
idx <- which(M != 0, arr.ind = TRUE)
# Adjacency matrix for matrix entries
# Two entries are adjacent if their column or row number differs by one
# Also, due to idx, an implicit condition is also that the two entries are the same
adj <- 1 * (as.matrix(dist(idx, method = "manhattan")) == 1)
# Creating loops as to take into account singleton islands
diag(adj) <- 1
# A corresponding graphs
g <- graph_from_adjacency_matrix(adj, mode = "undirected")
# Connected components of this graph
cmps <- clusters(g)
# Going over unique values of M
for(i in 1:max(M)) {
# Islands of value i
un <- unique(cmps$membership[M[idx] == i])
# More than one island?
if(length(un) > 1)
# If so, let's go over islands 2, 3, ...
for(cmp in un[-1])
# ... and replace corresponding matrix entries by max(M) + 1
M[idx[cmps$membership == cmp, , drop = FALSE]] <- max(M) + 1
}
M
# [,1] [,2] [,3] [,4] [,5] [,6]
# [1,] 1 1 1 0 0 4
# [2,] 0 0 0 0 0 0
# [3,] 3 0 7 7 0 0
# [4,] 3 0 0 0 0 6
# [5,] 3 0 2 0 0 0
# [6,] 3 0 0 0 5 5
还请注意,如果可以找到adj
,就可以找到所有孤岛,只要找到其排列就可以生成具有最大块数的块对角矩阵。然后,每个块将对应一个岛。但是,我找不到相关过程的R实现。
答案 1 :(得分:1)
raster::clump
*来识别非零值的 Islands。然后使用data.table
便利函数来确定应更新的值。
library(raster)
library(data.table)
# get index of non-zero values. re-order to match the clump order
ix <- which(m != 0, arr.ind = TRUE)
ix <- ix[order(ix[ , "row"]), ]
# get clumps
cl <- clump(raster(m))
cl_ix <- cl@data@values
# put stuff in a data.table and order by x
d <- data.table(ix, x = m[ix], cl_ix = cl_ix[!is.na(cl_ix)])
setorder(d, x, cl_ix)
# for each x, create a counter of runs of clump index
d[ , g := rleid(cl_ix), by = x]
# for 'duplicated' runs...
# ...add to x based on runs of x and clump index runs
d[g > 1, x := max(d$x) + rleid(x, g)]
# update matrix
m2 <- m
m2[as.matrix(d[ , .(row, col)])] <- d$x
m
# [,1] [,2] [,3] [,4] [,5] [,6]
# [1,] 1 1 1 0 0 1
# [2,] 0 0 0 0 0 0
# [3,] 3 0 3 3 0 0
# [4,] 3 0 0 0 0 2
# [5,] 3 0 2 0 0 0
# [6,] 3 0 0 0 2 2
m2
# [,1] [,2] [,3] [,4] [,5] [,6]
# [1,] 1 1 1 0 0 4
# [2,] 0 0 0 0 0 0
# [3,] 3 0 7 7 0 0
# [4,] 3 0 0 0 0 2
# [5,] 3 0 5 0 0 0
# [6,] 3 0 0 0 6 6
*请注意,clump
函数要求igraph
包可用。
答案 2 :(得分:0)
这比我想象的“不是两个”条件要难得多,我现在通过while
循环实现了结果,我们将探讨是否可以改进:
(基本上,我们逐行移动并检查是否找到了该岛,如果是,则结束研究)
# some useful variables
i=1 # row counter
counter=0 # check if island is found
max_m <- max(m) #finds the max value in the matrix, to fill
while(counter == 0) {
if (any(m[i, ] == 2)) { # check if we find the island in the row, otherwise skip
row <- m[i, ]
row[row == 2] <- max_m + 1 # here we change the value
m[i, ] <- row
counter <- counter + 1
}
i = i + 1 # we move up one row
#cat("row number: ", i, "\n") # sanity check to see if it was an infinite loop
}
m
# [,1] [,2] [,3] [,4] [,5] [,6]
# [1,] 1 1 1 0 0 0
# [2,] 0 0 0 0 0 0
# [3,] 3 0 0 0 0 0
# [4,] 3 0 0 0 0 4
# [5,] 3 0 0 0 0 0
# [6,] 3 0 0 0 2 2
这远非完美,因为我们按行移动,所以如果第一个岛越过一列,我们将仅更改第一个值。
意外结果示例:
# [,1] [,2] [,3] [,4] [,5] [,6]
# [1,] 1 1 1 0 0 0
# [2,] 0 0 0 0 0 0
# [3,] 3 0 0 0 0 0
# [4,] 3 0 0 0 0 4
# [5,] 3 0 0 0 0 2 # problem here
# [6,] 3 0 0 0 0 0
使用的数据:
m <- matrix(c(rep(1, 3),
rep(0, 9),
3,
rep(0, 5),
3,
rep(0, 4),
2,
3,
rep(0, 5),
3,
rep(0,3),
rep(2, 2)),ncol=6,nrow=6, byrow = T)
答案 3 :(得分:0)
这可以通过软件包TraMineR
轻松实现。
islander <- function(mat) {
require(TraMineR)
rows.mat.seq <- seqdef(mat) # seeks all sequences in rows
cols.mat.seq <- seqdef(t(mat)) # tranposed version
rows <- seqpm(rows.mat.seq, 22)$MIndex # seeks for sub sequence 2-2 in rows
cols <- seqpm(cols.mat.seq, 22)$MIndex # seeks for sub sequence 2-2 in columns
if (length(cols) == 0) { # the row case
mat[rows, which(mat[rows, ] == 2)] <- 4
return(mat)
} else { # the column case
mat[which(mat[, cols] == 2), cols] <- 4
return(mat)
}
}
屈服
> islander(row.mat)
...
[,1] [,2] [,3] [,4] [,5] [,6]
[1,] 1 1 1 0 0 0
[2,] 0 0 0 0 0 0
[3,] 3 0 0 0 0 0
[4,] 3 0 0 0 0 2
[5,] 3 0 0 0 0 0
[6,] 3 0 0 0 4 4
> islander(col.mat)
...
[,1] [,2] [,3] [,4] [,5] [,6]
[1,] 1 1 1 0 0 0
[2,] 0 0 0 0 0 0
[3,] 3 0 0 0 0 0
[4,] 3 0 0 0 0 0
[5,] 3 0 0 0 0 4
[6,] 3 0 0 2 0 4
注意::如果您的岛屿较长,则需要熟练掌握代码,例如因为岛的长度是3,所以seqpm(., 222)
。当然可以在功能中实现对所有情况的考虑。
数据
row.mat <- structure(c(1, 0, 3, 3, 3, 3, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 2, 0, 2), .Dim = c(6L,
6L))
col.mat <- structure(c(1, 0, 3, 3, 3, 3, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2), .Dim = c(6L,
6L))
> row.mat
[,1] [,2] [,3] [,4] [,5] [,6]
[1,] 1 1 1 0 0 0
[2,] 0 0 0 0 0 0
[3,] 3 0 0 0 0 0
[4,] 3 0 0 0 0 2
[5,] 3 0 0 0 0 0
[6,] 3 0 0 0 2 2
> col.mat
[,1] [,2] [,3] [,4] [,5] [,6]
[1,] 1 1 1 0 0 0
[2,] 0 0 0 0 0 0
[3,] 3 0 0 0 0 0
[4,] 3 0 0 0 0 0
[5,] 3 0 0 0 0 2
[6,] 3 0 0 2 0 2