我有一个大型稀疏矩阵,希望在每一列中只保留最大的 N 个和最小的 N 个非零元素(本例中 N = 2),并把其余所有非零元素替换为零。我目前的做法是:先生成原始矩阵的排名矩阵(rank matrix,即把每个非零元素替换为它在所在列中的排名),再利用它来保留前 N 个和后 N 个元素。
本例的难点在于:各列的非零元素个数并不相同。
以下是可重现的代码。
## Example data: 30 values laid out as a 6-row sparse matrix (5 columns).
TestMatrix <- Matrix(
  c(
    0, 100, 12, 0, 11, 1,
    2, 3, 4, 5, 0, 31,
    29, 0, 28, 69, 61, 0,
    72, 27, 0, 0, 0, 0,
    5, 19, 0, 0, 17, 33
  ),
  6,
  sparse = TRUE
)

## Number of stored non-zero values in each column.
n <- diff(TestMatrix@p)

## Group the stored values by the column they belong to.
lst <- split(TestMatrix@x, rep.int(seq_len(ncol(TestMatrix)), n))

## Rank the negated values inside each column (largest value gets rank 1),
## then flatten the per-column results back into one vector.
r <- unlist(lapply(lst, function(v) rank(-v)))

## Rank matrix: a copy of the sparse matrix whose stored values are the ranks.
RankMatrix <- TestMatrix
RankMatrix@x <- r

## Once the rank matrix is obtained, something like the following is used.
## This particular line only illustrates the goal; it is not the final logic.
TestMatrix[(RankMatrix <= 1) | (RankMatrix > 3)] <- 0
答案 0 :(得分:1)
以下是我的解决方案。我只是想知道它是否可以在处理大型稀疏矩阵时进行优化。
## Example data: 42 values laid out as a 7-row sparse matrix (6 columns);
## each row of numbers below is one column of the matrix.
TestMatrix <- Matrix(
  c(
    0.8, 0.9, 0.6, 0, 0, 0.3, 0.5,
    0, 0, 0.3, 0, 0, 0, 0,
    0.4, 0.5, 0.6, 0, 0, 0.1, 0,
    0, 0, 0, 0, 0, 0, 0,
    0.3, 0.4, 0.5, 0.2, 0.1, 0.7, 0.8,
    0.6, 0.7, 0.5, 0.8, 0, 0, 0
  ),
  7,
  sparse = TRUE
)

## Number of stored non-zero values in each column.
n <- diff(TestMatrix@p)

## Group the stored values by the column they belong to.
lst <- split(TestMatrix@x, rep.int(seq_len(ncol(TestMatrix)), n))

## Rank the negated values inside each column (largest value gets rank 1),
## then flatten the per-column results back into one vector.
r <- unlist(lapply(lst, function(v) rank(-v)))

## Rank matrix: a copy of the sparse matrix whose stored values are the ranks.
RankMatrix <- TestMatrix
RankMatrix@x <- r
#TestMatrix
#7 x 6 sparse Matrix of class "dgCMatrix"
# [1,] 0.8 . 0.4 . 0.3 0.6
# [2,] 0.9 . 0.5 . 0.4 0.7
# [3,] 0.6 0.3 0.6 . 0.5 0.5
# [4,] . . . . 0.2 0.8
# [5,] . . . . 0.1 .
# [6,] 0.3 . 0.1 . 0.7 .
# [7,] 0.5 . . . 0.8 .
#' Keep the top `n` and bottom `n` ranked entries of every column
#'
#' For each column, entries whose rank lies after the first `n` ranks and
#' before the last `n` ranks are set to zero. Columns with at most `2 * n`
#' ranked entries are left untouched.
#'
#' @param aMatrix Matrix holding the original values (the example passes a
#'   sparse matrix).
#' @param rMatrix Matrix holding the within-column rank of every non-zero
#'   entry of `aMatrix` and zero elsewhere. It is expected to have the same
#'   dimensions as `aMatrix`.
#' @param n Number of entries to keep at each end of the ranking. Defaults
#'   to 2, the value that was previously hard-coded.
#' @return `aMatrix` with the middle-ranked entries of each column replaced
#'   by zero.
GetTopNBottomN <- function(aMatrix, rMatrix, n = 2) {
  stopifnot(is.numeric(n), length(n) == 1L, !is.na(n), n >= 0)

  # seq_len() keeps the loop empty for a zero-column matrix; 1:ncol() would
  # iterate over c(1, 0) and fail on the first subscript.
  for (j in seq_len(ncol(aMatrix))) {
    ranks <- rMatrix[, j] # extract the column once per iteration

    # Count the ranked entries instead of taking max(ranks): with tied values
    # rank() returns averaged ranks, so the largest rank can be smaller than
    # the number of entries and the middle band would be computed too narrow.
    n_ranked <- sum(ranks != 0)

    # Nothing to remove when the column has no more than 2 * n entries.
    if (n_ranked <= 2 * n) next

    in_middle <- ranks > n & ranks <= n_ranked - n
    aMatrix[in_middle, j] <- 0
  }

  aMatrix
}
## Run the filter on the example data; the top-level call prints the result.
GetTopNBottomN(aMatrix = TestMatrix, rMatrix = RankMatrix)
## Obtained Output
# 7 x 6 sparse Matrix of class "dgCMatrix"
#
# [1,] 0.8 . 0.4 . . 0.6
# [2,] 0.9 . 0.5 . . 0.7
# [3,] . 0.3 0.6 . . 0.5
# [4,] . . . . 0.2 0.8
# [5,] . . . . 0.1 .
# [6,] 0.3 . 0.1 . 0.7 .
# [7,] 0.5 . . . 0.8 .