我有一个超过1000行、100列的大矩阵。每行中只有6-10列具有非零值,其余为零。我想创建一个只有5列的矩阵,它取每行中5个连续非零列的值。例如:
# Sample input: 5 rows x 14 columns, written row by row so the literal reads
# the same way the matrix prints.
A <- matrix(
  c(
    0, 0, 1, 2, 3, 4, 5, 0, 0, 6, 0, 0, 7, 1,
    1, 2, 3, 4, 5, 6, 0, 0, 7, 0, 0, 0, 0, 8,
    6, 0, 7, 0, 1, 2, 3, 4, 5, 0, 0, 0, 2, 4,
    0, 1, 2, 3, 4, 0, 5, 6, 7, 8, 9, 0, 0, 0,
    2, 4, 6, 0, 0, 0, 0, 7, 8, 9, 1, 2, 0, 0
  ),
  nrow = 5L, ncol = 14L, byrow = TRUE
)
#A =
# [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14]
#[1,] 0 0 1 2 3 4 5 0 0 6 0 0 7 1
#[2,] 1 2 3 4 5 6 0 0 7 0 0 0 0 8
#[3,] 6 0 7 0 1 2 3 4 5 0 0 0 2 4
#[4,] 0 1 2 3 4 0 5 6 7 8 9 0 0 0
#[5,] 2 4 6 0 0 0 0 7 8 9 1 2 0 0
我想要这个矩阵:
# Expected result: for each row of A, the 5 values of its run of consecutive
# non-zero entries (integer matrix, written row by row).
B <- matrix(
  c(
    1L, 2L, 3L, 4L, 5L,
    1L, 2L, 3L, 4L, 5L,
    1L, 2L, 3L, 4L, 5L,
    5L, 6L, 7L, 8L, 9L,
    7L, 8L, 9L, 1L, 2L
  ),
  nrow = 5L, ncol = 5L, byrow = TRUE
)
#B =
# [,1] [,2] [,3] [,4] [,5]
#[1,] 1 2 3 4 5
#[2,] 1 2 3 4 5
#[3,] 1 2 3 4 5
#[4,] 5 6 7 8 9
#[5,] 7 8 9 1 2
我的代码:
# Convert the matrix to a data frame so single rows can be taken as df[i, ].
df = data.frame(A)
# Attempt 1: for each row, drop every zero and keep the first five remaining
# values, then stack the per-row results with rbind. The selection ignores
# whether the kept values were adjacent in the original row.
B = do.call(rbind, lapply(1:NROW(df), function(i) df[i,][(df[i,])!=0][1:5]))
# or
# Attempt 2: the same "first five non-zero values" selection, applied row-wise
# with apply() and transposed so that each input row becomes an output row.
B = t(apply(X = df, MARGIN = 1, function(x) x[x!=0][1:5]))
我的代码对A的前两行有效,但对其余行无效。我还考虑过先获取非零列的索引,然后检查其中是否有5个连续的列(它们之间没有任何间隙),再检索它们的值。非常感谢任何帮助!
答案 0 :(得分:1)
您可以使用 zoo 包中的 rollapply:
library(zoo)
# For each row of A, return the 5 values of the first window of 5 consecutive
# non-zero entries; transposed so each input row becomes an output row.
t(apply(A, 1, function(x) {
  # One logical per length-5 window: TRUE when every entry in it is non-zero.
  window_full <- rollapply(x != 0, 5, all)
  # match() gives the start index of the first fully non-zero window.
  # TRUE is spelled out because T is an ordinary, reassignable variable.
  x[match(TRUE, window_full) + (0:4)]
}))
# [,1] [,2] [,3] [,4] [,5]
# [1,] 1 2 3 4 5
# [2,] 1 2 3 4 5
# [3,] 1 2 3 4 5
# [4,] 5 6 7 8 9
# [5,] 7 8 9 1 2
如果某一行不包含任何长度为5的连续非零序列,它会崩溃;如果您希望处理这种情况,请更新您的帖子。
或者相同但更漂亮:
library(purrr)
# Transpose so each row (series) of A becomes a column of a data frame; the
# author's rationale is that separate series fit more naturally in columns.
Adf <- as.data.frame(t(A))
# For each series, find the start of the first window of 5 consecutive
# non-zero values and keep those 5 values. rollapply() comes from zoo, which
# must already be attached. TRUE is spelled out because T is reassignable.
# NOTE(review): presumably map_df() binds the per-series vectors as columns
# of the result; confirm against the installed purrr/dplyr version.
res_df <- map_df(Adf, ~ {
  .x[match(TRUE, rollapply(.x != 0, 5, all)) + (0:4)]
})
# Optional: strip the names and transpose back to a plain matrix.
res_mat <- as.matrix(t(unname(res_df)))
答案 1 :(得分:1)
以下是使用rle的方法:
# For each row of A, return the first 5 values taken from runs of at least 5
# consecutive non-zero entries (a row of NA when no such run exists);
# transposed so each input row becomes an output row.
t(apply(A, 1, function(x) {
  runs <- rle(x != 0)
  # Keep only runs that are non-zero AND at least 5 elements long.
  runs$values <- runs$values & runs$lengths >= 5
  # Expand the runs back into a per-element mask. Indexing with seq_len(5)
  # (rather than head()) pads with NA, so every row yields exactly 5 values
  # and apply() keeps returning a matrix even if a row has no qualifying run.
  x[inverse.rle(runs)][seq_len(5)]
}))
# [,1] [,2] [,3] [,4] [,5]
#[1,] 1 2 3 4 5
#[2,] 1 2 3 4 5
#[3,] 1 2 3 4 5
#[4,] 5 6 7 8 9
#[5,] 7 8 9 1 2
答案 2 :(得分:0)
编辑:之前漏掉了一些细节,下面是我只使用apply和基础R函数的新尝试:
# Return the first `n` consecutive non-zero values of `x`.
#
# Arguments:
#   x  Numeric vector (one matrix row when used with apply()).
#   n  Required run length; defaults to 5.
#
# Value:
#   A vector of length `n` holding the first `n` entries of the first run of
#   at least `n` consecutive non-zero values, or `n` NAs when `x` has no such
#   run (so apply() still assembles a matrix).
cumfun <- function(x, n = 5L) {
  # Non-zero (not merely positive) entries count; NA breaks a run.
  nonzero <- !is.na(x) & x != 0
  total <- cumsum(nonzero)
  # At each zero the running count is recorded; cummax() carries that value
  # forward, so the difference is the length of the current non-zero run.
  run_len <- total - cummax((!nonzero) * total)
  # Every run that reaches length n contributes one position; several runs
  # may qualify, so only the first is used.
  run_end <- which(run_len == n)
  if (length(run_end) == 0L) {
    return(x[rep(NA_integer_, n)])
  }
  last <- run_end[1L]
  x[(last - n + 1L):last]
}
# Run cumfun on every row of A; apply() returns one column per row, so
# transpose to get one output row per input row.
B <- t(apply(X = A, MARGIN = 1, FUN = cumfun))
> B
[,1] [,2] [,3] [,4] [,5]
[1,] 1 2 3 4 5
[2,] 1 2 3 4 5
[3,] 1 2 3 4 5
[4,] 5 6 7 8 9
[5,] 7 8 9 1 2
答案 3 :(得分:0)
library(zoo)
# For each row of A, return the n values of the first window of n consecutive
# non-zero entries; transposed so each input row becomes an output row.
# MARGIN is spelled out in full instead of relying on partial matching.
t(apply(A, MARGIN = 1, function(x, n = 5) {
  # A window of n entries is entirely non-zero when its count of non-zero
  # entries equals n.
  window_full <- rollsum(x != 0, n) == n
  # Start index of the first such window (NA when there is none).
  first_start <- which(window_full)[1]
  x[first_start + seq_len(n) - 1]
}))
[,1] [,2] [,3] [,4] [,5]
[1,] 1 2 3 4 5
[2,] 1 2 3 4 5
[3,] 1 2 3 4 5
[4,] 5 6 7 8 9
[5,] 7 8 9 1 2