我有一个矩阵、数据框或 data.table。我想创建一个新矩阵,使其每一行由原数据中自左下向右上的(反向)对角线组成,其余单元格为 NA。我已经能够做到这一点,但我认为应该有更容易、更简单的解决方案。因此,欢迎任何解决方案。
例如,假设我有以下 data.table:
# Example input: the integers 1..100 laid out row by row in a 20 x 5 table.
df1 <- as.data.table(matrix(1:100, nrow = 20, byrow = TRUE))
> structure(df1)
V1 V2 V3 V4 V5
1: 1 2 3 4 5
2: 6 7 8 9 10
3: 11 12 13 14 15
4: 16 17 18 19 20
5: 21 22 23 24 25
6: 26 27 28 29 30
7: 31 32 33 34 35
8: 36 37 38 39 40
9: 41 42 43 44 45
10: 46 47 48 49 50
11: 51 52 53 54 55
12: 56 57 58 59 60
13: 61 62 63 64 65
14: 66 67 68 69 70
15: 71 72 73 74 75
16: 76 77 78 79 80
17: 81 82 83 84 85
18: 86 87 88 89 90
19: 91 92 93 94 95
20: 96 97 98 99 100
> dput(df1)
structure(list(V1 = c(1L, 6L, 11L, 16L, 21L, 26L, 31L, 36L, 41L,
46L, 51L, 56L, 61L, 66L, 71L, 76L, 81L, 86L, 91L, 96L), V2 = c(2L,
7L, 12L, 17L, 22L, 27L, 32L, 37L, 42L, 47L, 52L, 57L, 62L, 67L,
72L, 77L, 82L, 87L, 92L, 97L), V3 = c(3L, 8L, 13L, 18L, 23L,
28L, 33L, 38L, 43L, 48L, 53L, 58L, 63L, 68L, 73L, 78L, 83L, 88L,
93L, 98L), V4 = c(4L, 9L, 14L, 19L, 24L, 29L, 34L, 39L, 44L,
49L, 54L, 59L, 64L, 69L, 74L, 79L, 84L, 89L, 94L, 99L), V5 = c(5L,
10L, 15L, 20L, 25L, 30L, 35L, 40L, 45L, 50L, 55L, 60L, 65L, 70L,
75L, 80L, 85L, 90L, 95L, 100L)), row.names = c(NA, -20L), class = c("data.table","data.frame"), .internal.selfref = <pointer: 0x1038072e0>)
然后下面的代码完成工作:
# Reverse the row order first, so that the "lead" shift applied below turns
# into a downward shift once the rows are flipped back at the end.
df1 <- as.data.table(df1[seq(nrow(df1), 1L), ])
n <- ncol(df1)
# Lead column k by k - 1 positions; mapply() walks the columns and the
# offsets in parallel and collects the shifted columns into a matrix.
df1 <- mapply(shift, df1, n = seq_len(n) - 1, MoreArgs = list(type = "lead"))
# Flip the rows back to their original order.
df1 <- df1[seq(nrow(df1), 1L), ]
然后我得到想要的结果:
> structure(df1)
V1 V2 V3 V4 V5
[1,] 1 NA NA NA NA
[2,] 6 2 NA NA NA
[3,] 11 7 3 NA NA
[4,] 16 12 8 4 NA
[5,] 21 17 13 9 5
[6,] 26 22 18 14 10
[7,] 31 27 23 19 15
[8,] 36 32 28 24 20
[9,] 41 37 33 29 25
[10,] 46 42 38 34 30
[11,] 51 47 43 39 35
[12,] 56 52 48 44 40
[13,] 61 57 53 49 45
[14,] 66 62 58 54 50
[15,] 71 67 63 59 55
[16,] 76 72 68 64 60
[17,] 81 77 73 69 65
[18,] 86 82 78 74 70
[19,] 91 87 83 79 75
[20,] 96 92 88 84 80
structure(c(1L, 6L, 11L, 16L, 21L, 26L, 31L, 36L, 41L, 46L, 51L,
56L, 61L, 66L, 71L, 76L, 81L, 86L, 91L, 96L, NA, 2L, 7L, 12L,
17L, 22L, 27L, 32L, 37L, 42L, 47L, 52L, 57L, 62L, 67L, 72L, 77L,
82L, 87L, 92L, NA, NA, 3L, 8L, 13L, 18L, 23L, 28L, 33L, 38L,
43L, 48L, 53L, 58L, 63L, 68L, 73L, 78L, 83L, 88L, NA, NA, NA,
4L, 9L, 14L, 19L, 24L, 29L, 34L, 39L, 44L, 49L, 54L, 59L, 64L,
69L, 74L, 79L, 84L, NA, NA, NA, NA, 5L, 10L, 15L, 20L, 25L, 30L,
35L, 40L, 45L, 50L, 55L, 60L, 65L, 70L, 75L, 80L), .Dim = c(20L,
5L), .Dimnames = list(NULL, c("V1", "V2", "V3", "V4", "V5")))
答案 0 :(得分:4)
一种选择是让 `shift` 的 `n` 参数依次取 0 到 4,分别对应 data.table 子集(`.SD`)中的每一列:
# Lag column k by k - 1 rows (shift() defaults to type = "lag"). The offsets
# are derived from .SD instead of being hard-coded as 0:4, so the same line
# works for any number of columns.
df1[, Map(shift, .SD, n = seq_along(.SD) - 1L)]
# V1 V2 V3 V4 V5
# 1: 1 NA NA NA NA
# 2: 6 2 NA NA NA
# 3: 11 7 3 NA NA
# 4: 16 12 8 4 NA
# 5: 21 17 13 9 5
# 6: 26 22 18 14 10
# 7: 31 27 23 19 15
# 8: 36 32 28 24 20
# 9: 41 37 33 29 25
#10: 46 42 38 34 30
#11: 51 47 43 39 35
#12: 56 52 48 44 40
#13: 61 57 53 49 45
#14: 66 62 58 54 50
#15: 71 67 63 59 55
#16: 76 72 68 64 60
#17: 81 77 73 69 65
#18: 86 82 78 74 70
#19: 91 87 83 79 75
#20: 96 92 88 84 80
这里 `type` 应该是 "lag" 而不是 "lead";而 `shift` 的 `type` 默认值就是 "lag",因此无需显式指定。