我有一个类似下面这样的列表:
lapply(sample_list, head, 3)
$`2016-04-24 00:00:00.tcp`
ports freq
8 443 296
12 80 170
5 23 92
$`2016-04-24 00:00:00.udp`
ports freq
4 161 138
7 53 45
1 123 28
$`2016-04-24 01:00:00.tcp`
ports freq
13 443 342
20 80 215
10 25 60
$`2016-04-24 01:00:00.udp`
ports freq
4 161 85
8 53 42
12 902 27
我想把属于同一协议的数据帧合并在一起(即所有 tcp 合并为一个,所有 udp 合并为一个),这样最终结果将是一个包含 2 个数据帧的新列表:一个对应 tcp,另一个对应 udp:
lapply(final_list, head, 3)
$tcp
ports freq.00:00:00 freq.01:00:00
1 443 296 342
2 80 170 215
3 23 92 51
$udp
ports freq.00:00:00 freq.01:00:00
1 161 138 85
2 53 45 42
3 123 28 19
数据
dput(sample_list)
structure(list(`2016-04-24 00:00:00.tcp` = structure(list(ports = c("443",
"80", "23", "21", "22", "25", "445", "110", "389", "135", "465",
"514", "91", "995", "84", "902"), freq = structure(c(296L, 170L,
92L, 18L, 16L, 15L, 14L, 4L, 3L, 2L, 2L, 2L, 2L, 2L, 1L, 1L), .Dim = 16L)), .Names = c("ports",
"freq"), row.names = c(8L, 12L, 5L, 3L, 4L, 6L, 9L, 1L, 7L, 2L,
10L, 11L, 15L, 16L, 13L, 14L), class = "data.frame"), `2016-04-24 00:00:00.udp` = structure(list(
ports = c("161", "53", "123", "902", "137", "514", "138",
"623", "69", "88", "500"), freq = structure(c(138L, 45L,
28L, 26L, 24L, 24L, 6L, 6L, 5L, 4L, 1L), .Dim = 11L)), .Names = c("ports",
"freq"), row.names = c(4L, 7L, 1L, 11L, 2L, 6L, 3L, 8L, 9L, 10L,
5L), class = "data.frame"), `2016-04-24 01:00:00.tcp` = structure(list(
ports = c("443", "80", "25", "23", "88", "21", "161", "22",
"445", "135", "389", "993", "548", "110", "143", "502", "514",
"81", "995", "102", "111", "311", "444", "789", "902", "91"
), freq = structure(c(342L, 215L, 60L, 51L, 42L, 32L, 31L,
18L, 18L, 6L, 5L, 4L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L,
1L, 1L, 1L, 1L, 1L), .Dim = 26L)), .Names = c("ports", "freq"
), row.names = c(13L, 20L, 10L, 9L, 22L, 7L, 6L, 8L, 15L, 4L,
12L, 25L, 18L, 2L, 5L, 16L, 17L, 21L, 26L, 1L, 3L, 11L, 14L,
19L, 23L, 24L), class = "data.frame"), `2016-04-24 01:00:00.udp` = structure(list(
ports = c("161", "53", "902", "514", "123", "137", "69",
"138", "389", "443", "88", "623"), freq = structure(c(85L,
42L, 27L, 24L, 19L, 15L, 15L, 4L, 2L, 2L, 2L, 1L), .Dim = 12L)), .Names = c("ports",
"freq"), row.names = c(4L, 8L, 12L, 7L, 1L, 2L, 10L, 3L, 5L,
6L, 11L, 9L), class = "data.frame")), .Names = c("2016-04-24 00:00:00.tcp",
"2016-04-24 00:00:00.udp", "2016-04-24 01:00:00.tcp", "2016-04-24 01:00:00.udp"
))
附加问题:freq 列的结构是什么?我之前从未见过 int [1:16(1d)] 这种形式。
str(sample_list$`2016-04-24 00:00:00.tcp`)
'data.frame': 16 obs. of 2 variables:
$ ports: chr "443" "80" "23" "21" ...
$ freq : int [1:16(1d)] 296 170 92 18 16 15 14 4 3 2 ...
下面是我用来创建该列表的代码(在这段代码中列表名为 try1):
# Split every element of per_hour1 by its `protocol` column.
protocol_list <- lapply(per_hour1, function(hour_df) {
  split(hour_df, hour_df$protocol)
})
# Run dest.ports() on each per-protocol piece of each element.
# NOTE(review): dest.ports() is defined elsewhere; its return shape is not
# visible here, only that its first element is used below.
Analytic_Protocol_List <- lapply(protocol_list, function(by_protocol) {
  lapply(by_protocol, dest.ports)
})
# Flatten the nesting by one level, then keep the first element of each result.
try1 <- lapply(
  unlist(Analytic_Protocol_List, recursive = FALSE),
  function(result) result[[1]]
)
请注意,类似问题(similar questions)中的解决方案不适用于这种情况,也许是因为数据结构的缘故?
答案 0 :(得分:2)
如果使用 rbind,您可以尝试以下操作:
# Row-bind every data frame whose list name ends in ".tcp". The pattern is
# anchored to the suffix so that a name merely containing "tcp" somewhere
# else is not selected by accident.
do.call(rbind, sample_list[grep("\\.tcp$", names(sample_list))])
和
# Row-bind every data frame whose list name ends in ".udp". The pattern is
# anchored to the suffix so that a name merely containing "udp" somewhere
# else is not selected by accident.
do.call(rbind, sample_list[grep("\\.udp$", names(sample_list))])
马拉特(Marat)对此做了进一步改进,如下:
# Stack all data frames into one. The code below relies on rbind() having
# prefixed each row name with the list name, i.e. row names of the form
# "<timestamp>.<protocol>.<original row name>".
d <- do.call(rbind, sample_list)
# Split the row names on "." and attach the pieces as extra columns:
# X1 = timestamp, X2 = protocol (further pieces follow as X3, ...).
d2 <- data.frame(d, do.call(rbind, strsplit(rownames(d), "[.]")))
# One wide table per protocol: a row per port, a column per timestamp.
# dcast() is namespace-qualified because nothing in this snippet attaches
# reshape2; a bare `dcast` fails with "object 'dcast' not found".
lapply(split(d2, d2$X2), reshape2::dcast, ports ~ X1, value.var = "freq")
答案 1 :(得分:2)
另一种选择:
library(dplyr)
library(tidyr)
# melt() on a plain list is implemented by reshape2. Call it directly instead
# of relying on data.table forwarding non-data.table input to reshape2, which
# newer data.table releases no longer do.
# The melted result carries the list element name in column `L1`.
reshape2::melt(sample_list) %>%
  # "2016-04-24 00:00:00.tcp" -> time = "2016-04-24 00:00:00", protocol = "tcp"
  separate(L1, into = c("time", "protocol"), sep = "\\.") %>%
  # Build the future column names, e.g. "freq_2016-04-24 00:00:00"
  unite(f, variable, time) %>%
  # One column per timestamp; ports absent in a given hour become NA
  spread(f, value) %>%
  # Final result: a named list with one data frame per protocol
  split(.$protocol)
使用您的数据,得到的结果如下:
$tcp
ports protocol freq_2016-04-24 00:00:00 freq_2016-04-24 01:00:00
1 102 tcp NA 1
2 110 tcp 4 2
3 111 tcp NA 1
5 135 tcp 2 6
8 143 tcp NA 2
9 161 tcp NA 31
11 21 tcp 18 32
12 22 tcp 16 18
13 23 tcp 92 51
14 25 tcp 15 60
15 311 tcp NA 1
16 389 tcp 3 5
18 443 tcp 296 342
20 444 tcp NA 1
21 445 tcp 14 18
22 465 tcp 2 NA
24 502 tcp NA 2
25 514 tcp 2 2
28 548 tcp NA 3
31 789 tcp NA 1
32 80 tcp 170 215
33 81 tcp NA 2
34 84 tcp 1 NA
35 88 tcp NA 42
37 902 tcp 1 1
39 91 tcp 2 1
40 993 tcp NA 4
41 995 tcp 2 2
$udp
ports protocol freq_2016-04-24 00:00:00 freq_2016-04-24 01:00:00
4 123 udp 28 19
6 137 udp 24 15
7 138 udp 6 4
10 161 udp 138 85
17 389 udp NA 2
19 443 udp NA 2
23 500 udp 1 NA
26 514 udp 24 24
27 53 udp 45 42
29 623 udp 6 1
30 69 udp 5 15
36 88 udp 4 2
38 902 udp 26 27
更新:
如果您想按 freq 降序排序,可以这样做:
# melt() on a plain list is implemented by reshape2. Call it directly instead
# of relying on data.table forwarding non-data.table input to reshape2, which
# newer data.table releases no longer do.
# The melted result carries the list element name in column `L1`.
reshape2::melt(sample_list) %>%
  # "2016-04-24 00:00:00.tcp" -> time = "2016-04-24 00:00:00", protocol = "tcp"
  separate(L1, into = c("time", "protocol"), sep = "\\.") %>%
  # Build the future column names, e.g. "freq_2016-04-24 00:00:00"
  unite(f, variable, time) %>%
  # One column per timestamp; ports absent in a given hour become NA
  spread(f, value) %>%
  # Within each protocol, order ports by the first hour's frequency, descending
  arrange(protocol, desc(`freq_2016-04-24 00:00:00`)) %>%
  # Final result: a named list with one data frame per protocol
  split(.$protocol)
答案 2 :(得分:0)
你可以按 ID 合并:先为每个数据框的每一行创建一个 ID。设 x 为 lapply(X) 得到的列表:
# `x$1` is not valid R syntax, so the first list element is addressed with
# [[ ]]. The ID column is prepended to the existing columns rather than
# replacing them, and seq_len() stays correct for a zero-row data frame.
x[[1]] <- cbind(ID = seq_len(nrow(x[[1]])), x[[1]])
对 x1、x2、x3、……、xN 都执行相同的操作,然后合并:
newx <- merge(x$1,x$2,...,x$N, by=ID)
因为是按 ID 进行合并,所以不会发生重叠;只需把列表中的每个元素(x$N)当作一个独立的数据帧来处理即可。