合并列表中的数据框

时间:2016-06-08 13:20:09

标签: r list dataframe

我有一个如下所示的列表:

lapply(sample_list, head, 3)

$`2016-04-24 00:00:00.tcp`
   ports freq
8    443  296
12    80  170
5     23   92

$`2016-04-24 00:00:00.udp`
  ports freq
4   161  138
7    53   45
1   123   28

$`2016-04-24 01:00:00.tcp`
   ports freq
13   443  342
20    80  215
10    25   60

$`2016-04-24 01:00:00.udp`
   ports freq
4    161   85
8     53   42
12   902   27

我想合并属于同一协议的数据框(即 tcp 的合并在一起,udp 的合并在一起),这样最终结果将是一个包含 2 个数据框的新列表:一个用于 tcp,另一个用于 udp。

lapply(final_list, head, 3)

$tcp
  ports freq.00:00:00 freq.01:00:00
1   443           296           342
2    80           170           215
3    23            92            51

$udp
  ports freq.00:00:00 freq.01:00:00
1   161           138            85
2    53            45            42
3   123            28            19

数据

dput(sample_list)
structure(list(`2016-04-24 00:00:00.tcp` = structure(list(ports = c("443", 
"80", "23", "21", "22", "25", "445", "110", "389", "135", "465", 
"514", "91", "995", "84", "902"), freq = structure(c(296L, 170L, 
92L, 18L, 16L, 15L, 14L, 4L, 3L, 2L, 2L, 2L, 2L, 2L, 1L, 1L), .Dim = 16L)), .Names = c("ports", 
"freq"), row.names = c(8L, 12L, 5L, 3L, 4L, 6L, 9L, 1L, 7L, 2L, 
10L, 11L, 15L, 16L, 13L, 14L), class = "data.frame"), `2016-04-24 00:00:00.udp` = structure(list(
    ports = c("161", "53", "123", "902", "137", "514", "138", 
    "623", "69", "88", "500"), freq = structure(c(138L, 45L, 
    28L, 26L, 24L, 24L, 6L, 6L, 5L, 4L, 1L), .Dim = 11L)), .Names = c("ports", 
"freq"), row.names = c(4L, 7L, 1L, 11L, 2L, 6L, 3L, 8L, 9L, 10L, 
5L), class = "data.frame"), `2016-04-24 01:00:00.tcp` = structure(list(
    ports = c("443", "80", "25", "23", "88", "21", "161", "22", 
    "445", "135", "389", "993", "548", "110", "143", "502", "514", 
    "81", "995", "102", "111", "311", "444", "789", "902", "91"
    ), freq = structure(c(342L, 215L, 60L, 51L, 42L, 32L, 31L, 
    18L, 18L, 6L, 5L, 4L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L), .Dim = 26L)), .Names = c("ports", "freq"
), row.names = c(13L, 20L, 10L, 9L, 22L, 7L, 6L, 8L, 15L, 4L, 
12L, 25L, 18L, 2L, 5L, 16L, 17L, 21L, 26L, 1L, 3L, 11L, 14L, 
19L, 23L, 24L), class = "data.frame"), `2016-04-24 01:00:00.udp` = structure(list(
    ports = c("161", "53", "902", "514", "123", "137", "69", 
    "138", "389", "443", "88", "623"), freq = structure(c(85L, 
    42L, 27L, 24L, 19L, 15L, 15L, 4L, 2L, 2L, 2L, 1L), .Dim = 12L)), .Names = c("ports", 
"freq"), row.names = c(4L, 8L, 12L, 7L, 1L, 2L, 10L, 3L, 5L, 
6L, 11L, 9L), class = "data.frame")), .Names = c("2016-04-24 00:00:00.tcp", 
"2016-04-24 00:00:00.udp", "2016-04-24 01:00:00.tcp", "2016-04-24 01:00:00.udp"
))

附加问题:freq 的结构是什么?我之前从未见过 int [1:16(1d)] 这种形式。

str(sample_list$`2016-04-24 00:00:00.tcp`)
'data.frame':   16 obs. of  2 variables:
 $ ports: chr  "443" "80" "23" "21" ...
 $ freq : int [1:16(1d)] 296 170 92 18 16 15 14 4 3 2 ...

我用来创建列表的代码(在本例中称为 try1):

protocol_list <- lapply(per_hour1, function(i) split(i, i$protocol))
Analytic_Protocol_List <- lapply(protocol_list, function(i) lapply(i, dest.ports))
try1 <- lapply(unlist(Analytic_Protocol_List, recursive = FALSE), `[[`, 1)

请注意,类似问题(similar questions)中的解决方案不适用于这种情况。也许是因为数据结构的原因?

3 个答案:

答案 0 :(得分:2)

对于rbind,您可以尝试以下操作:

do.call(rbind, sample_list[grep("tcp", names(sample_list))])

do.call(rbind, sample_list[grep("udp", names(sample_list))])

并由马拉特在下面进一步改进为:

d <- do.call(rbind, sample_list)
d2 <- data.frame(d,do.call(rbind,strsplit(rownames((d)),'[.]')))
lapply(split(d2,d2$X2),dcast,ports~X1,value.var='freq')

答案 1 :(得分:2)

另一种选择:

library(dplyr)
library(tidyr)

data.table::melt(sample_list) %>%
  separate(L1, into = c("time", "protocol"), sep = "\\.") %>%
  unite(f, variable, time) %>%
  spread(f, value) %>%
  split(.$protocol)

使用您的数据,得到的结果如下:

$tcp
   ports protocol freq_2016-04-24 00:00:00 freq_2016-04-24 01:00:00
1    102      tcp                       NA                        1
2    110      tcp                        4                        2
3    111      tcp                       NA                        1
5    135      tcp                        2                        6
8    143      tcp                       NA                        2
9    161      tcp                       NA                       31
11    21      tcp                       18                       32
12    22      tcp                       16                       18
13    23      tcp                       92                       51
14    25      tcp                       15                       60
15   311      tcp                       NA                        1
16   389      tcp                        3                        5
18   443      tcp                      296                      342
20   444      tcp                       NA                        1
21   445      tcp                       14                       18
22   465      tcp                        2                       NA
24   502      tcp                       NA                        2
25   514      tcp                        2                        2
28   548      tcp                       NA                        3
31   789      tcp                       NA                        1
32    80      tcp                      170                      215
33    81      tcp                       NA                        2
34    84      tcp                        1                       NA
35    88      tcp                       NA                       42
37   902      tcp                        1                        1
39    91      tcp                        2                        1
40   993      tcp                       NA                        4
41   995      tcp                        2                        2

$udp
   ports protocol freq_2016-04-24 00:00:00 freq_2016-04-24 01:00:00
4    123      udp                       28                       19
6    137      udp                       24                       15
7    138      udp                        6                        4
10   161      udp                      138                       85
17   389      udp                       NA                        2
19   443      udp                       NA                        2
23   500      udp                        1                       NA
26   514      udp                       24                       24
27    53      udp                       45                       42
29   623      udp                        6                        1
30    69      udp                        5                       15
36    88      udp                        4                        2
38   902      udp                       26                       27

更新

如果您想按freq排序,可以这样做:

data.table::melt(sample_list) %>%
  separate(L1, into = c("time", "protocol"), sep = "\\.") %>%
  unite(f, variable, time) %>%
  spread(f, value) %>%
  arrange(protocol, desc(`freq_2016-04-24 00:00:00`)) %>%
  split(.$protocol) 

答案 2 :(得分:0)

你可以按 ID 合并:为数据框的每一行创建一个 ID。令 lapply(X) = x:

  x$1 <- cbind(ID=1:nrow(x$1)) 

对 x1、x2、x3、……、xN 也做相同的处理:
  newx <- merge(x$1,x$2,...,x$N, by=ID)

因为是按 ID 合并的,所以不会发生重叠;只需把每个 list$(X) 本身当作一个数据框即可。