我有一个由大约 38 个子列表组成的列表(列表的列表)。其中只有少数几个需要保留(其余的没有值,即 NULL)。我想把这些列表整理成一个规整的数据框。
我的列表的列表如下:
structure(list(NULL, AFT = NULL, `AP-2` = NULL, `AT_hook, ETS` = NULL,
`BASIC, HLH` = NULL, BRIGHT = NULL, BRLZ = NULL, `BRLZ, BZIP_1, BZIP_2` = NULL,
bZIP = NULL, DWA = NULL, E2F_TDP = NULL, ETS = structure(list(
MASHvstRap = 8.34818462488622e-05, MASHvsBEEML = 0.000250015234002341,
tRapvsBEEML = 8.80480124829088e-06, frequency = 10, stringsAsFactors = 0), .Names = c("MASHvstRap",
"MASHvsBEEML", "tRapvsBEEML", "frequency", "stringsAsFactors"
), row.names = c(NA, -1L), class = "data.frame"), FH = structure(list(
MASHvstRap = 1.72864219357795e-05, MASHvsBEEML = 0.000840376826415137,
tRapvsBEEML = 2.54589884424594e-07, frequency = 10, stringsAsFactors = 0), .Names = c("MASHvstRap",
"MASHvsBEEML", "tRapvsBEEML", "frequency", "stringsAsFactors"
), row.names = c(NA, -1L), class = "data.frame"), GCM = NULL,
HLH = structure(list(MASHvstRap = 1.22573775496788e-08, MASHvsBEEML = 0.00119919900578073,
tRapvsBEEML = 3.60117573203279e-07, frequency = 13, stringsAsFactors = 0), .Names = c("MASHvstRap",
"MASHvsBEEML", "tRapvsBEEML", "frequency", "stringsAsFactors"
), row.names = c(NA, -1L), class = "data.frame"), HMG = structure(list(
MASHvstRap = 6.07022175358029e-30, MASHvsBEEML = 0.0994358268075855,
tRapvsBEEML = 5.3728011843321e-09, frequency = 44, stringsAsFactors = 0), .Names = c("MASHvstRap",
"MASHvsBEEML", "tRapvsBEEML", "frequency", "stringsAsFactors"
), row.names = c(NA, -1L), class = "data.frame"), Homeo = structure(list(
MASHvstRap = 4.33277656523673e-123, MASHvsBEEML = 0.442020719677047,
tRapvsBEEML = 8.44025048683083e-74, frequency = 158,
stringsAsFactors = 0), .Names = c("MASHvstRap", "MASHvsBEEML",
"tRapvsBEEML", "frequency", "stringsAsFactors"), row.names = c(NA,
-1L), class = "data.frame"), `Homeo ` = structure(list(MASHvstRap = 3.36388469632471e-14,
MASHvsBEEML = 0.763756578209722, tRapvsBEEML = 3.75944533892572e-07,
frequency = 19, stringsAsFactors = 0), .Names = c("MASHvstRap",
"MASHvsBEEML", "tRapvsBEEML", "frequency", "stringsAsFactors"
), row.names = c(NA, -1L), class = "data.frame"), `Homeo, PAX` = NULL,
`Homeo, POU` = structure(list(MASHvstRap = 3.06769943976602e-08,
MASHvsBEEML = 0.423594358667165, tRapvsBEEML = 7.51004008659922e-09,
frequency = 11, stringsAsFactors = 0), .Names = c("MASHvstRap",
"MASHvsBEEML", "tRapvsBEEML", "frequency", "stringsAsFactors"
), row.names = c(NA, -1L), class = "data.frame"), `HSF_DNA-bind` = NULL,
`HTH APSES-type` = NULL, IRF = structure(list(MASHvstRap = 1.25502843779857e-05,
MASHvsBEEML = 0.00094114146973297, tRapvsBEEML = 1.17030570144044e-06,
frequency = 10, stringsAsFactors = 0), .Names = c("MASHvstRap",
"MASHvsBEEML", "tRapvsBEEML", "frequency", "stringsAsFactors"
), row.names = c(NA, -1L), class = "data.frame"), MADS = NULL,
Myb = NULL, RFX = NULL, SAND = NULL, SANT = NULL, TBOX = NULL,
TBP = NULL, TEA = NULL, unknown = structure(list(MASHvstRap = 4.82890837154273e-32,
MASHvsBEEML = 0.0736357072352032, tRapvsBEEML = 7.20783906680568e-26,
frequency = 121, stringsAsFactors = 0), .Names = c("MASHvstRap",
"MASHvsBEEML", "tRapvsBEEML", "frequency", "stringsAsFactors"
), row.names = c(NA, -1L), class = "data.frame"), Zf_C2H2 = NULL,
Zf_GATA = NULL, Zn2Cys6 = structure(list(MASHvstRap = 4.71138538453502e-05,
MASHvsBEEML = 0.000623286035357452, tRapvsBEEML = 3.93333369828925e-07,
frequency = 17, stringsAsFactors = 0), .Names = c("MASHvstRap",
"MASHvsBEEML", "tRapvsBEEML", "frequency", "stringsAsFactors"
), row.names = c(NA, -1L), class = "data.frame"), ZnF_C2H2 = structure(list(
MASHvstRap = 1.62205005760679e-17, MASHvsBEEML = 1.46483433509648e-08,
tRapvsBEEML = 2.89656372293867e-25, frequency = 54, stringsAsFactors = 0), .Names = c("MASHvstRap",
"MASHvsBEEML", "tRapvsBEEML", "frequency", "stringsAsFactors"
), row.names = c(NA, -1L), class = "data.frame"), ZnF_C4 = structure(list(
MASHvstRap = 4.93181852868703e-06, MASHvsBEEML = 0.0467257430288347,
tRapvsBEEML = 6.69189512726035e-07, frequency = 10, stringsAsFactors = 0), .Names = c("MASHvstRap",
"MASHvsBEEML", "tRapvsBEEML", "frequency", "stringsAsFactors"
), row.names = c(NA, -1L), class = "data.frame"), ZnF_GATA = NULL), .Names = c("",
"AFT", "AP-2", "AT_hook, ETS", "BASIC, HLH", "BRIGHT", "BRLZ",
"BRLZ, BZIP_1, BZIP_2", "bZIP", "DWA", "E2F_TDP", "ETS", "FH",
"GCM", "HLH", "HMG", "Homeo", "Homeo ", "Homeo, PAX", "Homeo, POU",
"HSF_DNA-bind", "HTH APSES-type", "IRF", "MADS", "Myb", "RFX",
"SAND", "SANT", "TBOX", "TBP", "TEA", "unknown", "Zf_C2H2", "Zf_GATA",
"Zn2Cys6", "ZnF_C2H2", "ZnF_C4", "ZnF_GATA"))
你可以看到其中一些列表没有值,因此应该被删除。我想要的是一个有 4 列的数据框:family、method、p.value、frequency。在列表中,数据是按每个家族(family)给出的,如下所示:
$Zn2Cys6
MASHvstRap MASHvsBEEML tRapvsBEEML frequency
1 4.711385e-05 0.000623286 3.933334e-07 17
所以 $Zn2Cys6 是家族(family)名称,应该作为一列加在 MASHvstRap 前面。也就是说,这个列表应该被展开(unlist),并转换成具有以下列名的数据框:family、method(例如 'MASHvstRap')、p.value、frequency。我尝试过使用 lapply(rbind),但得到的结构很奇怪;cbind 和 as.data.frame 也没有帮上忙。
答案 0 :(得分:7)
如果您发布的 structure 名为 ll,则可以执行此操作:
> do.call("rbind",ll)
MASHvstRap MASHvsBEEML tRapvsBEEML frequency
ETS 8.348185e-05 2.500152e-04 8.804801e-06 10
FH 1.728642e-05 8.403768e-04 2.545899e-07 10
HLH 1.225738e-08 1.199199e-03 3.601176e-07 13
HMG 6.070222e-30 9.943583e-02 5.372801e-09 44
Homeo 4.332777e-123 4.420207e-01 8.440250e-74 158
Homeo 3.363885e-14 7.637566e-01 3.759445e-07 19
Homeo, POU 3.067699e-08 4.235944e-01 7.510040e-09 11
IRF 1.255028e-05 9.411415e-04 1.170306e-06 10
unknown 4.828908e-32 7.363571e-02 7.207839e-26 121
Zn2Cys6 4.711385e-05 6.232860e-04 3.933334e-07 17
ZnF_C2H2 1.622050e-17 1.464834e-08 2.896564e-25 54
ZnF_C4 4.931819e-06 4.672574e-02 6.691895e-07 10
第一列是 row.names(即原先每个列表项的名称)。
答案 1 :(得分:2)
如果您把这个列表的列表命名为“pino”,则只需输入:
# Flatten every non-NULL entry of `pino` into one vector (unlist() drops the
# NULL elements), refill it row-wise with 5 values per family, keep the first
# 4 columns, and label them.
prova <- setNames(
  data.frame(matrix(unlist(pino), ncol = 5, byrow = TRUE)[, 1:4]),
  c("MASHvstRap", "MASHvsBEEML", "tRapvsBEEML", "frequency")
)
虽然可能有更通用的解决方案......
答案 2 :(得分:2)
这是使用 reshape2 的解决方案:
# Stack the per-family one-row data frames into a single data frame (the NULL
# entries contribute no rows), then discard the fifth column.
tmp <- do.call(rbind, dat)
tmp <- tmp[-5]
# Keep the family names, currently the row names, as an ordinary column.
tmp[["family"]] <- rownames(tmp)
library(reshape2)
# Reshape to long format: the three comparison columns become
# (method, p.value) pairs, one row per family and method.
melt(
  tmp,
  measure.vars = names(tmp)[1:3],
  variable.name = "method",
  value.name = "p.value"
)
结果:
frequency family method p.value
1 10 ETS MASHvstRap 8.348185e-05
2 10 FH MASHvstRap 1.728642e-05
3 13 HLH MASHvstRap 1.225738e-08
4 44 HMG MASHvstRap 6.070222e-30
5 158 Homeo MASHvstRap 4.332777e-123
6 19 Homeo MASHvstRap 3.363885e-14
7 11 Homeo, POU MASHvstRap 3.067699e-08
8 10 IRF MASHvstRap 1.255028e-05
9 121 unknown MASHvstRap 4.828908e-32
10 17 Zn2Cys6 MASHvstRap 4.711385e-05
11 54 ZnF_C2H2 MASHvstRap 1.622050e-17
12 10 ZnF_C4 MASHvstRap 4.931819e-06
13 10 ETS MASHvsBEEML 2.500152e-04
14 10 FH MASHvsBEEML 8.403768e-04
15 13 HLH MASHvsBEEML 1.199199e-03
16 44 HMG MASHvsBEEML 9.943583e-02
17 158 Homeo MASHvsBEEML 4.420207e-01
18 19 Homeo MASHvsBEEML 7.637566e-01
19 11 Homeo, POU MASHvsBEEML 4.235944e-01
20 10 IRF MASHvsBEEML 9.411415e-04
21 121 unknown MASHvsBEEML 7.363571e-02
22 17 Zn2Cys6 MASHvsBEEML 6.232860e-04
23 54 ZnF_C2H2 MASHvsBEEML 1.464834e-08
24 10 ZnF_C4 MASHvsBEEML 4.672574e-02
25 10 ETS tRapvsBEEML 8.804801e-06
26 10 FH tRapvsBEEML 2.545899e-07
27 13 HLH tRapvsBEEML 3.601176e-07
28 44 HMG tRapvsBEEML 5.372801e-09
29 158 Homeo tRapvsBEEML 8.440250e-74
30 19 Homeo tRapvsBEEML 3.759445e-07
31 11 Homeo, POU tRapvsBEEML 7.510040e-09
32 10 IRF tRapvsBEEML 1.170306e-06
33 121 unknown tRapvsBEEML 7.207839e-26
34 17 Zn2Cys6 tRapvsBEEML 3.933334e-07
35 54 ZnF_C2H2 tRapvsBEEML 2.896564e-25
36 10 ZnF_C4 tRapvsBEEML 6.691895e-07