我有一个数据框,其第一列(weights)包含一个列表(其中的元素是数据框?):
> head(data$weights)
> data <- structure(list(A373R11 = structure(list(Signature.1A = 0, Signature.1B = 0,
Signature.2 = 0, Signature.3 = 0.151631702143023, Signature.4 = 0.149799882118262,
Signature.5 = 0, Signature.6 = 0, Signature.7 = 0.0634912587993959,
Signature.8 = 0, Signature.9 = 0.173189155080817, Signature.10 = 0,
Signature.11 = 0, Signature.12 = 0, Signature.13 = 0, Signature.14 = 0,
Signature.15 = 0, Signature.16 = 0, Signature.17 = 0, Signature.18 = 0,
Signature.19 = 0, Signature.20 = 0, Signature.21 = 0.0905517653558877,
Signature.R1 = 0, Signature.R2 = 0, Signature.R3 = 0, Signature.U1 = 0.155590748898003,
Signature.U2 = 0.145955461287919), .Names = c("Signature.1A",
"Signature.1B", "Signature.2", "Signature.3", "Signature.4",
"Signature.5", "Signature.6", "Signature.7", "Signature.8", "Signature.9",
"Signature.10", "Signature.11", "Signature.12", "Signature.13",
"Signature.14", "Signature.15", "Signature.16", "Signature.17",
"Signature.18", "Signature.19", "Signature.20", "Signature.21",
"Signature.R1", "Signature.R2", "Signature.R3", "Signature.U1",
"Signature.U2"), row.names = "A373R11", class = "data.frame"),
A373R13 = structure(list(Signature.1A = 0, Signature.1B = 0,
Signature.2 = 0, Signature.3 = 0.221014874027829, Signature.4 = 0,
Signature.5 = 0, Signature.6 = 0, Signature.7 = 0, Signature.8 = 0.279252211893692,
Signature.9 = 0, Signature.10 = 0, Signature.11 = 0,
Signature.12 = 0, Signature.13 = 0, Signature.14 = 0,
Signature.15 = 0, Signature.16 = 0, Signature.17 = 0,
Signature.18 = 0, Signature.19 = 0.115216422668955, Signature.20 = 0,
Signature.21 = 0, Signature.R1 = 0, Signature.R2 = 0,
Signature.R3 = 0.0636987713225648, Signature.U1 = 0.108875099907467,
Signature.U2 = 0), .Names = c("Signature.1A", "Signature.1B",
"Signature.2", "Signature.3", "Signature.4", "Signature.5",
"Signature.6", "Signature.7", "Signature.8", "Signature.9",
"Signature.10", "Signature.11", "Signature.12", "Signature.13",
"Signature.14", "Signature.15", "Signature.16", "Signature.17",
"Signature.18", "Signature.19", "Signature.20", "Signature.21",
"Signature.R1", "Signature.R2", "Signature.R3", "Signature.U1",
"Signature.U2"), row.names = "A373R13", class = "data.frame"),
A373R3 = structure(list(Signature.1A = 0, Signature.1B = 0,
Signature.2 = 0, Signature.3 = 0.0795605471131758, Signature.4 = 0.0973130562439999,
Signature.5 = 0, Signature.6 = 0, Signature.7 = 0, Signature.8 = 0.249674548796242,
Signature.9 = 0.0725013504411567, Signature.10 = 0, Signature.11 = 0.064665155855146,
Signature.12 = 0, Signature.13 = 0, Signature.14 = 0,
Signature.15 = 0, Signature.16 = 0, Signature.17 = 0,
Signature.18 = 0, Signature.19 = 0, Signature.20 = 0,
Signature.21 = 0, Signature.R1 = 0, Signature.R2 = 0,
Signature.R3 = 0.0703546703126821, Signature.U1 = 0.21753544296676,
Signature.U2 = 0.0739201832004727), .Names = c("Signature.1A",
"Signature.1B", "Signature.2", "Signature.3", "Signature.4",
"Signature.5", "Signature.6", "Signature.7", "Signature.8",
"Signature.9", "Signature.10", "Signature.11", "Signature.12",
"Signature.13", "Signature.14", "Signature.15", "Signature.16",
"Signature.17", "Signature.18", "Signature.19", "Signature.20",
"Signature.21", "Signature.R1", "Signature.R2", "Signature.R3",
"Signature.U1", "Signature.U2"), row.names = "A373R3", class = "data.frame"),
A373R5 = structure(list(Signature.1A = 0, Signature.1B = 0,
Signature.2 = 0, Signature.3 = 0.113996509522102, Signature.4 = 0.114874220936966,
Signature.5 = 0.142056872670519, Signature.6 = 0, Signature.7 = 0,
Signature.8 = 0.208376707959741, Signature.9 = 0.0744527503782136,
Signature.10 = 0, Signature.11 = 0, Signature.12 = 0,
Signature.13 = 0, Signature.14 = 0, Signature.15 = 0.0771902641012979,
Signature.16 = 0, Signature.17 = 0, Signature.18 = 0,
Signature.19 = 0, Signature.20 = 0, Signature.21 = 0,
Signature.R1 = 0, Signature.R2 = 0, Signature.R3 = 0,
Signature.U1 = 0.0673567355607731, Signature.U2 = 0), .Names = c("Signature.1A",
"Signature.1B", "Signature.2", "Signature.3", "Signature.4",
"Signature.5", "Signature.6", "Signature.7", "Signature.8",
"Signature.9", "Signature.10", "Signature.11", "Signature.12",
"Signature.13", "Signature.14", "Signature.15", "Signature.16",
"Signature.17", "Signature.18", "Signature.19", "Signature.20",
"Signature.21", "Signature.R1", "Signature.R2", "Signature.R3",
"Signature.U1", "Signature.U2"), row.names = "A373R5", class = "data.frame"),
A373R9 = structure(list(Signature.1A = 0, Signature.1B = 0,
Signature.2 = 0, Signature.3 = 0.116847300193985, Signature.4 = 0,
Signature.5 = 0.21624751052703, Signature.6 = 0, Signature.7 = 0,
Signature.8 = 0.252498230882402, Signature.9 = 0, Signature.10 = 0,
Signature.11 = 0.119495912880994, Signature.12 = 0, Signature.13 = 0,
Signature.14 = 0, Signature.15 = 0, Signature.16 = 0,
Signature.17 = 0, Signature.18 = 0, Signature.19 = 0,
Signature.20 = 0, Signature.21 = 0, Signature.R1 = 0,
Signature.R2 = 0, Signature.R3 = 0.0725549911220892,
Signature.U1 = 0, Signature.U2 = 0), .Names = c("Signature.1A",
"Signature.1B", "Signature.2", "Signature.3", "Signature.4",
"Signature.5", "Signature.6", "Signature.7", "Signature.8",
"Signature.9", "Signature.10", "Signature.11", "Signature.12",
"Signature.13", "Signature.14", "Signature.15", "Signature.16",
"Signature.17", "Signature.18", "Signature.19", "Signature.20",
"Signature.21", "Signature.R1", "Signature.R2", "Signature.R3",
"Signature.U1", "Signature.U2"), row.names = "A373R9", class = "data.frame"),
A512R19 = structure(list(Signature.1A = 0.109490572493859,
Signature.1B = 0, Signature.2 = 0, Signature.3 = 0, Signature.4 = 0.22010156823306,
Signature.5 = 0, Signature.6 = 0, Signature.7 = 0, Signature.8 = 0,
Signature.9 = 0, Signature.10 = 0, Signature.11 = 0,
Signature.12 = 0, Signature.13 = 0, Signature.14 = 0,
Signature.15 = 0, Signature.16 = 0, Signature.17 = 0,
Signature.18 = 0, Signature.19 = 0, Signature.20 = 0,
Signature.21 = 0, Signature.R1 = 0, Signature.R2 = 0,
Signature.R3 = 0.150943894106973, Signature.U1 = 0.248556502648564,
Signature.U2 = 0.119306892617062), .Names = c("Signature.1A",
"Signature.1B", "Signature.2", "Signature.3", "Signature.4",
"Signature.5", "Signature.6", "Signature.7", "Signature.8",
"Signature.9", "Signature.10", "Signature.11", "Signature.12",
"Signature.13", "Signature.14", "Signature.15", "Signature.16",
"Signature.17", "Signature.18", "Signature.19", "Signature.20",
"Signature.21", "Signature.R1", "Signature.R2", "Signature.R3",
"Signature.U1", "Signature.U2"), row.names = "A512R19", class = "data.frame")), .Names = c("A373R11",
"A373R13", "A373R3", "A373R5", "A373R9", "A512R19"))
此处,每行包含一个样本,每列包含特定签名的分数:
> data[1]
$A373R11
Signature.1A Signature.1B Signature.2 Signature.3 Signature.4 Signature.5 Signature.6 Signature.7 Signature.8 Signature.9 Signature.10 Signature.11
A373R11 0 0 0 0.1516317 0.1497999 0 0 0.06349126 0 0.1731892 0 0
Signature.12 Signature.13 Signature.14 Signature.15 Signature.16 Signature.17 Signature.18 Signature.19 Signature.20 Signature.21 Signature.R1 Signature.R2
A373R11 0 0 0 0 0 0 0 0 0 0.09055177 0 0
Signature.R3 Signature.U1 Signature.U2
A373R11 0 0.1555907 0.1459555
我想将其转换为具有以下结构的数据框:
sample signature score
A373R11 Signature.1A 0
A373R11 Signature.1B 0
[...]
A373R13 Signature.1A 0
A373R13 Signature.1B 0
[...]
有人能指出我正确的方向吗?
答案 0 :(得分:9)
两种方法:
1)使用data.table-package
使用:
library(data.table)

# Stack the one-row data frames into a single table; `idcol` stores each
# list element's name in a new "sample" column. Then melt every remaining
# column into (signature, score) pairs.
# `id.vars` is written out in full instead of relying on partial argument
# matching of `id`.
melt(
  rbindlist(data, idcol = "sample"),
  id.vars = "sample",
  variable.name = "signature",
  value.name = "score"
)
给出:
      sample    signature      score
  1: A373R11 Signature.1A 0.00000000
  2: A373R13 Signature.1A 0.00000000
  3:  A373R3 Signature.1A 0.00000000
  4:  A373R5 Signature.1A 0.00000000
  5:  A373R9 Signature.1A 0.00000000
 ---
158: A373R13 Signature.U2 0.00000000
159:  A373R3 Signature.U2 0.07392018
160:  A373R5 Signature.U2 0.00000000
161:  A373R9 Signature.U2 0.00000000
162: A512R19 Signature.U2 0.11930689
2)基础R
使用:
# Bind the one-row data frames into a single wide data frame.
dat2 <- do.call(rbind, dat)

# Reshape wide -> long with base R:
#   * every column of dat2 is treated as one "time-varying" measurement,
#   * the column names become the values of the `signature` column,
#   * the row names of dat2 become the values of the `sample` id column.
reshape(
  dat2,
  idvar = "sample",
  ids = row.names(dat2),
  # seq_len() instead of 1:ncol(): the latter yields c(1, 0) for 0 columns.
  varying = list(seq_len(ncol(dat2))),
  times = colnames(dat2),
  timevar = "signature",
  v.names = "score",
  new.row.names = NULL,
  direction = "long"
)
给出:
                        signature      score  sample
A373R11.Signature.1A Signature.1A 0.00000000 A373R11
A373R13.Signature.1A Signature.1A 0.00000000 A373R13
A373R3.Signature.1A  Signature.1A 0.00000000  A373R3
A373R5.Signature.1A  Signature.1A 0.00000000  A373R5
A373R9.Signature.1A  Signature.1A 0.00000000  A373R9
.....
A373R13.Signature.U2 Signature.U2 0.00000000 A373R13
A373R3.Signature.U2  Signature.U2 0.07392018  A373R3
A373R5.Signature.U2  Signature.U2 0.00000000  A373R5
A373R9.Signature.U2  Signature.U2 0.00000000  A373R9
A512R19.Signature.U2 Signature.U2 0.11930689 A512R19
注:
最好不要让数据对象与已有函数同名(data 本身就是一个 R 函数)。请参阅 ?data。
答案 1 :(得分:7)
这是一个 tidyverse 解决方案:我们首先将所有 data.frame 连接在一起,然后使用 gather 根据需要重新整形,代码如下:
library(dplyr)
library(tidyr)

# Each statement is on its own line; fused onto a single line without
# separators they do not parse as R.
# bind_rows(.id = ) stacks the list of data frames and records each list
# element's name in a "sample" column; gather() then collects every other
# column into signature/score pairs.
# NOTE(review): tidyr now recommends pivot_longer() over gather(). It is not
# switched here because the row order of the result would likely differ from
# the printed output -- confirm before changing.
data %>%
  bind_rows(.id = "sample") %>%
  gather(signature, score, -sample)
上述代码也可以写成不使用管道的单行形式;其输出为:
sample signature score
1 A373R11 Signature.1A 0.00000000
2 A373R13 Signature.1A 0.00000000
3 A373R3 Signature.1A 0.00000000
4 A373R5 Signature.1A 0.00000000
5 A373R9 Signature.1A 0.00000000
6 A512R19 Signature.1A 0.10949057
7 A373R11 Signature.1B 0.00000000
8 A373R13 Signature.1B 0.00000000
9 A373R3 Signature.1B 0.00000000
10 A373R5 Signature.1B 0.00000000
....
答案 2 :(得分:7)
以下是使用 rapply 的基础 R 替代方案。请注意,我已将您的对象重命名为 dat。
# Flatten all scores into one named vector. The outer list names are removed
# first so that each value is labelled with its signature name only.
# Simpler alternative with the same result:
# myVec <- unlist(unname(dat))
myVec <- rapply(object = unname(dat), f = identity)

# Assemble the long-format table: each sample name is repeated once per
# column of its data frame so that it lines up with the flattened scores.
mydf <- data.frame(
  sample = rep(names(dat), times = lengths(dat)),
  signature = names(myVec),
  score = myVec,
  row.names = seq_along(myVec),
  stringsAsFactors = FALSE
)
返回
head(mydf)
sample signature score
1 A373R11 Signature.1A 0.0000000
2 A373R11 Signature.1B 0.0000000
3 A373R11 Signature.2 0.0000000
4 A373R11 Signature.3 0.1516317
5 A373R11 Signature.4 0.1497999
6 A373R11 Signature.5 0.0000000
默认情况下,rapply 会对列表中每个最末层的元素执行一个函数,并返回一个向量。我使用 identity 原样返回这些元素。由于每个元素都有一个与之关联的名称,rapply 将返回一个命名向量。
我使用 unname 去除每个外层列表项的名称。这使得下一步构建 data.frame 变得更加容易。否则,命名向量将具有类似“A373R11.Signature.5”的名称,需要更多的处理才能得到所需的结果。
答案 3 :(得分:4)
首先,用 rbind 将各个 data.frame 组合在一起,然后将结果转换为矩阵以保留行名(rownames),最后对其进行融合(melt)。
对应的代码如下(这里给出的是不使用管道的写法):
# Bind the one-row data frames and convert to a matrix so that the sample IDs
# (row names) and signature names (column names) are kept as dimnames.
# The matrix is then unrolled to long format with base R:
# as.data.frame(as.table(m)) returns one row per cell, with the two dimnames
# as factor columns followed by the cell value.
# This replaces melt() on a matrix: data.table's melt() only provides a
# method for data.tables, and passing a matrix relied on a deprecated
# hand-off to reshape2. No package is needed for this version.
res <- as.data.frame(as.table(as.matrix(do.call(rbind, data))))
colnames(res) <- c("sample", "signature", "score")