我合并了两个数据框,现在想对合并后成对的列取平均值,并把结果整理成一个数据框。这是合并后的数据框:
ARN ADOPTION.x EUTHANASIA.x OTHER.x ADOPTION.y EUTHANASIA.y OTHER.y
99645 A1428364 0.998701572418 0.0000001962661 0.001298154 0.990897715091705 0.00000014520751 0.009102128
99646 A1428368 0.000122911530 0.9927836656570 0.007093379 0.00270643248222768 0.00152872898616 0.995764852
99647 A1428369 0.978477597237 0.0000004987147 0.021521868 0.933309495449066 0.00000007226861 0.066690445
99648 A1428370 0.004124862142 0.8476260900497 0.148249015 0.000234879378695041 0.87564605474472 0.124119066
99649 A4081681 0.464215517044 0.0024338855874 0.533350587 0.699311196804047 0.04458642378449 0.256102324
99650 A4087524 0.000006879755 0.0021564548369 0.997836649 0.000240802211919799 0.00144882441964 0.998310328
例如,我想取 ADOPTION.x 和 ADOPTION.y 的平均值,并把结果列命名为 "ADOPTION",然后对其余每一对列重复同样的操作。我该怎么做呢?
答案 0 :(得分:2)
我们可以分别选取以 .x 和 .y 结尾的列,将它们相加后除以 2 得到平均值:
# Average every `<name>.x` column with its `<name>.y` partner in the merged
# data frame `df1`, then attach the ARN identifier column.
# The output column names are derived from the `.x` columns instead of being
# hard-coded, and each `.y` partner is looked up by name, so the result does
# not depend on the `.x` and `.y` columns appearing in the same order.
x_cols <- grep("\\.x$", names(df1), value = TRUE)
base_names <- sub("\\.x$", "", x_cols)
# Selecting a missing partner column fails with "undefined columns selected".
y_cols <- paste0(base_names, ".y")
d1 <- (df1[x_cols] + df1[y_cols]) / 2
colnames(d1) <- base_names
d2 <- cbind(df1["ARN"], d1)
d2
# ARN ADOPTION EUTHANASIA OTHER
#99645 A1428364 0.994799644 0.0000001707368 0.005200141
#99646 A1428368 0.001414672 0.4971561973216 0.501429116
#99647 A1428369 0.955893546 0.0000002854917 0.044106157
#99648 A1428370 0.002179871 0.8616360723972 0.136184040
#99649 A4081681 0.581763357 0.0235101546859 0.394726456
#99650 A4087524 0.000123841 0.0018026396283 0.998073488
另一个选项是先用 melt 转换为长格式,计算平均值后再用 dcast 转换回宽格式('wide'):
library(data.table)
# Average each `<name>.x` / `<name>.y` pair via a long-format round trip.
# NOTE: setDT() converts `df1` to a data.table by reference (in place).
setDT(df1)
# Labels for the melted pairs, taken from the `.x` column names rather than
# hard-coded, in the same order in which patterns() matches them.
base_names <- sub("\\.x$", "", grep("\\.x$", names(df1), value = TRUE))
# `measure.vars` is spelled out in full (the original relied on partial
# matching of `measure`). With patterns(), `variable` is a factor numbering
# the pairs, and `x` / `y` hold the two values of each pair.
dM <- melt(
  df1,
  measure.vars = patterns("\\.x$", "\\.y$"),
  value.name = c("x", "y")
)
# Indexing by the factor uses its integer codes, mapping pair number -> name.
dM[, variable := base_names[variable]]
dM <- dM[, .(ARN, variable, value = (x + y) / 2)]
# Back to wide format: one row per ARN, one column per averaged variable.
dcast(dM, ARN ~ variable, value.var = "value")
# ARN ADOPTION EUTHANASIA OTHER
#1: A1428364 0.994799644 0.0000001707368 0.005200141
#2: A1428368 0.001414672 0.4971561973216 0.501429116
#3: A1428369 0.955893546 0.0000002854917 0.044106157
#4: A1428370 0.002179871 0.8616360723972 0.136184040
#5: A4081681 0.581763357 0.0235101546859 0.394726456
#6: A4087524 0.000123841 0.0018026396283 0.998073488
答案 1 :(得分:1)
@akrun建议的长格式+聚合方法的变体:
# Reshape `dat` to long format: every column except the first is treated as
# time-varying and split on "." into a variable name and a time suffix.
datlong <- reshape(
  dat,
  varying = -1,
  sep = ".",
  idvar = "ARN",
  direction = "long"
)
# Columns to average: everything after the first two columns of the long
# frame (expected to be ARN and the generated `time` column).
nms <- names(datlong)[-c(1, 2)]
# Running row index within each `time` group, used as a second grouping key.
datlong$seq <- ave(datlong$time, datlong$time, FUN = seq_along)
# Mean per (ARN, seq) group; `[-2]` drops the helper `seq` column again.
aggregate(
  datlong[nms],
  by = datlong[c("ARN", "seq")],
  FUN = mean
)[-2]
# ARN ADOPTION EUTHANASIA OTHER
#1 A1428364 0.994799644 1.707368e-07 0.005200141
#2 A1428368 0.001414672 4.971562e-01 0.501429116
#3 A1428369 0.955893546 2.854917e-07 0.044106157
#4 A1428370 0.002179871 8.616361e-01 0.136184040
#5 A4081681 0.581763357 2.351015e-02 0.394726456
#6 A4087524 0.000123841 1.802640e-03 0.998073488
答案 2 :(得分:0)
Hadleyverse版本:
library(dplyr)
library(tidyr)
# Average each `<name>.x` / `<name>.y` pair of `df`, one row per ARN.
# Uses pivot_longer()/across() in place of the superseded gather()/spread()
# and the deprecated summarise_each()/funs().
df %>%
  # Split every column name at the dot: the part before it becomes an output
  # column (the ".value" sentinel), the suffix goes into `origin`. This yields
  # one row per ARN and suffix, with one column per variable.
  pivot_longer(
    -ARN,
    names_to = c(".value", "origin"),
    names_sep = "\\."
  ) %>%
  # set grouping for aggregation
  group_by(ARN) %>%
  # average every variable within each ARN; the suffix column is not a value
  summarise(across(-origin, mean))
# Source: local data frame [6 x 4]
#
# ARN ADOPTION EUTHANASIA OTHER
# (fctr) (dbl) (dbl) (dbl)
# 1 A1428364 0.994799644 1.707368e-07 0.005200141
# 2 A1428368 0.001414672 4.971562e-01 0.501429116
# 3 A1428369 0.955893546 2.854917e-07 0.044106157
# 4 A1428370 0.002179871 8.616361e-01 0.136184040
# 5 A4081681 0.581763357 2.351015e-02 0.394726456
# 6 A4087524 0.000123841 1.802640e-03 0.998073488
请注意,与其先合并,不如直接把两个数据框按行绑定,再按 ID 列(ARN)分组聚合,这样要容易得多,即:
# Stack the two unmerged data frames and average every column per ARN.
# (A stray full-width period before `bind_rows` made the original snippet
# unparsable; the deprecated summarise_each(funs()) is replaced by across().)
bind_rows(df1, df2) %>%
  group_by(ARN) %>%
  # across(everything()) covers all non-grouping columns
  summarise(across(everything(), mean))
或在基础R
# Base R: stack both data frames, then take the mean of every other column
# within each ARN.
aggregate(
  . ~ ARN,
  data = rbind(df1, df2),
  FUN = mean
)
或data.table
library(data.table)
# Stack both data frames, convert the stacked copy to a data.table, and
# average every remaining column (.SD) within each ARN.
setDT(rbind(df1, df2))[
  ,
  lapply(.SD, mean),
  by = "ARN"
]
# Example input in merged (wide) form: one row per ARN with `.x` / `.y`
# suffixed value columns.
df <- data.frame(
  ARN = factor(c(
    "A1428364", "A1428368", "A1428369",
    "A1428370", "A4081681", "A4087524"
  )),
  ADOPTION.x = c(
    0.998701572418, 0.00012291153, 0.978477597237,
    0.004124862142, 0.464215517044, 6.879755e-06
  ),
  EUTHANASIA.x = c(
    1.962661e-07, 0.992783665657, 4.987147e-07,
    0.8476260900497, 0.0024338855874, 0.0021564548369
  ),
  OTHER.x = c(
    0.001298154, 0.007093379, 0.021521868,
    0.148249015, 0.533350587, 0.997836649
  ),
  ADOPTION.y = c(
    0.990897715091705, 0.00270643248222768, 0.933309495449066,
    0.000234879378695041, 0.699311196804047, 0.000240802211919799
  ),
  EUTHANASIA.y = c(
    1.4520751e-07, 0.00152872898616, 7.226861e-08,
    0.87564605474472, 0.04458642378449, 0.00144882441964
  ),
  OTHER.y = c(
    0.009102128, 0.995764852, 0.066690445,
    0.124119066, 0.256102324, 0.998310328
  )
)
# First unmerged example input: one row per ARN with unsuffixed value columns
# (same values as the `.x` columns of the merged example).
df1 <- data.frame(
  ARN = factor(c(
    "A1428364", "A1428368", "A1428369",
    "A1428370", "A4081681", "A4087524"
  )),
  ADOPTION = c(
    0.998701572418, 0.00012291153, 0.978477597237,
    0.004124862142, 0.464215517044, 6.879755e-06
  ),
  EUTHANASIA = c(
    1.962661e-07, 0.992783665657, 4.987147e-07,
    0.8476260900497, 0.0024338855874, 0.0021564548369
  ),
  OTHER = c(
    0.001298154, 0.007093379, 0.021521868,
    0.148249015, 0.533350587, 0.997836649
  )
)
# Second unmerged example input: same ARNs and column names as `df1`
# (same values as the `.y` columns of the merged example).
df2 <- data.frame(
  ARN = factor(c(
    "A1428364", "A1428368", "A1428369",
    "A1428370", "A4081681", "A4087524"
  )),
  ADOPTION = c(
    0.990897715091705, 0.00270643248222768, 0.933309495449066,
    0.000234879378695041, 0.699311196804047, 0.000240802211919799
  ),
  EUTHANASIA = c(
    1.4520751e-07, 0.00152872898616, 7.226861e-08,
    0.87564605474472, 0.04458642378449, 0.00144882441964
  ),
  OTHER = c(
    0.009102128, 0.995764852, 0.066690445,
    0.124119066, 0.256102324, 0.998310328
  )
)