我有一个非常大的数据集。
> head(z)
V1 V2 V3 V4 V5 V6 V7 V8
1 chr2 215796265 216005151 ABCA12 chr2 215796298 215796299 1.000000
2 chr2 215796265 216005151 ABCA12 chr2 215796353 215796354 1.000000
3 chr2 215796265 216005151 ABCA12 chr2 215796361 215796362 1.000000
4 chr2 215796265 216005151 ABCA12 chr2 215796391 215796392 1.000000
5 chr2 215796265 216005151 ABCA12 chr2 215796435 215796436 0.666667
6 chr2 215796265 216005151 ABCA12 chr2 215796481 215796482 0.571429
> tail(z)
V1 V2 V3 V4 V5 V6 V7 V8
194618 chr16 2878172 2882285 ZG16B chr16 2882101 2882102 0.615385
194619 chr16 2878172 2882285 ZG16B chr16 2882147 2882148 0.833333
194620 chr16 2878172 2882285 ZG16B chr16 2882154 2882155 0.800000
194621 chr16 2878172 2882285 ZG16B chr16 2882179 2882180 0.444444
194622 chr16 2878172 2882285 ZG16B chr16 2882231 2882232 0.833333
194623 chr16 2878172 2882285 ZG16B chr16 2882236 2882237 1.000000
我需要按第4列(V4)中的不同名称分组,分别计算最后一列(V8)的中位数、最小值和最大值。
任何人都可以帮助我吗?越简单越好。
谢谢
答案 0 :(得分:3)
使用dplyr:
# Build a small reproducible sample of the question's data: the first line of
# the text is the header (V1..V8) and character columns are kept as strings.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned.
table1 <- read.table(text="V1 V2 V3 V4 V5 V6 V7 V8
chr2 215796265 216005151 ABCA12 chr2 215796298 215796299 1.000000
chr2 215796265 216005151 ABCA12 chr2 215796353 215796354 1.000000
chr2 215796265 216005151 ABCA12 chr2 215796361 215796362 1.000000
chr16 2878172 2882285 ZG16B chr16 2882154 2882155 0.800000
chr16 2878172 2882285 ZG16B chr16 2882179 2882180 0.444444
chr16 2878172 2882285 ZG16B chr16 2882231 2882232 0.833333
chr16 2878172 2882285 ZG16B chr16 2882236 2882237 1.000000",
header = TRUE, stringsAsFactors = FALSE)
library(dplyr)
# One output row per distinct V4 value, with the mean, median, minimum and
# maximum of V8 inside that group.
table1 %>%
  group_by(V4) %>%
  summarise(
    mean = mean(V8),
    median = median(V8),
    min = min(V8),
    max = max(V8)
  )
#Source: local data frame [2 x 5]
# V4 mean median min max
# (chr) (dbl) (dbl) (dbl) (dbl)
#1 ABCA12 1.0000000 1.0000000 1.000000 1
#2 ZG16B 0.7694443 0.8166665 0.444444 1
答案 1 :(得分:0)
使用aggregate():
使用您发布的head和tail数据(记为df):
# Per-group summaries of V8, grouped by V4.
# Assumes `df` is the data frame holding the posted rows (it is not created
# in this snippet).
# The intermediate results get their own names so that they do not mask the
# base functions mean(), median(), min() and max() that are passed as FUN.
# na.rm = TRUE is forwarded to FUN so missing V8 values are ignored.
v8_mean <- aggregate(list(mean = df$V8), list(df$V4), FUN = mean, na.rm = TRUE)
v8_median <- aggregate(list(median = df$V8), list(df$V4), FUN = median, na.rm = TRUE)
v8_min <- aggregate(list(min = df$V8), list(df$V4), FUN = min, na.rm = TRUE)
v8_max <- aggregate(list(max = df$V8), list(df$V4), FUN = max, na.rm = TRUE)
# Each table shares only the grouping column Group.1, so merging them
# left to right joins the four statistics into one row per group.
aggregatedf <- Reduce(merge, list(v8_mean, v8_median, v8_min, v8_max))
print(aggregatedf)
输出:
Group.1 mean median min max
1 ABCA12 0.8730160 1.0000000 0.571429 1
2 ZG16B 0.7544158 0.8166665 0.444444 1
答案 2 :(得分:0)
使用psych包中的describeBy函数:
library(psych)
# Columns of the describeBy() result to show: the group label plus the
# requested statistics.
wanted_cols <- c("group1", "mean", "median", "min", "max")
# Descriptive statistics of V8 for each V4 value; mat = TRUE requests
# tabular output that can be indexed by column name.
by_group <- describeBy(table1$V8, table1$V4, mat = TRUE)
by_group[, wanted_cols]
输出:
group1 mean median min max
11 ABCA12 1.0000000 1.0000000 1.000000 1
12 ZG16B 0.7694443 0.8166665 0.444444 1
数据:
使用P Lapointe的回答中的table1