我有一个很大的数据框(data frame)。我想计算 "pos" 列中每一行与上一行之间的差值,
并且只处理指定列等于 1 的行(例如 df$X1x8 == 1),
然后针对 "chr" 列的每个水平,找出这些差值的最大值,
以及对应的 nlevels(df$bin)。
希望有 R 专家能帮我解决这个问题,非常感谢。
示例:以 X1x8 列为例
(第 4 列到第 8 列的取值只能是 0 或 1)
# Example input: one row per marker. Columns 4 to 8 (X1x3 ... X2x4) are 0/1
# indicators; "chr" is the chromosome and "pos" the position on it.
df <- read.table(text=" mi chr pos X1x3 X1x4 X1x8 X2x3 X2x4 sum bin
S000001 1 12578 0 1 1 0 1 3 1
S000003 1 96483 0 0 1 0 0 1 2
S000007 1 238450 1 0 1 1 0 3 3
S000010 1 298018 0 0 1 0 0 1 3
S000015 1 471895 0 1 0 1 1 3 4
S000022 1 704591 0 0 1 0 0 1 5
S000023 1 757938 1 0 1 1 1 4 5
S000002 2 47285 0 0 1 0 1 2 6
S000005 2 145243 0 1 1 0 0 2 6
S000009 2 246104 1 0 1 1 0 3 7
S000011 2 319046 0 1 1 0 0 2 7
S000018 2 566163 1 0 0 1 1 3 9", header = TRUE, stringsAsFactors = FALSE)

# Sort by chromosome, then position, so that successive differences are
# taken between neighbouring positions.
df <- df[order(df$chr, df$pos), ]

# Only rows where the working indicator column equals 1 take part.
dat <- df[df$X1x8 == 1, ]

# Keep mi, chr, pos, the working column (column 6 = X1x8) and bin.
dat <- dat[, c(1:3, 6, 10)]

# Number of distinct bins among the retained rows.
bin <- nlevels(as.factor(dat$bin))

# Within each chromosome the first diff is that chromosome's first pos; every
# later entry is the difference to the previous retained row. The result is
# stored in dat$diff (the original computed the differences but never assigned
# them, so max(dat$diff) operated on NULL).
dat$diff <- ave(dat$pos, dat$chr, FUN = function(p) c(p[1], diff(p)))

# Largest difference within each chromosome.
tapply(dat$diff, dat$chr, max)
My expected result looks like this:
#chr 1x3 1x4 1x8 2x3 2x4
#gab 1 519488 459317 406573 286043 459317
#bin 1 2 2 4 3 3
#gab 2 320059 173803 100861 320059 518878
#bin 2 2 2 2 2 2
答案 0 :(得分:1)
既然你已经提供了实际的预期输出(我也据此更新了你的问题),下面是更新后的代码:
# Build a table with, for every chromosome and every indicator column, the
# largest gap between consecutive positions of the rows flagged 1 ("gab" rows)
# and the number of distinct bins among those rows ("bin" rows).

# Work on one chromosome at a time so gaps never span two chromosomes.
dat <- split(df, df$chr)

# Indicator columns; anchored so it agrees with the "^X" prefix stripped below.
vars <- grep("^X", names(df), value = TRUE)

# Summarise one chromosome: returns a 2 x length(cols) matrix whose first row
# is the maximum gap and whose second row is the distinct-bin count per column.
summarise_chr <- function(d, cols) {
  # diff() is only meaningful on positions in ascending order.
  d <- d[order(d$pos), ]
  gap <- vapply(cols, function(v) {
    pos <- d$pos[d[[v]] %in% 1]
    # Fewer than two flagged rows means there is no gap to measure; report NA
    # instead of the -Inf (plus warning) that max() gives on empty input.
    if (length(pos) < 2L) NA_real_ else as.numeric(max(diff(pos)))
  }, numeric(1))
  bin <- vapply(cols, function(v) {
    as.numeric(nlevels(factor(d$bin[d[[v]] %in% 1])))
  }, numeric(1))
  rbind(gap = gap, bin = bin)
}

# Iterate over the split itself rather than over 1:length(dat), so chromosome
# ids need not be the integers 1..n.
stats <- lapply(dat, summarise_chr, cols = vars)

# Long form, chromosome-major: one row per (chromosome, indicator column).
dat2 <- data.frame(
  gap = unlist(lapply(stats, function(m) m["gap", ]), use.names = FALSE),
  bin = unlist(lapply(stats, function(m) m["bin", ]), use.names = FALSE)
)

# Wide form: two rows (gap, bin) per chromosome, one column per indicator.
wide <- do.call(rbind, unname(stats))
rownames(wide) <- NULL

# Labels come from names(dat) so they always line up with the rows of `wide`.
dat3 <- data.frame(
  chr = paste(c("gab", "bin"), rep(names(dat), each = 2)),
  wide,
  check.names = FALSE
)
names(dat3) <- c("chr", gsub("^X", "", vars))
row.names(dat3) <- NULL
dat3
## chr 1x3 1x4 1x8 2x3 2x4
##1 gab 1 519488 459317 406573 286043 459317
##2 bin 1 2 2 4 3 3
##3 gab 2 320059 173803 100861 320059 518878
##4 bin 2 2 2 2 2 2