我有两列x,y的数据帧df。我试图在列y中求和值并将总和放入另一个数据帧。仅对NA值之间的列y的一部分进行求和。列y的多个部分必须求和,但我希望每个总和在新数据框中是一个单独的值。
df <- data.frame(x = c(1966,0.1,0.2,0.3,0.4,5622,0.9,0.8,0.7,0.6,7889),
y = c(NA,1,2,3,4,NA,9,8,7,6,NA))
答案应采用一行两行的数据框格式:
df <- data.frame(x = c(10,30))
我想过使用一些for循环来解决这个问题,并且想法使用列y中NA值之间的值的语句。有什么想法吗?
到目前为止,我有以下代码,但我最终希望它适用于具有一系列两个以上总结的列:
NAs <- which(is.na(df$y))
L1 <- length(NAs)
L0 <- dim(df)[1]
soln1 <- data.frame(matrix(nrow = L1-1, ncol = 1))
for(i in 1:L0){
for(j in 1:L1){
if (j == L1){
break
} else
soln1[j,1] <- sum(df[NAs[j] +1,2]:df[NAs[j+1] -1,2])
}
}
答案 0 :(得分:0)
我用一些假数据刺了一下:
df <- data.frame(x = c(1,1,3,1,3,1,1,1,1,1,3,1,1,1,1,1),
y = c(1,2,NA,4,5,NA,7,8,NA,10,11,NA,13,14,NA,16))
# df
# x y
#1 1 1
#2 1 2
#3 3 NA
#4 1 4
#5 3 5
#6 1 NA
#7 1 7
#8 1 8
#9 1 NA
#10 1 10
#11 3 11
#12 1 NA
#13 1 13
#14 1 14
#15 1 NA
#16 1 16
神奇的功能:
# sum rows in y if section is between NA values & before a value in column x that is > 2
specialSum <- function(x, y){
starting <- which(c(NA,x[-length(x)]) > 2 & is.na(y))
NAs <- which(is.na(y))
L <- length(starting)
ending <- sapply(1:L, function(z) NAs[NAs[-starting] > starting[z]][1])
output <- matrix(NA, nrow = L)
naming <- rep("",L)
for(i in 1:L){
output[i] <- sum(y[starting[i]:ending[i]], na.rm = T)
naming[i] <- paste0(starting[i]+1,":",ending[i]-1)
}
dimnames(output) <- list(naming, "specialSum")
output
}
specialSum(df$x, df$y)
# specialSum
#7:8 15
#13:14 27
修改强>
df <- data.frame(x = c(1966,0.1,0.2,0.3,0.4,5622,0.9,0.8,0.7,0.6,7889),
y = c(NA,1,2,3,4,NA,9,8,7,6,NA))
specialSum <- function(y){
NAs <- which(is.na(y))
starting <- NAs[-length(NAs)]+1
ending <- NAs[-1]-1
L <- length(starting)
sums <- matrix(NA, nrow = L) ; naming <- rep("",L) # initialize for speed
for(i in 1:L){
sums[i] <- sum(y[starting[i]:ending[i]], na.rm = T)
naming[i] <- paste0(starting[i],":",ending[i])
}
sums <- sums[sums != 0,,drop = F] # in case there are multiple NAs in a row
data.frame(specialSum = sums, row.names = naming)
}
specialSum(df$y)
# specialSum
#2:5 10
#7:10 30
修改#2:强>
NAs <- which(is.na(df$y))
sumlist <- vector("list", length(NAs)-1)
count <- 0
for(i in 1:nrow(df)){
if(i %in% NAs){
count = count + 1
} else {
sumlist[[count]] <- append(sumlist[[count]], df$y[i])
}
}
data.frame(specialSum = unlist(lapply(sumlist, sum))) # less pretty output
# specialSum
#1 10
#2 30