在列中对NA值之间的各段分别求和

时间:2017-06-21 21:29:31

标签: r

我有一个包含x、y两列的数据框df。我想对y列中的值求和,并把总和放入另一个数据框。只对y列中位于两个NA值之间的部分求和。y列中有多个这样的部分需要分别求和,并且我希望每个总和在新数据框中是一个单独的值。

# Sample input: every NA in y marks the boundary of a section to be summed.
df <- data.frame(
  x = c(1966, 0.1, 0.2, 0.3, 0.4, 5622, 0.9, 0.8, 0.7, 0.6, 7889),
  y = c(NA, 1, 2, 3, 4, NA, 9, 8, 7, 6, NA)
)

答案应该是一个一列两行的数据框:

# Expected result: one column, two rows (one sum per NA-delimited section).
df <- data.frame(x = c(10, 30))

我想过用一些for循环来解决这个问题,并编写语句来选取y列中位于NA值之间的值。有什么想法吗?

到目前为止,我写了以下代码,但我最终希望它也适用于需要计算两个以上总和的列:

# Row positions of the NA markers in y; each consecutive pair encloses a section.
NAs <- which(is.na(df$y))
L1 <- length(NAs)
# One result row per section (number of NA markers minus one).
soln1 <- data.frame(matrix(nrow = L1 - 1, ncol = 1))
for (j in seq_len(L1 - 1)) {
  first <- NAs[j] + 1
  last <- NAs[j + 1] - 1
  # Index the rows of the section. Applying `:` to the two endpoint *values*
  # would sum an integer sequence between them rather than the actual data.
  # Adjacent NAs enclose no rows, so that entry is left as NA.
  if (first <= last) {
    soln1[j, 1] <- sum(df[first:last, 2])
  }
}

1 个答案:

答案 0 :(得分:0)

我用一些假数据试了一下:

# Fake data for the example; NA entries in y separate the sections.
df <- data.frame(
  x = c(1, 1, 3, 1, 3, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1),
  y = c(1, 2, NA, 4, 5, NA, 7, 8, NA, 10, 11, NA, 13, 14, NA, 16)
)
#    df
#   x  y
#1  1  1
#2  1  2
#3  3 NA
#4  1  4
#5  3  5
#6  1 NA
#7  1  7
#8  1  8
#9  1 NA
#10 1 10
#11 3 11
#12 1 NA
#13 1 13
#14 1 14
#15 1 NA
#16 1 16

神奇的函数:

# Sum each run of `y` that lies between two NA markers, keeping only the runs
# whose opening NA row directly follows a row with x > 2.
#
# Args:
#   x: numeric vector; x[i - 1] > 2 qualifies the NA at y[i] as a section start.
#   y: numeric vector, same length as x, with NA separating the sections.
#
# Returns:
#   A one-column numeric matrix (column "specialSum") with one row per
#   qualifying section. Row names are "first:last", the rows between the two
#   NA markers. Sections that are never closed by a later NA are skipped, and
#   a zero-row matrix is returned when nothing qualifies.
specialSum <- function(x, y){
  NAs <- which(is.na(y))
  # Shift x down one row so every row is compared with the x value before it;
  # which() drops the NA this comparison produces for the first row.
  starting <- which(c(NA, x[-length(x)]) > 2 & is.na(y))
  # A section closes at the first NA strictly after its opening NA
  # (NA when there is none). Compare against row numbers directly: `starting`
  # holds rows of y, not positions within `NAs`.
  ending <- vapply(starting, function(s) NAs[NAs > s][1], integer(1))
  closed <- !is.na(ending)
  starting <- starting[closed]
  ending <- ending[closed]
  # The summed range includes both NA markers, hence na.rm = TRUE.
  sums <- vapply(
    seq_along(starting),
    function(i) sum(y[starting[i]:ending[i]], na.rm = TRUE),
    numeric(1)
  )
  # paste0() would return ":" for zero-length input, so guard the empty case.
  naming <- NULL
  if (length(sums) > 0) {
    naming <- paste0(starting + 1, ":", ending - 1)
  }
  matrix(
    sums,
    nrow = length(sums),
    ncol = 1,
    dimnames = list(naming, "specialSum")
  )
}
# Apply to the fake data; the printed matrix is reproduced in the lines below.
specialSum(x = df$x, y = df$y)
#      specialSum
#7:8           15
#13:14         27

修改

# The data from the question: every NA in y marks a section boundary.
df <- data.frame(
  x = c(1966, 0.1, 0.2, 0.3, 0.4, 5622, 0.9, 0.8, 0.7, 0.6, 7889),
  y = c(NA, 1, 2, 3, 4, NA, 9, 8, 7, 6, NA)
)

# Sum every run of values in `y` that is enclosed by two NA markers.
#
# Args:
#   y: numeric vector in which NA entries delimit the sections to sum.
#
# Returns:
#   A data frame with a single column `specialSum` holding one sum per
#   non-empty section. Row names are "first:last", the positions in `y` of
#   the summed values. The result has zero rows when `y` contains fewer than
#   two NAs or only adjacent NAs.
specialSum <- function(y){
  NAs      <- which(is.na(y))
  # Pair each NA with the next one; a section is the rows strictly between.
  starting <- NAs[-length(NAs)] + 1
  ending   <- NAs[-1] - 1
  # Adjacent NAs give starting > ending, i.e. an empty section. Filter on
  # that rather than on sum != 0, so a real section summing to zero is kept
  # and the labels stay aligned with the sums.
  nonempty <- starting <= ending
  starting <- starting[nonempty]
  ending   <- ending[nonempty]
  sums <- vapply(
    seq_along(starting),
    function(i) sum(y[starting[i]:ending[i]]),
    numeric(1)
  )
  if (length(sums) == 0) {
    return(data.frame(specialSum = numeric(0)))
  }
  data.frame(specialSum = sums, row.names = paste0(starting, ":", ending))
}
# Apply to the question's data; the printed data frame is reproduced below.
specialSum(y = df$y)
#     specialSum
#2:5          10
#7:10         30

修改#2:

# Row positions of the NA markers that delimit the sections of y.
NAs <- which(is.na(df$y))
# Section id of every row = number of NA markers seen up to and including it.
section <- cumsum(is.na(df$y))
# Keep only real values lying strictly between the first and the last NA:
# values before the first NA (id 0) or after the last NA (id length(NAs))
# are not enclosed by two markers.
inside <- !is.na(df$y) & section >= 1 & section < length(NAs)
# One list element per non-empty section; adjacent NAs produce no element.
sumlist <- split(df$y[inside], section[inside])
data.frame(specialSum = unname(vapply(sumlist, sum, numeric(1)))) # less pretty output
#  specialSum
#1         10
#2         30