为什么 `y` 的求和结果最终成为 `character` 类?使用 sqldf 的 `SUM` 时,这似乎本不该发生。
library(sqldf)
# Reproducible example: the class of a sqldf SUM() column differs between
# near-identical inputs.
# three very similar data.frame objects
# (.Names overrides the names given in list(), so the columns are `gb` and `diff`)
# x: two rows, summed without grouping
x <- structure(list(size = c(1L, 2L), diff = c(1, NA)) , .Names = c("gb","diff"), row.names = 1:2, class = "data.frame")
# y: every `diff` value in group gb = 1 is NA; gb = 2 holds the single non-NA value
y <- structure(list(size = c(1L, 1L, 2L, 2L), diff = c(NA, NA, 1, NA)) , .Names = c("gb","diff"), row.names = 1:4, class = "data.frame")
# z: same `diff` values as y with the gb labels swapped, so gb = 1 holds the non-NA value
z <- structure(list(size = c(2L, 2L, 1L, 1L), diff = c(NA, NA, 1, NA)) , .Names = c("gb","diff"), row.names = 1:4, class = "data.frame")
# when summed in sqldf: numeric, character, numeric
# NOTE(review): presumably the column class is inferred from the first returned
# row, which would be NULL only for y -- confirm against the sqldf/RSQLite docs.
sapply(sqldf("select sum(diff) from x"),class)
# [1] keeps only the class of the first result column (the sum)
sapply(sqldf("select sum(diff) , gb from y group by gb"),class)[1]
sapply(sqldf("select sum(diff) , gb from z group by gb"),class)[1]
# this despite both being numeric originally
class( x$diff )
class( y$diff )
答案 0 :(得分:3)
在汇总之前先排除 `NA`(即 SQL 中的 `NULL`):
# Workaround: drop rows whose diff is NULL (R's NA) before aggregating.
out1 <- sqldf("SELECT SUM(diff) AS diff_sum
FROM x
WHERE diff IS NOT NULL")
# Same filter, with the sum computed per gb group.
out2 <- sqldf("SELECT SUM(diff) AS diff_sum, gb
FROM y
WHERE diff IS NOT NULL
GROUP BY gb")
# Recorded str() output follows each call; diff_sum comes back as num.
str(out1)
# 'data.frame': 1 obs. of 1 variable:
# $ diff_sum: num 1
str(out2)
# Only gb = 2 is returned: the WHERE clause removes every row of a group whose
# diff values are all NA, so that group is absent from the result.
# 'data.frame': 1 obs. of 2 variables:
# $ diff_sum: num 1
# $ gb : int 2
答案 1 :(得分:1)
This is the correct way to avoid this.
@ G.Grothendieck:
sqldf 有一个启发式方法:把每个输出列的类设置为与同名输入列相同的类。因此,给求和结果起一个与输入列相同的别名(此处为 `diff`)即可解决:
# Input for reference: diff is num, gb is int.
str(y)
## 'data.frame': 4 obs. of 2 variables:
## $ gb : int 1 1 2 2
## $ diff: num NA NA 1 NA
# Alias the sum as `diff`, the same name as the input column; the recorded
# output below shows the result column as num even though its first value is NA.
out1 <- sqldf("select sum(diff) diff, gb from y group by gb")
str(out1)
## 'data.frame': 2 obs. of 2 variables:
## $ diff: num NA 1
## $ gb : int 1 2
# Same query with the row order reversed; diff is still num.
out2 <- sqldf("select sum(diff) diff, gb from y group by gb ORDER BY gb desc")
str(out2)
## 'data.frame': 2 obs. of 2 variables:
## $ diff: num 1 NA
## $ gb : int 2 1