
时间:2017-01-22 02:48:00

标签: r dplyr


# Example data: one day of temperature readings taken by two sensors (1 and 16)
# at several locations. Location 4 has no reading from sensor 16, which is the
# "missing" data point the question is about (the row is absent, not NA).
date <- c("2016-03-24", "2016-03-24", "2016-03-24", "2016-03-24", "2016-03-24",
          "2016-03-24", "2016-03-24", "2016-03-24", "2016-03-24")
# Written as strings because mixing numbers with "out" coerces to character.
location <- c("1", "1", "2", "2", "3", "3", "4", "out", "out")
sensor <- c(1, 16, 1, 16, 1, 16, 1, 1, 16)
Temp <- c(35, 34, 92, 42, 21, 47, 42, 63, 12)
df <- data.frame(date, location, sensor, Temp)

我的部分数据存在缺失值。这些缺失值并没有用 NA 标出,而是对应的行根本不存在于数据中。


# Asker's first attempt: keep only the rows for location 4 and "out", then,
# per (date, sensor) group, subtract the "out" temperature from the location-4
# temperature and reorder the columns.
# NOTE: when a group lacks one of the two locations, the corresponding
# Temp[...] subset is zero-length; the question reports that this fails with
# "Error: expecting a single value".
df %>%
  filter(location %in% c(4, 'out')) %>% 
  group_by(date, sensor) %>% 
  summarize(Diff = Temp[location=="4"] - Temp[location=="out"],
            location = first(location)) %>%
  select(1, 2, 4, 3) 

但是对于存在数据缺失的日期,我收到以下错误:`Error: expecting a single value`。我认为这是因为 dplyr 遇到缺失的数据点时不知道该如何处理。


# Asker's second attempt: the same pipeline with do() in place of summarize().
# NOTE(review): the question does not show what this call produced. do()
# normally expects the current group to be referenced through `.` (for
# example `.$Temp`), so this presumably did not work either -- confirm.
df %>%
  filter(location %in% c(4, 'out')) %>% 
  group_by(date, sensor) %>% 
  do(Diff = Temp[location=="4"] - Temp[location=="out"],
            location = first(location)) %>%
  select(1, 2, 4, 3) 


2 个答案:

答案 0 :(得分:3)


# Example data from the question: nine readings taken on a single day.
date <- rep("2016-03-24", 9)
# Written as strings because mixing numbers with "out" coerces to character.
location <- c("1", "1", "2", "2", "3", "3", "4", "out", "out")
sensor <- c(1, 16, 1, 16, 1, 16, 1, 1, 16)
Temp <- c(35, 34, 92, 42, 21, 47, 42, 63, 12)

# tibble() is the supported replacement for the deprecated data_frame().
df <- tibble(date, location, sensor, Temp)

# Edge-case helper: a fallback operator for empty results.
# Returns `y` when `x` is NULL or has length zero; otherwise returns `x`
# unchanged (whatever its length).
`%||0%` <- function(x, y) {
  # Both tests are scalar, so use the short-circuit `||` rather than the
  # elementwise `|`.
  if (is.null(x) || length(x) == 0) y else x
}

# For every (date, sensor) group, compute the temperature difference between
# location "out" and location 4 (diff() yields out - 4 after sorting).
# Groups that are missing one of the two readings get NA instead of an error.
df %>%
  filter(location %in% c("4", "out")) %>%
  # Fix the level order so that "4" always sorts before "out".
  mutate(location = factor(location, levels = c("4", "out"))) %>%
  # Sort so diff() sees the location-4 reading first within each group.
  arrange(sensor, location) %>%
  group_by(date, sensor) %>%
  summarize(
    # diff() returns numeric(0) for a single-row group; fall back to a typed
    # NA so that every group contributes a double, whatever the group order.
    Diff = diff(Temp) %||0% NA_real_,
    location = first(location)
  ) %>%
  # Select by name rather than position so the reorder survives added columns.
  select(date, sensor, location, Diff)
## Source: local data frame [2 x 4]
## Groups: date [1]
##         date sensor location  Diff
##        <chr>  <dbl>   <fctr> <dbl>
## 1 2016-03-24      1        4    21
## 2 2016-03-24     16      out    NA

答案 1 :(得分:1)


# Same computation without a helper operator: per (date, sensor) group, take
# the difference between the "out" and location-4 temperatures, or NA when the
# group holds only one of the two readings.
df %>%
  filter(location %in% c("4", "out")) %>%
  group_by(date, sensor) %>%
  # Sorting by location puts "4" ahead of "out", so diff() yields out - 4.
  arrange(sensor, location) %>%
  summarise(
    # A typed NA keeps Diff a double in every group, matching the data.table
    # version of this answer.
    Diff = if (n() == 1) NA_real_ else diff(Temp),
    location = first(location)
  ) %>%
  select(date, sensor, location, Diff)
#        date sensor location  Diff
#      <fctr>  <dbl>   <fctr> <dbl>
#1 2016-03-24      1        4    21
#2 2016-03-24     16      out    NA


# data.table version of the same computation. setDT() converts df in place;
# the chain then keeps the location 4 / "out" rows, sorts them, computes the
# per-(date, sensor) difference (typed NA for single-row groups), and finally
# reorders the columns.
setDT(df)[location %in% c("4", "out")][
  order(sensor, location),
  .(
    Diff = if (.N == 1L) NA_real_ else diff(Temp),
    location = location[1L]
  ),
  by = .(date, sensor)
][, c("date", "sensor", "location", "Diff"), with = FALSE]
#          date sensor location Diff
#1: 2016-03-24      1        4   21
#2: 2016-03-24     16      out   NA