我想按天气类型把连续的日期合并成日期范围,新增一列给出该天气持续的天数,再新增两列,分别给出 `df` 中对应区间的平均温度和温度总和:
# Sample data: ten consecutive January days, the weather observed on each
# day and that day's average temperature.
set.seed(121)
df <- data.frame(
  # "01-Jan" ... "10-Jan"
  "Date" = sprintf("%02d-Jan", seq_len(10)),
  # Runs of 3 cloudy, 2 rainy, 4 cloudy and 1 sunny day. data.frame()
  # rewrites the name to Type.of.Weather (check.names = TRUE by default).
  "Type of Weather" = rep(
    c("Cloudy", "Rainy", "Cloudy", "Sunny"),
    times = c(3, 2, 4, 1)
  ),
  # Stored as Average.temperature, for the same reason.
  "Average temperature" = 20:29
)
预期输出:
# Expected output, written with data.table: rleid(), .() and := all come
# from that package, so it has to be attached.
library(data.table)

# Work on a data.table copy so the `[` call is valid even when df is a
# plain data.frame. rleid() numbers each run of identical consecutive
# weather values; every run becomes one row holding its first and last date.
ndf <- as.data.table(df)[
  ,
  .(Date = paste(Date[1], "to", Date[.N]), weather = Type.of.Weather[1]),
  by = rleid(Type.of.Weather)
][, rleid := NULL][]  # drop the helper grouping column

# Hand-written target values for the two columns the question asks for.
ndf$Countofdays <- c(3, 2, 4, 1)
ndf$Averagetemp <- c(21, 23.5, 26.5, 29)
答案 0 :(得分:0)
我们可以执行以下操作:
#' Parse day-month labels such as "03-Jan" into date-times.
#'
#' The labels carry no year, so strptime() fills in the current one. The
#' month abbreviation (%b) is matched against the current locale's names.
#'
#' @param x Character vector (or factor) of "%d-%b" labels.
#' @param tz Time zone used for parsing. Defaults to "UTC" so that
#'   subtracting two results always gives a whole number of days; parsing
#'   in a local zone makes differences spanning a DST switch fractional.
#' @return A POSIXct vector the same length as `x`; entries that do not
#'   match the format are NA.
parse.date <- function(x, tz = "UTC") {
  as.POSIXct(x, format = "%d-%b", tz = tz)
}
# Collapse consecutive days with the same weather into one row per run.
# %>% and the verbs used below come from dplyr.
library(dplyr)

df %>%
  # grp goes up by one whenever the weather differs from the previous row,
  # so each run of identical consecutive values gets its own id.
  mutate(
    grp = cumsum(c(0, diff(as.numeric(as.factor(Type.of.Weather))) != 0))
  ) %>%
  group_by(grp) %>%
  summarise(
    # df holds one row per day, so the run length is simply the row count.
    # Subtracting the parsed end dates instead gives a difftime whose unit
    # is chosen per group (seconds for a one-day run, days otherwise), and
    # the combined column then mixes units.
    Countofdays = n(),
    Date = paste0(Date[1], " to ", Date[length(Date)]),
    weather = unique(Type.of.Weather),
    Averagetemp = mean(Average.temperature)
  ) %>%
  select(Date, weather, Countofdays, Averagetemp)
## A tibble: 4 x 4
#  Date             weather Countofdays Averagetemp
#  <chr>            <fct>         <int>       <dbl>
#1 01-Jan to 03-Jan Cloudy            3        21.0
#2 04-Jan to 05-Jan Rainy             2        23.5
#3 06-Jan to 09-Jan Cloudy            4        26.5
#4 10-Jan to 10-Jan Sunny             1        29.0
# (weather shows as <chr> when df was built with stringsAsFactors = FALSE,
# the data.frame() default since R 4.0.0.)
# Sample data used by the code in this post: ten consecutive January days,
# the weather on each day and the day's average temperature.
set.seed(121)
df <- data.frame(
  # "01-Jan" ... "10-Jan"
  "Date" = sprintf("%02d-Jan", seq_len(10)),
  # Runs of 3 cloudy, 2 rainy, 4 cloudy and 1 sunny day. data.frame()
  # rewrites the name to Type.of.Weather (check.names = TRUE by default).
  "Type of Weather" = rep(
    c("Cloudy", "Rainy", "Cloudy", "Sunny"),
    times = c(3, 2, 4, 1)
  ),
  # Stored as Average.temperature, for the same reason.
  "Average temperature" = 20:29
)