在R中将每5分钟记录一次的降水数据汇总为每小时的值

时间:2014-10-02 07:38:51

标签: r duplicates weather hour

我正在使用weatherData包,试图从我的个人气象站获取每小时的总降水量。我遇到的问题是:数据每五分钟采集一次,而且降水值会一直重复,直到数值发生变化为止。我已经尝试过`duplicated`函数,但在没有降水的时段它会删除大量数据,这使我很难得到每小时降水量的汇总。

请参阅以下代码

## Load required libraries

library(weatherData)
library(ggplot2)
library(scales)
library(plyr)
library(reshape2)
library(gridExtra)
library(lubridate)
library(weathermetrics)
library(zoo)

# Get data for PWS using weatherData package
#
# Requests records for the station with ID "IPENANGB2" from 2014-09-01 to
# 2014-09-30, restricted to custom columns 1, 2, 6, 7 and 10.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.

pws <- getWeatherForDate(
  "IPENANGB2", "2014-09-01", "2014-09-30",
  station_type = "id",
  opt_detailed = TRUE,
  opt_custom_columns = TRUE,
  custom_columns = c(1, 2, 6, 7, 10)
)

# Rename columns
# NOTE(review): six names are assigned although only five custom columns are
# requested; presumably getWeatherForDate() prepends its own time column --
# confirm against the returned data frame.
colnames(pws) <- c("time", "time1", "tempc", "wdd", "wspd", "prcp")


## Adding date columns
#
# Derives calendar helper columns (date, year, month, weekday, week, day of
# year, hour, minute) from the timestamp text held in pws$time1.

# Parse the timestamp text as Perth local time; this overwrites pws$time.
pws$time <- as.POSIXct(pws$time1, format = "%Y-%m-%d %H:%M:%S",
                       tz = "Australia/Perth")
pws$date <- as.Date(pws$time, tz = "Australia/Perth")

# Year and month are taken from the local calendar date (computed once).
pws$year <- as.numeric(format(pws$date, "%Y"))
pws$month <- as.numeric(format(pws$date, "%m"))
pws$monthf <- factor(pws$month, levels = as.character(1:12),
                     labels = c("Jan", "Feb", "Mar", "Apr", "May", "Jun",
                                "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"),
                     ordered = TRUE)

# POSIXlt$wday counts from 0 = Sunday to 6 = Saturday. The levels are listed
# as 1..6 followed by 0 so that each code lines up with its Monday-first label
# (previously code 0, i.e. Sunday, was labelled "Mon"). rev() keeps the same
# reversed level order the factor had before: Sun, Sat, ..., Mon.
pws$weekday <- as.POSIXlt(pws$date)$wday
pws$weekdayf <- factor(pws$weekday, levels = rev(c(1:6, 0)),
                       labels = rev(c("Mon", "Tue", "Wed", "Thu", "Fri",
                                      "Sat", "Sun")),
                       ordered = TRUE)

pws$yearmonth <- as.yearmon(pws$date)
pws$yearmonthf <- factor(pws$yearmonth)
pws$week <- as.numeric(format(pws$date, "%W"))
pws$weekf <- factor(pws$week)
pws$jday <- yday(pws$date)

# Format the POSIXct column directly rather than converting it to text and
# re-parsing with strptime(): the text form can omit the time of day for
# midnight timestamps, which then fail to parse and become NA.
pws$hour <- as.numeric(format(pws$time, "%H"))
pws$min <- as.numeric(format(pws$time, "%M"))

# Remove duplicate values
#
# duplicated() flags every later occurrence of a value anywhere in the column,
# not only back-to-back repeats, so only the first row for each distinct prcp
# reading is kept.

pws.df <- subset(pws, !duplicated(prcp))

1 个答案:

答案 0 :(得分:1)

假设你想得到tempc、wdd、wspd和prcp的每小时平均值:

# used packages
library(weatherData)
library(lubridate)
library(dplyr)
library(stringr)

# read data
# Requests records for the station with ID "IPENANGB2" from 2014-09-01 to
# 2014-09-30, restricted to custom columns 1, 2, 6, 7 and 10.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
pws <- getWeatherForDate("IPENANGB2",
                         "2014-09-01",
                         "2014-09-30",
                         station_type = "id",
                         opt_detailed = TRUE,
                         opt_custom_columns = TRUE,
                         custom_columns = c(1, 2, 6, 7, 10))
# rename columns
# NOTE(review): six names are assigned although only five custom columns are
# requested; presumably getWeatherForDate() prepends its own time column --
# confirm against the returned data frame.
colnames(pws) <- c("time", "time1", "tempc", "wdd", "wspd", "prcp")

# cleaning dataset and adding some columns
#
# Keeps columns 2 to 6 (time1, tempc, wdd, wspd, prcp), drops rows whose time1
# text contains "<br>", parses time1 into a date-time and derives the
# year/month/day/hour parts used later as grouping keys.
# NOTE(review): the "<br>" rows are presumably HTML residue in the downloaded
# data -- confirm.
useful_pws <-
    pws %>%
    select(2:6) %>%
    filter(!str_detect(time1, "<br>")) %>%
    mutate(time1 = ymd_hms(time1),
           year = year(time1),
           month = month(time1),
           day = day(time1),
           hour = hour(time1)) %>%
    # as_tibble() replaces tbl_df(), which is deprecated in dplyr
    as_tibble()

# summarising dataset
#
# Produces one row per (year, month, day, hour) holding the mean of each
# measurement, with NA readings ignored.
# NOTE(review): wdd looks like a wind direction in degrees; an arithmetic mean
# of angles is misleading across the 0/360 wrap-around -- confirm whether a
# circular mean is needed.
# NOTE(review): the mean of prcp is not an hourly total; whether sum(), max()
# or the last reading gives the total depends on how the station reports
# precipitation -- confirm.
useful_pws %>%
    select(-time1) %>%
    group_by(year, month, day, hour) %>%
    summarise(tempc = mean(tempc, na.rm = TRUE),
              wdd = mean(wdd, na.rm = TRUE),
              wspd = mean(wspd, na.rm = TRUE),
              prcp = mean(prcp, na.rm = TRUE)) %>%
    # summarise() only peels off the last grouping level; drop the rest so the
    # result is not left grouped by year/month/day
    ungroup()