任何人都可以帮我修复此代码吗? 我去年使用的脚本没有问题,但现在连接到网址存在问题。
我该如何解决?
我想要的是从气象站“EKAH”(Tirstrup,丹麦奥胡斯机场)收集和安排2015-12-01至2016-04-15的数据。
############## 1) Run function --------------------
wunder_station_daily <- function(station, date)
{
base_url <- 'https://www.wunderground.com/history/airport'
# Example website: https://www.wunderground.com/history/airport/EKAH/2016/06/09/DailyHistory.html?&MR=1
# parse date
m <- as.integer(format(date, '%m'))
d <- as.integer(format(date, '%d'))
y <- format(date, '%Y')
# compose final url
final_url <- paste(base_url,
'/', station,
'/', y,
'/', m,
'/', d,
'/DailyHistory.html?&MR=1', sep='')
# reading in as raw lines from the web server
# contains <br> tags on every other line
# u <- url(final_url)
# the_data <- readLines(u)
# close(u)
the_data <- getURL(final_url, ssl.verifypeer=0L, followlocation=1L)
# only keep records with more than 5 rows of data
if(length(the_data) > 5 )
{
# remove the first and last lines
the_data <- the_data[-c(1, length(the_data))]
# remove odd numbers starting from 3 --> end
the_data <- the_data[-seq(3, length(the_data), by=2)]
# extract header and cleanup
the_header <- the_data[1]
the_header <- make.names(strsplit(the_header, ',')[[1]])
# convert to CSV, without header
tC <- textConnection(paste(the_data, collapse='\n'))
the_data <- read.csv(tC, as.is=TRUE, row.names=NULL, header=FALSE, skip=1)
close(tC)
# remove the last column, created by trailing comma
the_data <- the_data[, -ncol(the_data)]
# assign column names
names(the_data) <- the_header
# convert Time column into properly encoded date time
the_data$Time <- as.POSIXct(strptime(the_data$Time, format='%Y-%m-%d %H:%M:%S'))
# remove UTC and software type columns
the_data$DateUTC.br. <- NULL
the_data$SoftwareType <- NULL
# sort and fix rownames
the_data <- the_data[order(the_data$Time), ]
row.names(the_data) <- 1:nrow(the_data)
# done
return(the_data)
}
}
############## 2) Get data for a range of dates ------------------------------
date.range <- seq.Date(from=as.Date('2015-12-01'), to=as.Date('2015-12-04'), by='1 day')
station <- 'EKAH'
# pre-allocate list
l <- vector(mode='list', length=length(date.range))
# loop over dates, and fetch data
for(i in seq_along(date.range))
{
print(paste0("Fetching data: ", date.range[i]))
l[[i]] <- wunder_station_daily('EKAH', date.range[i])
}
# stack elements of list into DF, filling missing columns with NA
d <- ldply(l)
答案 0 :(得分:1)
他们稍微更改了URL,虽然我认为我可能只是为新的URL添加了错误的URL,但这里是大部分代码的现代化版本(我做了99%的清理工作):< / p>
#' @param station station name
#' @param wx_date Date object or character string
#' @param fmt if wx_date is not a Date object and the character string
#' is not in "%Y-%m-%d" format, then specify the format here
#' @return data.frame of redings
get_wx <- function(station="EKAH", wx_date=Sys.Date(), fmt="%Y-%m-%d") {
require(httr)
require(readr)
if (inherits(wx_date, "character")) {
wx_date <- as.Date(wx_date, fmt)
}
wx_base_url <- "https://www.wunderground.com/history/airport/%s/%s/DailyHistory.html"
wx_url <- sprintf(wx_base_url, station, format(wx_date, "%Y/%m/%d"))
res <- httr::GET(wx_url, query=list(MR=1, format=1))
dat <- httr::content(res, as="text")
dat <- gsub("<br />", "", dat)
dat <- read.table(text=dat, sep=",", header=TRUE,
na.strings=c("-", "N/A", "NA"), stringsAsFactors=FALSE)
# saner column names
cols <- colnames(dat)
# via http://stackoverflow.com/a/22528880/1457051
cols <- gsub("([a-z])([A-Z])", "\\1_\\L\\2", cols, perl=TRUE)
cols <- sub("^(_[a-z])", "\\L\\1", cols, perl=TRUE)
cols <- tolower(gsub("\\.", "_", cols))
readr::type_convert(setNames(dat, cols)) # more robust than type.convert()
}
tdy <- get_wx()
str(tdy)
## 'data.frame': 36 obs. of 14 variables:
## $ time_cest : chr "12:00 AM" "12:20 AM" "12:50 AM" "1:00 AM" ...
## $ temperature_f : num 51 50 48.2 47 46.4 44.6 44 44.6 44.6 44 ...
## $ dew_point_f : num 41 41 39.2 39 39.2 39.2 38 39.2 39.2 38 ...
## $ humidity : int 60 71 71 67 76 81 71 81 81 73 ...
## $ sea_level_pressure_in: num 30.1 30.1 30.1 30.1 30.1 ...
## $ visibility_mph : num 28 6.2 6.2 28 6.2 6.2 7 6.2 6.2 28 ...
## $ wind_direction : chr "WNW" "West" "West" "West" ...
## $ wind_speed_mph : chr "2.3" "2.3" "2.3" "2.3" ...
## $ gust_speed_mph : logi NA NA NA NA NA NA ...
## $ precipitation_in : logi NA NA NA NA NA NA ...
## $ events : logi NA NA NA NA NA NA ...
## $ conditions : chr NA "Unknown" "Unknown" NA ...
## $ wind_dir_degrees : int 300 270 270 270 270 270 270 280 280 270 ...
## $ date_utc : POSIXct, format: "2016-06-08 22:00:00" "2016-06-08 22:20:00" ...
a_yr_ago <- get_wx(wx_date="2015-06-09")
如果需要,您可以将""
添加到na.strings
向量。
而且,这是另一种方法,可以将一系列日期的读数转换为data.frame
:
library(purrr)
rng <- map_df(seq(as.Date("2015-12-01"), as.Date("2015-12-04"), "1 day"),
function(x) { get_wx(wx_date=x) })