我正在从网上抓取数据来做自己的小项目。我的目标是浏览每一个页面(按月和年划分),每天进行一次,并进行多少次呼叫。我从一个空白的数据框开始,然后附加我继续进行的每个月的通话日期和次数。
问题是...当我将行绑定在一起时,日期变成了数字。我该如何解决? (如果我只运行一列代码,则日期格式正确。)
问题将在代码的最后一部分,但我提供了其他内容作为参考。
#Scrapes dates from the police call map
#ex: "Tuesday, Sept. 30"
get_date <- function(url,year){
read_html(url) %>%
html_nodes('.bull li a') %>%
html_text() %>%
str_trim() %>%
unlist() %>%
#adds the current year to the end
paste(year)
}
#Scrapes number of calls for each day.
#ex: "262 calls for service"
get_num_calls <- function(url){
read_html(url) %>%
html_nodes('.bull b') %>%
html_text() %>%
str_trim() %>%
unlist() %>%
#Parse number is here to eliminate the "calls for service" text
parse_number()
}
#This function converts date to proper POSIXct format.
convert <- function(date,month,year){
if (month %in% abbr) {
parse_date_time(date, orders = "%A, %b. %d %Y")
} else if (month %in% full) {
parse_date_time(date, orders = "%A, %B. %d %Y")
} else {
#September is not in an accepted format, so it needs to be changed.
date <- sub("Sept.", "Sep.", date)
parse_date_time(date, orders = "%A, %b. %d %Y")
}
}
#years and months to cycle through the URLs
years <- as.character(2009:2018)
months <- c("jan", "feb","mar","apr", "may", "jun",
"jul", "aug", "sep", "oct", "nov", "dec")
#months that are abbreviated in the data.
abbr <- c("jan", "feb", "aug", "oct", "nov", "dec")
#months that are NOT abbreviated in the data.
full <- c("mar", "april", "may", "jun", "jul")
#url_base will have "month/year/" added to the end in the loop
url_base <- "http://projects.registerguard.com/police/eugene/"
#No matter which of these I use, it doesn't work properly.
Police <- c()
Police <- data.frame(date = as.POSIXct(date(), origin = lubridate::origin),
calls = integer())
for (year in years){
for (month in months){
url <- paste(url_base, year, '/', month, '/', sep = "")
date <- get_date(url,year)
date <- convert(date,month,year)
calls <- get_num_calls(url)
new <- cbind(date, calls)
Police <- rbdf(Police, new)
}
}