我有一个带有两个参数的函数,一个链接和一个State name缩写。该函数从远程站点获取电子表格 让我成为我想要的状态数据的数据框。
library(lubridate)
library(tidyr)
library(gdata)
library(dplyr)
options(stringsAsFactors = FALSE)
nsw_link <- 'http://www.abs.gov.au/ausstats/abs@archive.nsf/log?openagent&3101051.xls&3101.0&Time%20Series%20Spreadsheet&3F92BFA30BC29940CA257F1D001427C3&0&Jun%202015&17.12.2015&Latest'
qld_link <- "http://www.abs.gov.au/ausstats/abs@archive.nsf/log?openagent&3101053-.xls&3101.0&Time%20Series%20Spreadsheet&2927EBD7E6856BABCA257F1D0014283D&0&Jun%202015&17.12.2015&Latest"
vic_link <- "http://www.abs.gov.au/ausstats/abs@archive.nsf/log?openagent&3101052.xls&3101.0&Time%20Series%20Spreadsheet&E3B2958632AB29ECCA257F1D001427FB&0&Jun%202015&17.12.2015&Latest"
nt_link <- "http://www.abs.gov.au/ausstats/abs@archive.nsf/log?openagent&3101057.xls&3101.0&Time%20Series%20Spreadsheet&CCB60AB638D60938CA257F1D0014291C&0&Jun%202015&17.12.2015&Latest"
tas_link <- "http://www.abs.gov.au/ausstats/abs@archive.nsf/log?openagent&3101056.xls&3101.0&Time%20Series%20Spreadsheet&8CA5625A306A4805CA257F1D001428D7&0&Jun%202015&17.12.2015&Latest"
act_link <- "http://www.abs.gov.au/ausstats/abs@archive.nsf/log?openagent&3101058.xls&3101.0&Time%20Series%20Spreadsheet&4234206BA89A82F6CA257F1D00142959&0&Jun%202015&17.12.2015&Latest"
sa_link <- "http://www.abs.gov.au/ausstats/abs@archive.nsf/log?openagent&3101054.xls&3101.0&Time%20Series%20Spreadsheet&A342BFFB06A62F4FCA257F1D0014286D&0&Jun%202015&17.12.2015&Latest"
wa_link <- "http://www.abs.gov.au/ausstats/abs@archive.nsf/log?openagent&3101055.xls&3101.0&Time%20Series%20Spreadsheet&DC6208699BE4D2FFCA257F1D0014289E&0&Jun%202015&17.12.2015&Latest"
get_ERP_data <- function(link, state){
# Get the xls file and slice only the columns needed for male and female ERPs
xls_data <- read.xls(link, sheet = 'Data1')
xls_data <- tbl_df(xls_data)
xls_data <- xls_data[, 1:203]
names(xls_data) <- gsub(pattern = "Estimated.Resident.Population....", '', names(xls_data))
names(xls_data) <- gsub('[.]+', '', names(xls_data))
names(xls_data) <- gsub('100andover', '101', names(xls_data))
names(xls_data) <- gsub("(\\d+)$", ".\\1", names(xls_data))
xls_data <- xls_data[28:54, ]
names(xls_data)[1] <- 'Year'
xls_data$Year <- paste('01', xls_data$Year, sep = '-')
xls_data$Year <- dmy(xls_data$Year)
xls_data$Year <- year(xls_data$Year)
# make a long version of the ERP data
xls_data_long <- xls_data %>% gather(Sex_Age, Population, Male.0:Female.101)
# Make two new column, sex and age, from the sex_age column
xls_data_sep_log <- xls_data_long %>% separate(Sex_Age, c('Sex', 'Age'))
# Recode observations from 101 to 'over 100'
xls_data_sep_log$Age[grep(xls_data_sep_log$Age, pattern = '101')] <- 'Over 100'
xls_data_sep_log$State <- state
df_name <- paste(state, 'data', sep = '_')
assign(df_name,xls_data_sep_log,envir = .GlobalEnv)
}
link_list <- list(act_link, nsw_link, nt_link, qld_link, sa_link, tas_link, vic_link, wa_link)
states <- c('ACT', 'NSW', 'NT', 'QLD', 'SA', 'TAS', 'VIC', 'WA')
例如,如果我想要昆士兰估计的常住人口数据,我会运行:
get_ERP_data(qld_link, 'QLD')
这使我成为我需要的数据的纵向数据框架。
我想扩展这个,以便我可以将一个向量或状态列表作为参数传递给链接列表,并为每个链接返回一个数据框 参数中的论点?我并不特别关心在函数输出中绑定结果数据帧。
与上面的例子类似,我如何让我的函数运行如下:
get_ERP_data(as.list(qld_link, nsw_link), c('QLD', 'NSW'))
我尝试使用sapply和do.call的变体但是没有走得太远。有没有办法使用一些相当于pythons * args / ** kwargs 将列表作为参数传递给我的函数?
任何帮助都将不胜感激。