我尝试通过以下命令下载数千个SEC文件:
# Fetch one filing in binary mode; every argument is passed by name.
download.file(
  url = link,
  destfile = folder,
  method = "internal",
  quiet = FALSE,
  mode = "wb",
  cacheOK = TRUE,
  extra = getOption("download.file.extra")
)
过了一会儿,我收到了下面这条我无法理解的消息(截图):
https://dl.dropboxusercontent.com/u/4149177/Capture.PNG
似乎文件已成功下载,但我想知道该消息的含义。
你能告诉我R试图告诉我什么吗?
完整代码:
#### script configuration
#### setInternet2() is not available on every platform / R version, so only
#### call it where it exists and do not let a failure abort the whole script.
if (exists("setInternet2", mode = "function")) {
  tryCatch(
    setInternet2(use = FALSE),
    error = function(e) {
      warning("setInternet2() could not be applied: ", conditionMessage(e),
              call. = FALSE)
    }
  )
}
#### downloaded filings are written into the current working directory
destinationfolder <- getwd()
#### range of years and quarters whose index files are loaded (inclusive)
startyear <- 2000
stopyear <- 2000
startquarter <- 1
stopquarter <- 2
#### form type to select from the loaded index
filetype <- "10-Q"
#### Download and parse one quarterly company index from the SEC website.
####   year:    year used in the index URL
####   quarter: quarter number used in the index URL (".../QTR<quarter>/")
#### Returns a data frame with the columns company_name, form_type, cik,
#### date_filed and file_name, or NULL when the download failed.
func.getsecindexfile <- function(year, quarter) {
  #### download the zipped index file from the SEC website
  tf <- tempfile()
  #### remove the temporary archive on every exit path so repeated calls do
  #### not pile up files in the session temp directory
  on.exit(unlink(tf), add = TRUE)
  index_url <- paste("http://www.sec.gov/Archives/edgar/full-index/", year, "/QTR", quarter, "/company.zip", sep = "")
  #### mode = "wb": the archive is binary and must not be transferred as text
  result <- try(download.file(url = index_url, destfile = tf, mode = "wb"))

  #### download.file() can report a failure through a non-zero status instead
  #### of an error, so both outcomes are treated as "no data"
  if (inherits(result, "try-error") || !isTRUE(result == 0)) {
    return(NULL)
  }

  #### small function to remove leading and trailing spaces
  trim <- function(string) {
    string <- enc2native(string)
    gsub("^\\s*(.*?)\\s*$", "\\1", string, perl = TRUE)
  }

  #### read the downloaded file; the connection is closed even if reading fails
  zz <- unz(description = tf, filename = "company.idx")
  raw.data <- tryCatch(readLines(con = zz), finally = close(zz))

  #### remove the first 10 rows (header); a negative index also copes with
  #### files that have 10 or fewer lines, where 11:length(x) would count down
  raw.data <- raw.data[-seq_len(10)]

  #### parse the fixed-width columns and return them as a data frame
  company_name <- trim(substr(raw.data, 1, 62))
  form_type <- trim(substr(raw.data, 63, 74))
  cik <- trim(substr(raw.data, 75, 86))
  date_filed <- as.Date(substr(raw.data, 87, 98))
  file_name <- trim(substr(raw.data, 99, 150))
  data.frame(company_name, form_type, cik, date_filed, file_name)
}
#### Append one parsed index file to the "filings" table.
####   data: data frame to append, or NULL when there is nothing to store
#### Returns the result of dbWriteTable(), or NULL when data is NULL.
func.addindexfiletodatabase <- function(data) {
  if (!is.null(data)) {
    write_status <- dbWriteTable(sqlite, "filings", data, append = TRUE)
    return(write_status)
  }
  NULL
}
#### start from an empty filings table, then load every requested quarter
dbGetQuery(sqlite, "DROP TABLE IF EXISTS filings")
for (year in seq(startyear, stopyear)) {
  for (quarter in seq(startquarter, stopquarter)) {
    func.addindexfiletodatabase(func.getsecindexfile(year, quarter))
  }
}
#### select the filings of the requested form type(s); collapsing filetype
#### lets it hold several form types, e.g. c("10-Q", "10-K")
selection <- paste0(
  "SELECT * FROM filings WHERE form_type IN ('",
  paste(filetype, collapse = "', '"),
  "')"
)
index <- dbGetQuery(sqlite, selection)

#### build the download URL of every selected filing; the explicit empty case
#### is needed because paste0() would turn a zero-length file_name into the
#### bare prefix instead of an empty vector
pre <- "ftp://ftp.sec.gov/"
temp <- if (nrow(index) > 0) paste0(pre, index$file_name) else character(0)
link <- temp

#### local file name = path below edgar/data/ with "/" flattened to "-";
#### fixed = TRUE matches the prefix literally rather than as a regex
name <- gsub("ftp://ftp.sec.gov/edgar/data/", "", temp, fixed = TRUE)
name <- gsub("/", "-", name, fixed = TRUE)
index$name_new <- name
#### define download function
#### Downloads one filing into destinationfolder.
####   link: URL of the filing to fetch
####   name: file name to store it under inside destinationfolder
#### Returns whatever download.file() returns for the transfer.
func.download_files <- function(link, name) {
  #### file.path() joins the parts with a separator that is valid on every
  #### platform; a hard-coded "\\" only forms a directory path on Windows
  folder <- file.path(destinationfolder, name)
  download.file(link, folder, method = "internal", quiet = FALSE, mode = "wb", cacheOK = TRUE, extra = getOption("download.file.extra"))
}
#### fetch every filing: link and name are walked element by element in step
mapply(func.download_files, link = link, name = name)
答案 0 :(得分:0)
这条“错误”消息实际上只是文件已成功下载的通知,并不是真正的错误。感谢您的帮助。