我使用 textreadr 包中的 read_doc 函数编写了一个简单的 R 脚本来导入 .doc 文件。但是,当它尝试加载中等大小的文件时,会花费很长时间,并且会被强制关闭。该如何处理?
# Make "Data" the working directory. setwd() returns the directory that was
# current *before* the change, so `dir` holds the previous working directory,
# not "Data".
# NOTE(review): presumably kept so the old directory can be restored later —
# confirm; the name also coincides with base::dir().
dir <- setwd("Data")
# Address the search form is POSTed to further down in this script; its
# directory part is also reused there as the base for attachment links.
url <- "https://www.pjf.mg.gov.br/e_atos/e_atos.php"
#' Build the form body for the search request.
#'
#' Previously the three arguments were accepted but ignored and the list was
#' always filled with fixed values. The arguments are now honoured, and the
#' former fixed values are their defaults, so calling `pjf.dados()` with no
#' arguments yields exactly the list the old version produced.
#'
#' @param filtro1 Value sent in the `filtro1` form field.
#' @param dataArea0 Value sent in the `dataArea0` form field (presumably the
#'   start of the searched period, "dd/mm/yyyy" — confirm against the site).
#' @param dataArea1 Value sent in the `dataArea1` form field (presumably the
#'   end of the searched period, "dd/mm/yyyy" — confirm against the site).
#' @return A named list with the elements `filtro1`, `filtro2`, `dataArea0`,
#'   `dataArea1` and `sb_psq2`.
pjf.dados <- function(filtro1 = "24",
                      dataArea0 = "01/01/2010",
                      dataArea1 = "31/12/2017") {
  list(
    filtro1 = filtro1,
    filtro2 = " ",
    dataArea0 = dataArea0,
    dataArea1 = dataArea1,
    sb_psq2 = "Pesquisar"
  )
}

# The original call passed three symbols that are never defined in this
# script; it only worked because the arguments were never evaluated. Rely on
# the defaults instead.
form <- pjf.dados()
# Search request ----
# Submit the search form. Stop on an HTTP error status so an error page is
# never parsed as if it were the result list.
res <- POST(url, body = form)
httr::stop_for_status(res)

# Collect the href of every anchor in the returned page.
nit <- res %>%
  xml2::read_html() %>%
  html_nodes("a") %>%
  html_attr("href")

# Keep only hrefs containing "nit" (case-insensitive). Anchors without an
# href yield NA, which grepl() reports as FALSE, so they are dropped here too.
nit <- nit[grepl("nit", nit, ignore.case = TRUE)]

# Base address: `url` with everything after its last "/" removed.
atos <- sub("/([^/]*)$", "/", url)

# Path of each attachment: the piece that follows the first "./" in the href.
# vapply() always returns a character vector, even for zero matches (sapply()
# would return an empty list in that case).
anexos <- vapply(
  strsplit(nit, split = "./", fixed = TRUE),
  function(partes) partes[2],
  character(1)
)
# An href without "./" has no second piece and gives NA; drop it instead of
# building a link that ends in "NA".
anexos <- anexos[!is.na(anexos)]

# paste0() treats a zero-length argument as "", so without this guard an empty
# result would turn into one bogus link equal to the base address.
links <- if (length(anexos) > 0) {
  gsub(" ", "%20", paste0(atos, anexos), fixed = TRUE)
} else {
  character(0)
}
# Download every link into the working directory, skipping files that are
# already present. Iterating over the elements (instead of 1:length(links))
# makes the loop a no-op when `links` is empty; the index form would visit
# 1 and 0 and fail on a zero-length `if` condition.
for (link in links) {
  destino <- basename(link)
  if (file.exists(destino)) {
    next
  }
  tryCatch(
    download.file(link, destfile = destino, mode = "wb"),
    error = function(e) {
      # Remove whatever was written before the failure; otherwise the partial
      # file would be treated as "already downloaded" on the next run and be
      # handed to the document reader in a truncated state.
      unlink(destino)
      warning(
        "Failed to download ", link, ": ", conditionMessage(e),
        call. = FALSE
      )
    }
  )
}
# List the Word documents in the working directory. `pattern` is a regular
# expression, so the bare "doc" used before matched any file whose name merely
# contains "doc" (including ".docx" files). Anchor it to the ".doc" extension,
# case-insensitively.
doc_files <- list.files(
  ".",
  pattern = "\\.doc$",
  full.names = TRUE,
  ignore.case = TRUE
)
list_doc <- tryCatch ({
lapply(doc_files, function (i) {
read_doc(i) %>%
str_split(pattern = "\n") %>%
unlist() %>%
str_trim() %>%
str_subset(pattern = ":(\\d+)")
})