我使用了以下脚本:
installed.packages('rvest')
library(rvest)
library(tidyverse)
## CONSTANT ----
URL <- "https://www.sciencebase.gov/catalog/item/54471eb5e4b0f888a81b82ca"
dir_out <- "~/condor"
## MAIN ----
# Get the webpage content
webpage <- read_html(URL)
# Extract the information of interest from the website
data <- html_nodes(webpage, ".sb-file-get sb-download-link")
# Grab the base URLs to download all the referenced data
url_base <- html_attr(data,"href")
# Filter the zip files
shapefile_base <- grep("*.zip",url_base, value=TRUE)
# Fix the double `//`
shapefile_fixed <- gsub("//", "/", shapefile_base)
# Add the URL prefix
shapefile_full <- paste0("https://www.sciencebase.gov/",shapefile_fixed)
# Create the output directory
dir.create(dir_out, showWarnings = FALSE)
# Create a list of filenames
filenames_full <- file.path(dir_out,basename(shapefile_full))
# Download the files
lapply(shapefile_full, FUN=function(x) download.file(x, file.path(dir_out,basename(x))))
# Unzip the files
unzip(filenames_full, overwrite = TRUE)
但是对于下载文件部分,我收到以下错误
> lapply(shapefile_full, FUN=function(x) download.file(x, file.path(dir_out,basename(x))))
trying URL 'https://www.sciencebase.gov/'
Content type 'text/html' length unknown
downloaded 111 bytes
[[1]]
[1] 0
> # Unzip the files
> unzip(filenames_full, overwrite = TRUE)
Warning message:
In unzip(filenames_full, overwrite = TRUE) :
error 1 in extracting from zip file
答案 0 :(得分:0)
这对我有用,并在我的工作目录中生成 6 个 zip 文件。
library(rvest)
URL <- "https://www.sciencebase.gov/catalog/item/54471eb5e4b0f888a81b82ca"
webpage <- read_html(URL)
# Extract the information of interest from the website
data <- html_nodes(webpage, "span.sb-file-get")
# Grab the base URLs to download all the referenced data
url_base <- html_attr(data,"data-url")
# Filter the zip files
shapefile_full <- paste0("https://www.sciencebase.gov",url_base)
Map(download.file, shapefile_full, sprintf('file%d.zip', seq_along(shapefile_full)))