我一直在尝试从特定网站下载excel文件(.xls)。我在下面粘贴我的整个R代码(在设置一个docker容器之后)。
# Firefox profile that sends downloads to the container's download directory
# and lists the MIME types that should be saved without the open/save dialog.
ePrefs <- makeFirefoxProfile(
  list(
    "browser.download.dir" = "/home/seluser/Downloads",
    "browser.download.folderList" = 2L,
    "browser.download.manager.showWhenStarting" = FALSE,
    "browser.helperApps.neverAsk.saveToDisk" = "application/vnd.ms-excel,
application/xls, application/x-xls, application/vnd-xls,
application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
  )
)

# Connect to the Selenium server published on port 4445 and open a session
# that uses the profile above.
remDr <- remoteDriver(extraCapabilities = ePrefs, port = 4445)
remDr$open()
remDr$navigate("https://www.aeaweb.org/joe/listings?")

# Open the published-date filter.
date_filter <- remDr$findElement(using = "id", value = "published-date")
date_filter$clickElement()

# Pick the entry whose value attribute is "week".
week_option <- remDr$findElement(using = "css", value = "[value = 'week']")
week_option$clickElement()

# Submit the filter.
apply_button <- remDr$findElement(
  using = "css",
  value = "[value = 'Apply Filter']"
)
apply_button$clickElement()

# Fixed pause after submitting the filter, before looking up the download
# control.
Sys.sleep(10)

# Click the element carrying feature = 'download'.
download_control <- remDr$findElement(
  using = "css",
  value = "[feature = 'download']"
)
download_control$clickElement()

# Click the link in the third list item addressed by the absolute XPath
# (presumably the .xls export entry -- confirm against the page).
export_link <- remDr$findElement(
  using = "xpath",
  value = "/html/body/main/div/section/div/div[2]/div[2]/div/ul/li[3]/a"
)
export_link$clickElement()
一切正常,但在最后一步(点击)时,Selenium 浏览器仍然弹出通常的对话框,询问我是要保存还是打开该文件,尽管我已经在 ePrefs 部分设置了用于跳过该对话框的首选项。
我手动下载了最后一次点击本应直接下载的文件,并确认其内容类型为 application/vnd.ms-excel。我是不是哪里做错了?非常感谢任何帮助。
答案 0 :(得分:0)
服务器返回的 MIME 类型是 application/force-download。
请将其添加到您的列表中,并确认宿主机(HOST)与容器的下载目录已经映射。以下代码对我有效:
# Start the Selenium container first, mapping the container's download
# directory onto a directory on the HOST (HOST is Linux here):
# docker run -d -p 4445:4444 -p 5901:5900 -v /home/john/test:/home/seluser/Downloads selenium/standalone-firefox-debug:2.53.1
library(RSelenium)

# HOST side of the -v mapping above; files saved by the containerised
# Firefox into /home/seluser/Downloads show up here.
host_download_dir <- "/home/john/test/"

# MIME types Firefox should save straight to disk without asking.
# The server labels the spreadsheet application/force-download, so that type
# must be listed in addition to the usual Excel ones.
auto_save_types <- c(
  "application/vnd.ms-excel",
  "application/xls",
  "application/x-xls",
  "application/vnd-xls",
  "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
  "application/force-download"
)

ePrefs <- makeFirefoxProfile(
  list(
    "browser.download.dir" = "/home/seluser/Downloads",
    # 2 = save to the directory given in browser.download.dir.
    "browser.download.folderList" = 2L,
    "browser.download.manager.showWhenStarting" = FALSE,
    # Joined with bare commas so that no newlines or padding from a
    # multi-line string literal end up inside the preference value.
    "browser.helperApps.neverAsk.saveToDisk" =
      paste(auto_save_types, collapse = ",")
  )
)

# Connect to the Selenium server published on HOST port 4445.
remDr <- remoteDriver(extraCapabilities = ePrefs, port = 4445)
remDr$open()
remDr$navigate("https://www.aeaweb.org/joe/listings?")

# Restrict the listings to those published within the last week.
date_filter <- remDr$findElement(using = "id", value = "published-date")
date_filter$clickElement()
week_option <- remDr$findElement(using = "css", value = "[value = 'week']")
week_option$clickElement()
apply_button <- remDr$findElement(
  using = "css",
  value = "[value = 'Apply Filter']"
)
apply_button$clickElement()

# Give the filtered result page time to load before touching the download
# control.
Sys.sleep(10)

download_control <- remDr$findElement(
  using = "css",
  value = "[feature = 'download']"
)
download_control$clickElement()

# Third entry of the download list (the .xls export in this answer's run).
export_link <- remDr$findElement(
  using = "xpath",
  value = "/html/body/main/div/section/div/div[2]/div[2]/div/ul/li[3]/a"
)

# Snapshot the directory so the new file can be told apart from older ones.
files_before <- list.files(host_download_dir)
export_link$clickElement()

# The click returns before the file is written. Poll the mapped directory
# (for at most 30 seconds) until a new file has appeared and no ".part"
# file -- Firefox's marker for an unfinished download -- is left.
deadline <- Sys.time() + 30
repeat {
  files_now <- list.files(host_download_dir)
  new_files <- setdiff(files_now, files_before)
  finished <- length(new_files) > 0 && !any(endsWith(files_now, ".part"))
  if (finished || Sys.time() > deadline) {
    break
  }
  Sys.sleep(0.5)
}
if (!finished) {
  warning(
    "No completed download appeared in ", host_download_dir,
    " within 30 seconds.",
    call. = FALSE
  )
}

list.files(host_download_dir)
> list.files("/home/john/test/")
[1] "joe_resultset.xls"