我在 Docker Toolbox for Windows 中通过 selenium/standalone-firefox-debug 容器使用 RSelenium,容器启动和连接一切正常:
# Start the container detached: mount the host path //c/test/ onto
# /home/seluser/Downloads inside the container, and publish container ports
# 4444 and 5900 on host ports 4445 and 5901 respectively.
docker run -d -v //c/test/://home/seluser/Downloads -p 4445:4444 -p 5901:5900 selenium/standalone-firefox-debug
设置 Firefox 配置文件(profile),使其直接下载 PDF:
# Build the Firefox profile used for unattended PDF downloads.
# The call is namespace-qualified so this snippet runs on its own, even when
# library(RSelenium) has not been attached yet.
fprof <- RSelenium::makeFirefoxProfile(list(
  # Open blank pages on startup instead of any homepage / welcome page.
  browser.startup.homepage = "about:blank",
  startup.homepage_override_url = "about:blank",
  startup.homepage_welcome_url = "about:blank",
  startup.homepage_welcome_url.additional = "about:blank",
  # Download target inside the container.
  browser.download.dir = "/home/seluser/Downloads",
  browser.download.folderList = 2L,
  # Download-manager UI preferences.
  browser.download.manager.showWhenStarting = FALSE,
  browser.download.manager.focusWhenStarting = FALSE,
  browser.download.manager.closeWhenDone = TRUE,
  # MIME types to be saved without asking.
  browser.helperApps.neverAsk.saveToDisk =
    "application/pdf, application/octet-stream",
  # NOTE(review): presumably disables the built-in PDF viewer -- confirm
  # against the Firefox preference documentation.
  pdfjs.disabled = TRUE,
  plugin.scan.plid.all = FALSE,
  plugin.scan.Acrobat = 99L
))
使用下面的代码直接导航到 PDF 时,文件会下载到指定的目录,但之后 R 会话会挂起,后续代码无法继续执行。
library(RSelenium)

# Connect to the Selenium server on port 4445, passing the custom Firefox
# profile as extra capabilities.
remDr <- remoteDriver(
  remoteServerAddr = "*docker-ip*",
  port = 4445L,
  extraCapabilities = fprof
)
remDr$open()

# Navigate straight to the PDF URL.
remDr$navigate(
  "http://www.equibase.com/premium/eqbPDFChartPlus.cfm?RACE=A&BorP=P&TID=BEL&CTRY=USA&DT=09/12/2015&DAY=D&STYLE=EQB"
)
我必须手动停止R代码,显示的错误是:
Error in checkError(res) :
Undefined error in httr call. httr output: Operation was aborted by an application callback
如果我进入容器VNC并查看浏览器中显示的内容,该文件已下载,但地址栏中没有任何内容。
(截图)有什么想法吗?我猜测问题在于 httr / RSelenium 包没有收到来自浏览器的某种“已加载”信号,但这已经超出了我的排查能力。此前使用 selenium-standalone-server 的 .jar 文件配合 RSelenium 时,这种方法是可行的。
sessionInfo() 和 remDr$open() 的输出如下:
> sessionInfo()
R version 3.3.2 (2016-10-31)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 7 x64 (build 7601) Service Pack 1
locale:
[1] LC_COLLATE=English_United States.1252 LC_CTYPE=English_United States.1252
[3] LC_MONETARY=English_United States.1252 LC_NUMERIC=C
[5] LC_TIME=English_United States.1252
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] RSelenium_1.7.1
loaded via a namespace (and not attached):
[1] httr_1.2.1 R6_2.2.0 assertthat_0.1 tools_3.3.2 wdman_0.2.2 binman_0.1.0
[7] curl_2.3 Rcpp_0.12.9 jsonlite_1.2 caTools_1.17.1 openssl_0.9.6 bitops_1.0-6
[13] semver_0.2.0 XML_3.98-1.5
> remDr$open()
[1] "Connecting to remote server"
$rotatable
[1] FALSE
$raisesAccessibilityExceptions
[1] FALSE
$firefoxOptions
$firefoxOptions$args
list()
$firefoxOptions$profile
[1] "UEsDBBQACAgIAEwPW0oAAAAAAAAAAAAAAAAIAAAAcHJlZnMuanOlkU9LAzEQxe+C36HsSaFmwVv1tNBjbwoey2wy242dZsJM0v36JqJYimWV3vLn/R4z72VF2UbB4a7phadyM5pAUo5m5ANG2GGzXDTQc05PPUHYN/fPtzf5BzuXb/mIIt7hNgv9l52QbDlfiRpwzifPAeZcvnd2PAVicMZ5qUhbbVtFqtp2/fWrs/jA5FA2XlNxeZxTHyCUyUviI09vI4aXupMPu8IOQIp/5Qe2Wa8xsMSK1WDNofadJF9iR6SI0sWoJmBputO9UTjiK6+97j/jjpG8hZp/G92wXJw+sE2YHjQJwuE8zSJ+19KAQk/ofh8jUt75YNRCMMXVGSC6sO2ptLPCPdRSVqsq+wBQSwcII+hBcQsBAAD3AgAAUEsDBBQACAgIAEwPW0oAAAAAAAAAAAAAAAAHAAAAdXNlci5qc51WTW/bMAy971cMOW3AKqTretlOXdcBA4Z1aFDsKMgSbauRJU0fcfPvR/mjSRNHbndKbJMS+fj4yOjBUeugfLconGnxiXhWQvdf6oo0TLXMAQHNCgVi8eFtyZSH91/exJ2nYAFtrHEhudTAVKj7Z4JGG8ln/DWE1rg1qUOwxNbS19uz9Nky788U6CrU6Pjx8vK52xiwAybwR0AAHkB8l86HK4yFK0C34OJhuKbBvB4pr51pgHrupA3URU2DbJLLxXL6osAKTxAOfauvlfEwnc1oLUyrlWEC79KsSsDWpv1Tg14hWgmpaXeLQdng02W0MYKpGexhE4xRnoBzxnGjvVH7cB+n72WljUbUGmgKcKvu0edz8eC9RKtgkAsOfETcSgyUcsd8nfdVUq+JsaApPAZwmqlUzFczqExlvYt6+rIWCuHkBp8Z54DljBoz90gHysEFP4nEU6Wkt4ptQdycL1e/DDInlfbTtDG+Erf6j9RYX3++JBIvMvd3P9FjwQoTw+dCMb1... <truncated>
$appBuildId
[1] "20170125094131"
$version
[1] ""
$platform
[1] "LINUX"
$proxy
named list()
$command_id
[1] 1
$nativeEvents
[1] TRUE
$specificationLevel
[1] 0
$acceptSslCerts
[1] FALSE
$processId
[1] 3012
$webdriver.remote.sessionid
[1] "6263b5ab-9375-425e-aa00-8fc632dc492e"
$browserVersion
[1] "51.0.1"
$platformVersion
[1] "4.4.47-boot2docker"
$XULappId
[1] "{ec8030f7-c20a-464f-9b0e-13a3a9e97384}"
$browserName
[1] "firefox"
$takesScreenshot
[1] TRUE
$javascriptEnabled
[1] TRUE
$takesElementScreenshot
[1] TRUE
$platformName
[1] "linux"
$cssSelectorsEnabled
[1] TRUE
$firefox_profile
[1] "UEsDBBQAAgAIAJRZW0oj6EFxCwEAAPcCAAAIAAAAcHJlZnMuanOlkU9LAzEQxe+C36HsSaFmwVv1tNBjbwoey2wy242dZsJM0v36JqJYimWV3vLn/R4z72VF2UbB4a7phadyM5pAUo5m5ANG2GGzXDTQc05PPUHYN/fPtzf5BzuXb/mIIt7hNgv9l52QbDlfiRpwzifPAeZcvnd2PAVicMZ5qUhbbVtFqtp2/fWrs/jA5FA2XlNxeZxTHyCUyUviI09vI4aXupMPu8IOQIp/5Qe2Wa8xsMSK1WDNofadJF9iR6SI0sWoJmBputO9UTjiK6+97j/jjpG8hZp/G92wXJw+sE2YHjQJwuE8zSJ+19KAQk/ofh8jUt75YNRCMMXVGSC6sO2ptLPCPdRSVqsq+wBQSwECHgAUAAIACACUWVtKI+hBcQsBAAD3AgAACAAAAAAAAAABACAAAAAAAAAAcHJlZnMuanNQSwUGAAAAAAEAAQA2AAAAMQEAAAAA"
$id
[1] "6263b5ab-9375-425e-aa00-8fc632dc492e"
答案 0 :(得分:0)
我在使用最新版本的 Firefox(51.0.1)时遇到了同样的问题。
这是在 Windows 机器上,问题似乎出在 pdfjs.disabled 标志上。
旧版本的 Firefox 没有这个问题。标记为 2.53.1 的 Docker 镜像运行的是 Firefox 47。
如果可能的话,请用下面的命令(在 Linux 机器上)运行旧版本:
# Start the container from the image tagged 2.53.1: publish container ports
# 4444 and 5900 on host ports 4445 and 5901, and mount the host directory
# /home/john/test onto /home/seluser/Downloads inside the container.
docker run -d -p 4445:4444 -p 5901:5900 -v /home/john/test:/home/seluser/Downloads selenium/standalone-firefox-debug:2.53.1
现在运行您的代码,我们看到:
# Attach RSelenium first: makeFirefoxProfile() and remoteDriver() both come
# from it, so the script must load the package before either is called.
library(RSelenium)

# Firefox profile for unattended PDF downloads.
fprof <- makeFirefoxProfile(list(
  # Open blank pages on startup instead of any homepage / welcome page.
  browser.startup.homepage = "about:blank",
  startup.homepage_override_url = "about:blank",
  startup.homepage_welcome_url = "about:blank",
  startup.homepage_welcome_url.additional = "about:blank",
  # Download target inside the container.
  browser.download.dir = "/home/seluser/Downloads",
  browser.download.folderList = 2L,
  # Download-manager UI preferences.
  browser.download.manager.showWhenStarting = FALSE,
  browser.download.manager.focusWhenStarting = FALSE,
  browser.download.manager.closeWhenDone = TRUE,
  # MIME types to be saved without asking.
  browser.helperApps.neverAsk.saveToDisk =
    "application/pdf, application/octet-stream",
  # NOTE(review): presumably disables the built-in PDF viewer -- confirm
  # against the Firefox preference documentation.
  pdfjs.disabled = TRUE,
  plugin.scan.plid.all = FALSE,
  plugin.scan.Acrobat = 99L
))

# Connect on port 4445 (server address left at the remoteDriver() default),
# passing the profile as extra capabilities.
remDr <- remoteDriver(port = 4445L, extraCapabilities = fprof)
remDr$open()

# Navigate straight to the PDF URL.
remDr$navigate(
  "http://www.equibase.com/premium/eqbPDFChartPlus.cfm?RACE=A&BorP=P&TID=BEL&CTRY=USA&DT=09/12/2015&DAY=D&STYLE=EQB"
)
> list.files("/home/john/test/")
[1] "eqbPDFChartPlus.cfm"
下载得到的 PDF 需要重命名(它被保存成了 ColdFusion 的 .cfm 文件名)。
至于较新版本的 Firefox 出了什么问题,需要向 geckodriver 项目反映。
使用 RSelenium 以外其他客户端的用户最近也遇到了同样的问题:Can't download PDF with selenium webdriver + firefox