如何使用远程selenium webdriver下载文件?

时间:2017-11-02 06:26:50

标签: selenium selenium-webdriver

我正在使用远程selenium webdriver来执行一些测试。但是,在某些时候,我需要下载一个文件并检查其内容。

我正在使用远程webdriver,如下所示(python):

PROXY = ...

# Chrome preferences. os.getcwd() is evaluated on the machine running this
# script, so the resulting path is the local working directory.
prefs = {
    "profile.default_content_settings.popups": 0,
    # A real boolean rather than the string "false".
    "download.prompt_for_download": False,
    "download.default_directory": os.getcwd(),
}
chrome_options = Options()
chrome_options.add_argument("--disable-extensions")
chrome_options.add_experimental_option("prefs", prefs)

# Build the capabilities from chrome_options so that the arguments and prefs
# configured above are actually sent to the remote session, then add the proxy
# to this private dict instead of mutating the shared DesiredCapabilities.CHROME.
capabilities = chrome_options.to_capabilities()
capabilities['proxy'] = {
  "httpProxy":PROXY,
  "ftpProxy":PROXY,
  "sslProxy":PROXY,
  "noProxy":None,
  "proxyType":"MANUAL",
  "class":"org.openqa.selenium.Proxy",
  "autodetect":False
}
driver = webdriver.Remote(
        command_executor='http://aaa.bbb.ccc:4444/wd/hub',
        desired_capabilities=capabilities)

使用“普通”的(本地)webdriver 时,我可以毫无问题地把文件下载到本地计算机上,然后用测试代码来验证下载文件的内容(内容可能随测试参数而变化)。这并不是对下载本身的测试,但我需要一种方法来验证生成的文件的内容……

但是如何使用远程 webdriver做到这一点?我在任何地方都没有找到任何帮助...

5 个答案:

答案 0 :(得分:6)

Selenium API不提供在远程计算机上下载文件的方法。

但只依靠浏览器本身,使用Selenium仍然可以做到。

使用Chrome,可以通过导航chrome://downloads/列出下载的文件,并通过页面中执行的file:///请求进行检索:

from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
import os, time, base64

def get_file_names_chrome(driver):
  """Return the file URLs of the completed downloads listed by Chrome.

  The driver is navigated to chrome://downloads/ unless its current URL
  already starts with that address; the list is then read by a script
  executed in that page.
  """
  on_downloads_page = driver.current_url.startswith("chrome://downloads")
  if not on_downloads_page:
    driver.get("chrome://downloads/")

  # keeps only the items whose state is COMPLETE and maps them to file_url
  list_completed = """
    return downloads.Manager.get().items_
      .filter(e => e.state === "COMPLETE")
      .map(e => e.file_url);
    """
  return driver.execute_script(list_completed)

def get_file_content_chrome(driver, uri):
  """Fetch `uri` from inside the current page and return its content as bytes.

  An XMLHttpRequest is issued by a script running in the browser; the
  response is base64 encoded there, handed back through the async-script
  callback and decoded here.

  Args:
    driver: WebDriver whose page executes the request.
    uri: address to request, e.g. one of the file URLs listed by Chrome.

  Returns:
    The decoded content of the response as bytes.

  Raises:
    Exception: if the request fails; the script then reports the numeric
      XHR status instead of a base64 string.
  """
  result = driver.execute_async_script("""
    var uri = arguments[0];
    var callback = arguments[1];
    var toBase64 = function(buffer){for(var r,n=new Uint8Array(buffer),t=n.length,a=new Uint8Array(4*Math.ceil(t/3)),i=new Uint8Array(64),o=0,c=0;64>c;++c)i[c]="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/".charCodeAt(c);for(c=0;t-t%3>c;c+=3,o+=4)r=n[c]<<16|n[c+1]<<8|n[c+2],a[o]=i[r>>18],a[o+1]=i[r>>12&63],a[o+2]=i[r>>6&63],a[o+3]=i[63&r];return t%3===1?(r=n[t-1],a[o]=i[r>>2],a[o+1]=i[r<<4&63],a[o+2]=61,a[o+3]=61):t%3===2&&(r=(n[t-2]<<8)+n[t-1],a[o]=i[r>>10],a[o+1]=i[r>>4&63],a[o+2]=i[r<<2&63],a[o+3]=61),new TextDecoder("ascii").decode(a)};
    var xhr = new XMLHttpRequest();
    xhr.responseType = 'arraybuffer';
    xhr.onload = function(){ callback(toBase64(xhr.response)) };
    xhr.onerror = function(){ callback(xhr.status) };
    xhr.open('GET', uri);
    xhr.send();
    """, uri)
  # a number means the onerror handler fired and passed back xhr.status
  if isinstance(result, int):
    raise Exception("Request failed with status %s" % result)
  return base64.b64decode(result)

# Capabilities used to start the Chrome session.
capabilities_chrome = {
    'browserName': 'chrome',
    # Optional proxy configuration (example):
    # 'proxy': {
    #   'proxyType': 'manual',
    #   'sslProxy': '50.59.162.78:8088',
    #   'httpProxy': '50.59.162.78:8088'
    # },
    'goog:chromeOptions': {
      'args': [
        '--disable-extensions',
        '--disable-gpu',
        '--disable-infobars',
        # Two ASCII hyphens: the previous value contained an en dash, which
        # does not form a valid command-line switch.
        # NOTE(review): presumably needed so the downloads page may request
        # file:// URLs — confirm.
        '--disable-web-security'
      ],
      'prefs': {
        # 'download.default_directory': "",
        # 'download.directory_upgrade': True,
        'download.prompt_for_download': False,
        'plugins.always_open_pdf_externally': True,
        'safebrowsing_for_trusted_sources_enabled': False
      }
    }
  }

# launch Chrome
# driver = webdriver.Chrome(desired_capabilities=capabilities_chrome)
driver = webdriver.Remote('http://127.0.0.1:5555/wd/hub', capabilities_chrome)
try:
  # NOTE(review): the Python bindings document this value in seconds, so
  # 30000 is roughly 8 hours — confirm this is intended.
  driver.set_script_timeout(30000)

  # download a pdf file
  driver.get("https://www.mozilla.org/en-US/foundation/documents")
  driver.find_element_by_css_selector("[href$='.pdf']").click()

  # list all the remote files (waits for at least one)
  files = WebDriverWait(driver, 20, 1).until(get_file_names_chrome)

  # get the remote file with a GET request executed in the page
  content = get_file_content_chrome(driver, files[0])

  # save the content in a local file in the working directory
  with open(os.path.basename(files[0]), 'wb') as f:
    f.write(content)
finally:
  # always release the remote session, even when a step above fails
  driver.quit()

使用Firefox,一旦上下文切换到chrome,可以通过使用脚本调用浏览器API直接列出和检索文件:

from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
import os, time, base64

def get_file_names_moz(driver):
  """Return the target paths of the downloads Firefox reports as succeeded.

  Registers the "SET_CONTEXT" command on the driver's command executor,
  switches the session to the "chrome" context, queries the browser's
  Downloads module and switches back to the "content" context.

  Args:
    driver: remote WebDriver connected to Firefox.

  Returns:
    Whatever the async script hands back: the list of target paths of the
    entries whose `succeeded` flag is set.
  """
  driver.command_executor._commands["SET_CONTEXT"] = ("POST", "/session/$sessionId/moz/context")
  list_succeeded = """
    var { Downloads } = Components.utils.import('resource://gre/modules/Downloads.jsm', {});
    Downloads.getList(Downloads.ALL)
      .then(list => list.getAll())
      .then(entries => entries.filter(e => e.succeeded).map(e => e.target.path))
      .then(arguments[0]);
    """
  driver.execute("SET_CONTEXT", {"context": "chrome"})
  try:
    return driver.execute_async_script(list_succeeded)
  finally:
    # The restore used to sit after the return statement and never ran;
    # in a finally clause it runs on success and on failure alike.
    driver.execute("SET_CONTEXT", {"context": "content"})

def get_file_content_moz(driver, path):
  """Read the file at `path` through the browser and return its bytes.

  The file is read by a script running in Firefox's "chrome" context,
  base64 encoded there and decoded here. The session is switched back to
  the "content" context afterwards, also when the script fails.

  Args:
    driver: remote WebDriver connected to Firefox.
    path: path of the file as seen by the browser.

  Returns:
    The decoded file content as bytes.
  """
  # Register the command here as well, so this function does not depend on
  # another function having been called first.
  driver.command_executor._commands.setdefault(
      "SET_CONTEXT", ("POST", "/session/$sessionId/moz/context"))
  read_as_base64 = """
    var { OS } = Cu.import("resource://gre/modules/osfile.jsm", {});
    OS.File.read(arguments[0]).then(function(data) {
      var base64 = Cc["@mozilla.org/scriptablebase64encoder;1"].getService(Ci.nsIScriptableBase64Encoder);
      var stream = Cc['@mozilla.org/io/arraybuffer-input-stream;1'].createInstance(Ci.nsIArrayBufferInputStream);
      stream.setData(data.buffer, 0, data.length);
      return base64.encodeToString(stream, data.length);
    }).then(arguments[1]);
    """
  driver.execute("SET_CONTEXT", {"context": "chrome"})
  try:
    result = driver.execute_async_script(read_as_base64, path)
  finally:
    driver.execute("SET_CONTEXT", {"context": "content"})
  return base64.b64decode(result)

# Capabilities used to start the Firefox session.
capabilities_moz = {
    'browserName': 'firefox',
    'acceptInsecureCerts': True,
    'marionette': True,
    'moz:firefoxOptions': {
        'args': [],
        'prefs': {
            # Optional proxy configuration (example):
            # 'network.proxy.type': 1,
            # 'network.proxy.http': '12.157.129.35', 'network.proxy.http_port': 8080,
            # 'network.proxy.ssl':  '12.157.129.35', 'network.proxy.ssl_port':  8080,

            # download location
            'browser.download.folderList': 2,
            'browser.download.useDownloadDir': True,
            'browser.download.dir': '',

            # content types listed here are saved without asking
            'browser.helperApps.neverAsk.saveToDisk': 'application/octet-stream,application/pdf',
            'pdfjs.disabled': True,

            # download UI and checks switched off
            'browser.download.manager.showWhenStarting': False,
            'browser.download.animateNotifications': False,
            'browser.safebrowsing.downloads.enabled': False,
        },
    },
}

# launch Firefox
# driver = webdriver.Firefox(capabilities=capabilities_moz)
driver = webdriver.Remote('http://127.0.0.1:5555/wd/hub', capabilities_moz)
try:
  # download a pdf file
  driver.get("https://www.mozilla.org/en-US/foundation/documents")
  driver.find_element_by_css_selector("[href$='.pdf']").click()

  # list all the downloaded files (waits for at least one)
  files = WebDriverWait(driver, 20, 1).until(get_file_names_moz)

  # get the content of the first file in the returned list
  content = get_file_content_moz(driver, files[0])

  # save the content in a local file in the working directory
  with open(os.path.basename(files[0]), 'wb') as f:
    f.write(content)
finally:
  # always release the remote session, even when a step above fails
  driver.quit()

答案 1 :(得分:4)

@FlorentB针对Chrome的答案在Chrome 79版之前有效。对于较新的版本,由于downloads.Manager不再可用,需要改用下面更新后的函数get_downloaded_files。不过,这个更新后的版本应该也适用于以前的版本。

def get_downloaded_files(driver):
  """Return the completed downloads listed on Chrome's downloads page.

  The driver is navigated to chrome://downloads/ unless its current URL
  already starts with that address. Each completed item is mapped to the
  first of filePath, file_path, fileUrl and file_url that is set.
  """
  already_there = driver.current_url.startswith("chrome://downloads")
  if not already_there:
    driver.get("chrome://downloads/")

  # the items are read from the #downloadsList element inside the shadow
  # root of <downloads-manager>
  query = (
     "return  document.querySelector('downloads-manager')  "
     " .shadowRoot.querySelector('#downloadsList')         "
     " .items.filter(e => e.state === 'COMPLETE')          "
     " .map(e => e.filePath || e.file_path || e.fileUrl || e.file_url); ")
  return driver.execute_script(query)

答案 2 :(得分:2)

WebDriver:

如果您使用的是webdriver,则表示您的代码使用内部Selenium客户端和服务器代码与浏览器实例进行通信。下载的文件存储在本地机器上,可以使用java,python,.Net,node.js等语言直接访问本地机器。

远程WebDriver [Selenium-Grid]:

如果您使用远程webdriver,就意味着您正在使用Grid。Grid的主要目的是To distribute your tests over multiple machines or virtual machines (VMs)(把测试分发到多台机器或虚拟机上)。在这种方式下,您的代码使用Selenium客户端与Selenium Grid服务器(Hub)通信,Hub再把指令转发给已注册的、带有指定浏览器的节点(Node);节点再通过浏览器对应的驱动程序把指令传递给浏览器实例。此处下载发生在该节点系统的file-system | hard-disk上,而用户无法访问运行浏览器的虚拟机上的文件系统。

  
      
  • 如果我们可以使用JavaScript访问该文件,那么就可以convert the file to base64-String(把文件转换为base64字符串)并返回给客户端代码。但出于安全原因,JavaScript不允许从磁盘读取文件。

  •   
  • 如果Selenium Grid集线器和节点在同一系统中,并且它们位于公共网络中,那么您可以将下载文件的路径更改为某些公共下载路径,例如../Tomcat/webapps/Root/CutrentTimeFolder/file.pdf 。通过使用公共URL,您可以直接访问该文件。

  •   

例如从tomcat的Root文件夹下载文件[]。

// Log the URL the geckodriver binary is about to be fetched from.
System.out.println("FireFox Driver Path « "+ geckodriverCloudRootPath);
// Create an empty temporary file for the binary and mark it executable.
File temp = File.createTempFile("geckodriver",  null);
temp.setExecutable(true);
// Copy the content behind the URL into the temporary file.
// NOTE(review): FileUtils is presumably Apache Commons IO — confirm against the imports.
FileUtils.copyURLToFile(new URL( geckodriverCloudRootPath ), temp);

// Tell Selenium where the downloaded driver binary is and request Marionette.
System.setProperty("webdriver.gecko.driver", temp.getAbsolutePath() );
capabilities.setCapability("marionette", true);
  
      
  • 如果Selenium Grid集线器和节点不在同一系统上,您可能无法获取下载的文件,因为Grid Hub位于公共网络[WAN]中,而Node位于该组织的专用网络[LAN]中。
  •   

您可以将浏览器的下载文件路径更改为硬盘上的指定文件夹。使用以下代码。

// Folder on the machine running the browser where downloads are stored.
String downloadFilepath = "E:\\download";

// Chrome preferences: popup setting and the download directory.
HashMap<String, Object> chromePrefs = new HashMap<String, Object>();
chromePrefs.put("profile.default_content_settings.popups", 0);
chromePrefs.put("download.default_directory", downloadFilepath);

ChromeOptions options = new ChromeOptions();
options.setExperimentalOption("prefs", chromePrefs);
options.addArguments("--test-type");
options.addArguments("--disable-extensions"); //to disable browser extension popup

// The options object is the only value stored under ChromeOptions.CAPABILITY;
// the empty map that used to be set first was overwritten right away and has
// been removed.
DesiredCapabilities cap = DesiredCapabilities.chrome();
cap.setCapability(CapabilityType.ACCEPT_SSL_CERTS, true);
cap.setCapability(ChromeOptions.CAPABILITY, options);
// NOTE(review): ChromeDriver drives a browser on the local machine; for a grid
// the session would presumably be created with RemoteWebDriver and the hub URL.
RemoteWebDriver driver = new ChromeDriver(cap);

@见

答案 3 :(得分:0)

这只是上述@Florent答案的Java版本。在他的大量指导下,再加上一番挖掘和调整,我终于让它在Java中运行起来了。我想把它贴在这里,或许可以为其他人节省一些时间。

Firefox

首先,我们需要创建一个自定义的Firefox驱动程序,因为我们需要使用SET_CONTEXT命令,而Java客户端(截至selenium-3.141.59)尚未实现该命令

/**
 * RemoteWebDriver that additionally registers a "SET_CONTEXT" command,
 * mapped to POST /session/:sessionId/moz/context.
 */
public class CustomFirefoxDriver extends RemoteWebDriver {

    /**
     * Opens the remote session and registers the extra command.
     *
     * @param RemoteWebDriverUrl address of the remote WebDriver server
     * @param options            Firefox options for the new session
     * @throws Exception if the reflective registration fails
     */
    public CustomFirefoxDriver(URL RemoteWebDriverUrl, FirefoxOptions options) throws Exception {
        super(RemoteWebDriverUrl, options);

        // defineCommand is not publicly accessible on HttpCommandExecutor,
        // so it is looked up and invoked reflectively.
        Method register = HttpCommandExecutor.class.getDeclaredMethod(
                "defineCommand", String.class, CommandInfo.class);
        register.setAccessible(true);
        register.invoke(
                super.getCommandExecutor(),
                "SET_CONTEXT",
                new CommandInfo("/session/:sessionId/moz/context", HttpMethod.POST));
    }

    /**
     * Sends SET_CONTEXT with the given context name.
     *
     * @param context context name, e.g. "chrome" or "content"
     * @return the value carried by the command's response
     */
    public Object setContext(String context) {
        Object value = execute("SET_CONTEXT", ImmutableMap.of("context", context)).getValue();
        return value;
    }
}

下面的代码检索下载的.xls文件的内容,并将其另存为文件(temp.xls)在运行Java类的同一目录中。在Firefox中,这非常简单,因为我们可以使用浏览器API

/**
 * Returns the path of the first succeeded Firefox download whose path
 * contains the given text.
 *
 * The query runs in the "chrome" context; the driver is switched back to the
 * "content" context afterwards, also when the script fails.
 *
 * @param Matcher text the path must contain
 * @return the matching path, or an empty string when nothing matches
 */
public String getDownloadedFileNameBySubStringFirefox(String Matcher) {

    String script = "var { Downloads } = Components.utils.import('resource://gre/modules/Downloads.jsm', {});"
            + "Downloads.getList(Downloads.ALL).then(list => list.getAll())"
            + ".then(entries => entries.filter(e => e.succeeded).map(e => e.target.path))"
            + ".then(arguments[0]);";

    CustomFirefoxDriver firefox = (CustomFirefoxDriver) driver;
    firefox.setContext("chrome");
    try {
        Object result = js.executeAsyncScript(script);

        // The script resolves with an array; examine each path on its own
        // instead of matching against the printed form of the whole list,
        // which returned every path at once when several files were present.
        if (result instanceof java.util.List) {
            for (Object entry : (java.util.List<?>) result) {
                String path = String.valueOf(entry);
                if (path.contains(Matcher)) {
                    return path;
                }
            }
        }
        return "";
    } finally {
        firefox.setContext("content");
    }
}

/**
 * Reads a downloaded file through the browser and saves it as temp.xls in
 * the working directory.
 *
 * @param fileIdentifier text contained in the path of the wanted download
 * @throws IllegalStateException if no downloaded file matches
 */
public void getDownloadedFileContentFirefox(String fileIdentifier) {

    String filePath = getDownloadedFileNameBySubStringFirefox(fileIdentifier);
    if (filePath.isEmpty()) {
        // fail with a clear message instead of handing an empty path to the script
        throw new IllegalStateException("No downloaded file matches: " + fileIdentifier);
    }

    String script = "var { OS } = Cu.import(\"resource://gre/modules/osfile.jsm\", {});" + 
                    "OS.File.read(arguments[0]).then(function(data) {" + 
                    "var base64 = Cc[\"@mozilla.org/scriptablebase64encoder;1\"].getService(Ci.nsIScriptableBase64Encoder);" +
                    "var stream = Cc['@mozilla.org/io/arraybuffer-input-stream;1'].createInstance(Ci.nsIArrayBufferInputStream);" +
                    "stream.setData(data.buffer, 0, data.length);" +
                    "return base64.encodeToString(stream, data.length);" +
                    "}).then(arguments[1]);" ;

    Object base64FileContent;
    ((CustomFirefoxDriver) driver).setContext("chrome");
    try {
        base64FileContent = js.executeAsyncScript(script, filePath);
    } finally {
        // the driver used to be left in the chrome context after this call
        ((CustomFirefoxDriver) driver).setContext("content");
    }

    try {
        Files.write(Paths.get("temp.xls"), DatatypeConverter.parseBase64Binary(base64FileContent.toString()));
    } catch (IOException i) {
        System.out.println(i.getMessage());
    }

}

Chrome

我们需要采用其他方法来实现Chrome中的相同目标。我们将一个输入文件元素附加到“下载”页面,并将文件位置传递给此元素。一旦该元素指向我们所需的文件,我们就可以使用它来读取其内容。

/**
 * Returns the file URL of the first completed Chrome download whose URL
 * contains the given text.
 *
 * Navigates to chrome://downloads/ unless the current URL already starts
 * with that address.
 *
 * @param Matcher text the file URL must contain
 * @return the matching file URL, or an empty string when nothing matches
 */
public String getDownloadedFileNameBySubStringChrome(String Matcher) {
    //The script below returns the list of files as a list of the form '[$FileName1, $FileName2...]'
    // with the most recently downloaded file listed first.
    String script = "return downloads.Manager.get().items_.filter(e => e.state === 'COMPLETE').map(e => e.file_url);" ;
    if(!driver.getCurrentUrl().startsWith("chrome://downloads/")) {
        driver.get("chrome://downloads/");
    }

    // Walk the returned list directly; printing it and splitting on commas
    // broke file names that contain a comma and failed on a null result.
    Object result = js.executeScript(script);
    if (result instanceof java.util.List) {
        for (Object entry : (java.util.List<?>) result) {
            String candidate = String.valueOf(entry).trim();
            if (candidate.contains(Matcher)) {
                return candidate;
            }
        }
    }

    return "";

}


/**
 * Reads a completed Chrome download through a temporary file input element
 * and saves it as temp.xls in the working directory.
 *
 * @param fileIdentifier text contained in the file URL of the wanted download
 * @throws IllegalStateException if no download matches or the browser could
 *         not read the file
 */
public void getDownloadedFileContentChrome(String fileIdentifier) {

    //This causes the user to be navigated to the Chrome Downloads page
    String fileUrl = getDownloadedFileNameBySubStringChrome(fileIdentifier);
    if (fileUrl.isEmpty()) {
        // substring(7) on an empty result used to fail with an index error
        throw new IllegalStateException("No completed download matches: " + fileIdentifier);
    }

    //Turn the file:// URL into a path; URI.getPath() also decodes percent
    //escapes such as %20, which stripping the prefix alone leaves in place
    String fileName = null;
    try {
        fileName = java.net.URI.create(fileUrl).getPath();
    } catch (IllegalArgumentException e) {
        // not a syntactically valid URI: handled by the fallback below
    }
    if (fileName == null) {
        //Remove "file://" from the file path
        fileName = fileUrl.substring(7);
    }

    String script =  "var input = window.document.createElement('INPUT'); " +
            "input.setAttribute('type', 'file'); " +
            "input.setAttribute('id', 'downloadedFileContent'); " +
            "input.hidden = true; " +
            "input.onchange = function (e) { e.stopPropagation() }; " +
            "return window.document.documentElement.appendChild(input); " ;
    WebElement fileContent = (WebElement) js.executeScript(script);
    fileContent.sendKeys(fileName);

    String asyncScript = "var input = arguments[0], callback = arguments[1]; " +
            "var reader = new FileReader(); " +
            "reader.onload = function (ev) { callback(reader.result) }; " +
            "reader.onerror = function (ex) { callback(ex.message) }; " +
            "reader.readAsDataURL(input.files[0]); " +
            "input.remove(); " ;

    // String.valueOf tolerates a null result from the error callback
    String content = String.valueOf(js.executeAsyncScript(asyncScript, fileContent));
    int marker = content.indexOf("base64,");
    if (marker < 0) {
        // no data URL came back, so there is nothing valid to decode
        throw new IllegalStateException("Could not read " + fileName + ": " + content);
    }
    content = content.substring(marker + 7);

    try {
        Files.write(Paths.get("temp.xls"), DatatypeConverter.parseBase64Binary(content));
    } catch (IOException i) {
        System.out.println(i.getMessage());
    }

}

之所以需要这样的设置,是因为我的测试套件运行在Jenkins服务器上,而它所指向的Selenium Grid集线器(Hub)和节点(Node)运行在另一台服务器上的Docker容器(https://github.com/SeleniumHQ/docker-selenium)中。再次说明,这只是上述@Florent答案的Java翻译,更多信息请参考该答案。

答案 4 :(得分:-1)

以下代码在2020年可用于Chrome(使用PHP的php-webdriver):

// Download a file with a remote Chrome session, then read it back through the
// browser: a hidden <input type="file"> is pointed at the downloaded file and
// its content is returned as a data URL by a FileReader.
$downloaddir = "/tmp/";
$host = 'http://ipaddress:4444/wd/hub';
$driver = null;
try {
    $options = new ChromeOptions();
    $options->setExperimentalOption("prefs",["safebrowsing.enabled" => "true", "download.default_directory" => $downloaddir]);
    $options->addArguments( array("disable-extensions",'safebrowsing-disable-extension-blacklist','safebrowsing-disable-download-protection') );
    $caps = DesiredCapabilities::chrome();
    $caps->setCapability(ChromeOptions::CAPABILITY, $options);
    $caps->setCapability("unexpectedAlertBehaviour","accept");
    $driver = RemoteWebDriver::create($host, $caps);
    $driver->manage()->window()->setPosition(new WebDriverPoint(500,0));
    $driver->manage()->window()->setSize(new WebDriverDimension(1280,1000));
    $driver->get("https://file-examples.com/index.php/sample-documents-download/sample-rtf-download/");
    sleep(1);
    $driver->findElement(WebDriverBy::xpath("//table//tr//td[contains(., 'rtf')]//ancestor::tr[1]//a"))->click();
    // NOTE(review): fixed one-second pauses; a slow download may not be finished yet.
    sleep(1);
    $driver->get('chrome://downloads/');
    sleep(1);
    // $inject = "return downloads.Manager.get().items_.filter(e => e.state === 'COMPLETE').map(e => e.filePath || e.file_path); ";
    // Read the text of the link inside the first <downloads-item> on the page.
    $inject = "return document.querySelector('downloads-manager').shadowRoot.querySelector('downloads-item').shadowRoot.querySelector('a').innerText;";
    $filename = $driver->executeScript(" $inject" );
    echo "File name: $filename<br>";
    // Add a hidden file input to the page.
    $driver->executeScript( 
        "var input = window.document.createElement('INPUT'); ".
        "input.setAttribute('type', 'file'); ".
        "input.hidden = true; ".
        "input.onchange = function (e) { e.stopPropagation() }; ".
        "return window.document.documentElement.appendChild(input); " );
    $elem1 = $driver->findElement(WebDriverBy::xpath("//input[@type='file']"));
    // Point the input at the downloaded file.
    $elem1->sendKeys($downloaddir.$filename);
    // Read the selected file as a data URL and pass it to the callback.
    $result = $driver->executeAsyncScript( 
        "var input = arguments[0], callback = arguments[1]; ".
        "var reader = new FileReader(); ".
        "reader.onload = function (ev) { callback(reader.result) }; ".
        "reader.onerror = function (ex) { callback(ex.message) }; ".
        "reader.readAsDataURL(input.files[0]); ".
        "input.remove(); "
        , [$elem1]);
    // Keep only what follows the "base64," marker and decode it.
    $coding = 'base64,';
    $cstart = strpos( $result, 'base64,' );
    if ( $cstart !== false ) 
        $result = base64_decode(substr( $result, $cstart + strlen($coding) ));
    echo "File content: <br>$result<br>";
} catch (Exception $e) {
    echo 'Caught exception: ',  $e->getMessage(), "\n";
} finally {
    // Close the session on failure too; quit() used to be skipped whenever an
    // exception was thrown above.
    if ($driver !== null) {
        $driver->quit();
    }
}