我想使用 Selenium 从网页上下载嵌入式 PDF(如下图所示)。 Embedded PDF image
我尝试了下面提到的代码,但是没有成功。
def download_pdf(lnk, download_folder="/*My folder*/",
                 driver_path='/*Path of chromedriver*/', timeout=60):
    """Download the PDF embedded in the page at ``lnk`` through Chrome.

    The page is opened in Chrome, the driver switches into the frame that
    hosts the PDF viewer, and the viewer's own download button is clicked.
    The function then polls ``download_folder`` until a finished file shows
    up, so the browser is not shut down in the middle of the transfer.

    Args:
        lnk: URL of the page that embeds the PDF viewer.
        download_folder: Directory Chrome is told to save downloads into.
        driver_path: Path of the chromedriver executable.
        timeout: Maximum number of seconds to wait for the download.

    Returns:
        None. Progress is reported on stdout.

    NOTE(review): assumes the viewer is hosted in the first ``<iframe>`` of
    the page and exposes the ``secondaryToolbarToggle`` / ``secondaryDownload``
    ids -- confirm against the target site.
    """
    import os
    from time import monotonic, sleep

    from selenium import webdriver

    options = webdriver.ChromeOptions()
    profile = {
        # Legacy switch for disabling the built-in PDF viewer.
        "plugins.plugins_list": [{"enabled": False,
                                  "name": "Chrome PDF Viewer"}],
        # Preference used by current Chrome builds: save PDFs to disk
        # instead of rendering them in the built-in viewer.
        "plugins.always_open_pdf_externally": True,
        "download.default_directory": download_folder,
        "download.prompt_for_download": False,
        "download.extensions_to_open": "",
    }
    options.add_experimental_option("prefs", profile)

    # Remember what is already in the folder so only a *new* file counts
    # as the finished download.
    if os.path.isdir(download_folder):
        already_there = set(os.listdir(download_folder))
    else:
        already_there = set()

    print("Downloading file from link: {}".format(lnk))
    # Selenium 3 style calls are kept to match the rest of this snippet.
    driver = webdriver.Chrome(driver_path, chrome_options=options)
    try:
        driver.get(lnk)

        # Elements inside a frame are invisible to lookups made from the
        # top-level document, so enter the frame before searching.
        driver.switch_to.frame(driver.find_element_by_tag_name("iframe"))
        driver.find_element_by_id("secondaryToolbarToggle").click()
        driver.find_element_by_id("secondaryDownload").click()

        # Wait until a new file exists and no partial-download file remains.
        deadline = monotonic() + timeout
        while monotonic() < deadline:
            if os.path.isdir(download_folder):
                new_files = set(os.listdir(download_folder)) - already_there
                partial = [name for name in new_files
                           if name.endswith((".crdownload", ".tmp"))]
                if new_files and not partial:
                    print("Status: Download Complete.")
                    return
            sleep(0.5)
        print("Status: Download did not finish within {} seconds."
              .format(timeout))
    finally:
        # quit() ends the whole browser session and the chromedriver
        # process, even when a lookup or click above raised.
        driver.quit()


download_pdf('https://www.sebi.gov.in/enforcement/orders/jun-2019/adjudication-order-in-respect-of-three-entities-in-the-matter-of-prism-medico-and-pharmacy-ltd-_43323.html')
感谢您的帮助。
提前谢谢!
答案 0 :(得分:0)
代码如下,说明见代码中的注释:
= ^ .. ^ =
import os

from selenium import webdriver

# Start Chrome through the chromedriver binary that sits in the current
# working directory.
driver_path = '{}/chromedriver'.format(os.getcwd())
browser = webdriver.Chrome(driver_path)

# Open the page that embeds the document inside an iframe.
page_url = 'https://www.sebi.gov.in/enforcement/orders/jun-2019/adjudication-order-in-respect-of-three-entities-in-the-matter-of-prism-medico-and-pharmacy-ltd-_43323.html'
browser.get(page_url)

# The iframe's "src" attribute is taken as the URL of the PDF page.
frame = browser.find_element_by_tag_name('iframe')
pdf_url = frame.get_attribute("src")

# Navigate straight to that URL so its controls are in the top-level document.
browser.get(pdf_url)

# Locate the element with id "download" and click it to start the download.
download = browser.find_element_by_xpath('//*[@id="download"]')
download.click()
答案 1 :(得分:0)
这是无需单击/下载即可获取文件的另一种方法。如果在Selenium Grid(远程节点)中执行测试,此方法还可以帮助您将文件下载到本地计算机上。
import java.io.File;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import org.openqa.selenium.Cookie;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
public class FileDownloader extends MyPage(){
public void downloadFile(){
//grab the file download url from your download icon/button/element
String src = iframe.getAttribute("src");
driver.get(src); //driver object from 'MyPage.java'
// Grab cookies from current driver session (authenticated cookie information
// is vital to download the file from 'src'
StringBuilder cookies = new StringBuilder();
for (Cookie cookie : driver.manage().getCookies()){
String value = cookie.getName() + "=" + cookie.getValue();
if (cookies.length() == 0 )
cookies.append(value);
else
cookies.append(";").append(value);
}
try{
HttpURLConnection con = (HttpURLConnection) new URL(src).openConnection();
con.setRequestMethod("GET");
con.addRequestProperty("Cookie",cookies.toString());
//set your own download path, probably a dynamic file name with timestamp
String downloadPath = System.getProperty("user.dir") + File.separator + "file.pdf";
OutputStream outputStream = new FileOutputStream(new File(downloadPath));
InputStream inputStream = con.getInputStream();
int BUFFER_SIZE = 4096;
byte[] buffer = new byte[BUFFER_SIZE];
int bytesRead = -1;
while((bytesRead = inputStream.read(buffer)) != -1)
outputStream.write(buffer, 0, bytesRead);
outputStream.close();
}catch(Exception e){
// file download failed.
}
}
}
我的 DOM 结构如下:
<!-- DOM dump: the iframe "frame01" loads /files/downloads/pdfgenerator.aspx; its nested document holds an embed of type application/pdf whose own src is about:blank. The "#document" line is presumably the inspector's marker for the frame's document, not markup. -->
<iframe src="/files/downloads/pdfgenerator.aspx" id="frame01">
#document
<html>
<body>
<embed width="100%" height ="100%" src="about:blank" type="application/pdf" internalid="1234567890">
</body>
</html>
</iframe>