使用Selenium从FireFox下载时阻止下载弹出窗口

时间:2017-04-28 22:05:34

标签: python-3.x selenium firefox download popup

我尝试过多种自动从网站下载PDF文档的方法,最终决定使用Selenium来浏览网站并下载*.pdf文件。但是,我无法阻止下载对话框弹出。

如能提供帮助,将不胜感激。

这是我的脚本:

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait # available since 2.4.0
from selenium.webdriver.support import expected_conditions as EC # available since 2.26.0
from urllib.request import urlopen as uReq
from bs4 import BeautifulSoup as soup
import re
import os.path

# Build the Firefox profile so that downloads are saved without any dialog.

profile = webdriver.FirefoxProfile()
# folderList = 2 tells Firefox to use a custom download directory
# (the one named by "browser.download.dir") instead of Desktop/Downloads.
profile.set_preference("browser.download.folderList", 2)
profile.set_preference("browser.download.alertOnEXEOpen", False)
# The preference name is "neverAsk.saveToDisk" (note the dot). With the name
# misspelled Firefox silently ignores it and keeps showing the download box.
# The value is a comma-separated list of MIME types, written without spaces.
profile.set_preference(
    "browser.helperApps.neverAsk.saveToDisk",
    ",".join([
        "application/x-pdf",
        "application/acrobat",
        "application/vnd.pdf",
        "text/pdf",
        "text/x-pdf",
        "application/vnd.cups-pdf",
        "text/csv",
        "application/x-msexcel",
        "application/excel",
        "application/x-excel",
        "application/vnd.ms-excel",
        "image/png",
        "image/jpeg",
        "text/html",
        "text/plain",
        "application/msword",
        "application/xml",
        "application/pdf",
    ]),
)
profile.set_preference("browser.download.manager.showWhenStarting", False)
profile.set_preference("browser.download.manager.focusWhenStarting", False)
profile.set_preference("browser.helperApps.alwaysAsk.force", False)
profile.set_preference("browser.download.manager.alertOnEXEOpen", False)
profile.set_preference("browser.download.manager.closeWhenDone", False)
profile.set_preference("browser.download.manager.showAlertOnComplete", False)
profile.set_preference("browser.download.manager.useWindow", False)
profile.set_preference("services.sync.prefs.sync.browser.download.manager.showWhenStarting", False)
# Disable the built-in PDF viewer so PDFs are downloaded rather than rendered.
profile.set_preference("pdfjs.disabled", True)

# Launch Firefox with the prepared profile and open the search page.
browser = webdriver.Firefox(profile)
browser.get('http://www.smad.gov.sk.ca/Pages/BasePages/Main.aspx?UseCase=ExternalSearch')

# Locate the file-number query box. The generic find_element(By.ID, ...) form
# is used (as in the rest of this script) because the find_element_by_*
# helpers are deprecated and absent from Selenium 4.
elem = browser.find_element(By.ID, 'ctl00_ContentPlaceHolder1_plc1Content_ucExternalAssessmentSearchView_txtFileNumber')

# Type "-" and press RETURN to submit the query.
elem.send_keys('-' + Keys.RETURN)

def _wait_for(locator, timeout):
    """Block until the element identified by *locator* is present in the DOM.

    *locator* is a ``(By.<strategy>, value)`` tuple and *timeout* is the
    maximum number of seconds to wait. Returns the located element.
    WebDriverWait raises TimeoutException if the element never appears, so a
    page that fails to load stops the script with a clear error instead of
    falling through to a click on a missing element (which is what the
    previous try/finally construction did).
    """
    return WebDriverWait(browser, timeout).until(
        EC.presence_of_element_located(locator))


# Wait (up to 80 s) for the search results, then open the first file.
_wait_for((By.NAME, "ctl00$ContentPlaceHolder1$plc1Content$ucExternalAssessmentSearchView$grdMainSearch$ctl03$btnViewDetails"), 80).click()

# Wait for the detail page, then open the maps-and-documents tree view.
_wait_for((By.NAME, "ctl00$ContentPlaceHolder1$plc1Content$ucExternalAssessmentSearchView$ucView$ucAssessmentFileView$btnMapsAndDocumentsTreeView"), 60).click()

# Wait for the refresh button, pick the fifth category radio button, refresh.
_wait_for((By.NAME, "ctl00$ContentPlaceHolder1$plc1Content$ucExternalAssessmentSearchView$ucView$ucMapsAndDocuments$btnRefresh"), 60)
browser.find_elements(By.CSS_SELECTOR, "input[type='radio'][name='ctl00$ContentPlaceHolder1$plc1Content$ucExternalAssessmentSearchView$ucView$ucMapsAndDocuments$lstCategoryTypes']")[4].click()
# The button is looked up again after the radio click rather than reusing the
# element returned by the wait, in case the click re-renders the page.
browser.find_element(By.NAME, "ctl00$ContentPlaceHolder1$plc1Content$ucExternalAssessmentSearchView$ucView$ucMapsAndDocuments$btnRefresh").click()

# Wait for the document tree, then expand its nodes. The tree nodes are
# addressed by element id, so the wait must use By.ID as well; waiting with
# By.NAME on this value can never match and always ran into the full timeout.
tree_node_prefix = "ctl00_ContentPlaceHolder1_plc1Content_ucExternalAssessmentSearchView_ucView_ucMapsAndDocuments_tvLogFolderAndFile"
_wait_for((By.ID, tree_node_prefix + "n0"), 60)
for node_suffix in ("n3", "n0", "n5", "n7", "n9"):
    # Each node is looked up right before it is clicked, in click order.
    browser.find_element(By.ID, tree_node_prefix + node_suffix).click()

# Create a per-file download folder named after the assessment file number.
save_path = "C:/Users/Jacob/Documents/ArcGIS/Saskachewan/Assessment work/"
Name_of_folder = browser.find_element(By.ID, "ctl00_ContentPlaceHolder1_plc1Content_ucExternalAssessmentSearchView_ucView_ucMapsAndDocuments_lblAssessmentFileNumberValue").text
newpath = os.path.join(save_path, Name_of_folder)
os.makedirs(newpath, exist_ok=True)

# Point Firefox at the new folder. Calling profile.set_preference() here has
# no effect: the profile was already handed to Firefox when the browser was
# started (and the old call also passed a tuple instead of the path string).
# Instead, the preference is written in the running browser through the
# privileged "chrome" context.
# NOTE(review): recent Firefox releases may restrict chrome-context access;
# if this fails, set "browser.download.dir" on the profile before launching
# the browser and move the downloaded files afterwards.
with browser.context(browser.CONTEXT_CHROME):
    browser.execute_script(
        'Components.classes["@mozilla.org/preferences-service;1"]'
        '.getService(Components.interfaces.nsIPrefBranch)'
        '.setCharPref(arguments[0], arguments[1]);',
        "browser.download.dir",
        os.path.normpath(newpath),  # Firefox expects a native path
    )

# Click the first document node in the tree to start the download.
browser.find_element(By.ID, "ctl00_ContentPlaceHolder1_plc1Content_ucExternalAssessmentSearchView_ucView_ucMapsAndDocuments_tvLogFolderAndFilet1").click()

1 个答案:

答案 0 :(得分:0)

在Firefox中,您可以将首选项 browser.download.manager.showAlertOnComplete 设置为 false,以禁用这些下载弹出窗口。