硒-下载文件

时间:2018-07-23 01:24:52

标签: python selenium screen-scraping downloading

我正在尝试运行一个脚本来访问Nasdaq网站,以下载过去18个月内公司列表的股票信息。运行以下脚本后,我仅设法使用公司信息和下载按钮打开Firefox页面,但无法立即为我下载。

为什么?

def pull_nasdaq_data(tickers,save_path,rm_path):

# To prevent download dialog box in selenium
profile = webdriver.FirefoxProfile()
profile.set_preference('browser.download.folderList', 2) # custom location
profile.set_preference('browser.download.manager.showWhenStarting', False)
profile.set_preference('browser.download.dir', r'C:\Users\Filippo Sebastio\Desktop\Stock')
profile.set_preference('browser.helperApps.neverAsk.saveToDisk', "text/plain, application/vnd.ms-excel, text/csv, application/csv, text/comma-separated-values, application/download, application/octet-stream, binary/octet-stream, application/binary, application/x-unknown")

# Setup Webdriver

driver = webdriver.Firefox(executable_path=r'C:\Users\Filippo Sebastio\Desktop\geckodriver.exe')


popup = True  # Will there be a popup?

for ticker in tickers:
    # Get the stocks website
    site = 'http://www.nasdaq.com/symbol/' + ticker + '/historical'
    driver.get(site)
    # Choose 10 year data from a drop down
    data_range = driver.find_element_by_name('ddlTimeFrame')
    for option in data_range.find_elements_by_tag_name('option'):
        if option.text == '18 Months':
            option.click()
            break
    time.sleep(10)

    # Click to Download Data
    driver.find_element_by_id('lnkDownLoad').click()

    # Open the file from the downloads folder
    time.sleep(25)  # Wait for file to download
    data = pd.read_csv('~/Downloads/HistoricalQuotes.csv')

    # Rename and save the file in the desired location
    file_loc = save_path + ticker + '.csv'
    data.to_csv(file_loc, index=False)

    # Delete the downloaded file
    os.remove(removal_path)

    print("Downloaded:  ", ticker)

    # Wait for the next page to load
    time.sleep(20)  


tickers = ['tesla', 'mmm']  
save_path = my patht to where I want the docuemnts downloaded
rm_path = my Download path 

pull_nasdaq_data(tickers, save_path, rm_path)

0 个答案:

没有答案