满足基于日期的条件时,如何停止运行代码?

时间:2020-11-08 18:56:09

标签: python web-scraping while-loop conditional-statements

我得到了以下代码,它正在抓取以下网页here

from selenium import webdriver

import os

from selenium.webdriver.common.by import By

from selenium.webdriver.support.ui import WebDriverWait

from selenium.webdriver.support import expected_conditions as EC

import xlsxwriter

import time



#chrome driver connection
chrome_driver = os.path.abspath('C:/Users/ross/Desktop/chromedriver.exe')
browser = webdriver.Chrome(chrome_driver)


#don't want to iterate through multiple bond offerings, just different bond yields so the url is fully hard coded in
browser.get('https://finra-markets.morningstar.com/BondCenter/BondTradeActivitySearchResult.jsp?ticker=C864557&startdate=10%2F08%2F2019&enddate=10%2F08%2F2020')


#clicks agreement to get to data we want
WebDriverWait(browser, 60).until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#ms-agreement > input'))).click()

#writes to worksheet
workbook = xlsxwriter.Workbook('test.xlsx')
worksheet = workbook.add_worksheet()

#set date which you want to stop at
target_date = '10/2/2020'


def scrape_finra(iteration):

    #for each iteration get the columns with dates, yields, statuses, and quantities
    retrieve_date = WebDriverWait(browser, 20).until(EC.presence_of_all_elements_located(
        (By.XPATH, '//*[@id="ms-glossary"]/div/table/tbody/tr[' + str(iteration) + ']/td[1]/div')))
    retrieve_yield = WebDriverWait(browser, 20).until(EC.presence_of_all_elements_located(
        (By.XPATH, '//*[@id="ms-glossary"]/div/table/tbody/tr[' + str(iteration) + ']/td[7]/div')))
    retrieve_status = WebDriverWait(browser, 20).until(EC.presence_of_all_elements_located(
        (By.XPATH, '//*[@id="ms-glossary"]/div/table/tbody/tr[' + str(iteration) + ']/td[4]/div')))
    retrieve_quantity = WebDriverWait(browser, 20).until(EC.presence_of_all_elements_located(
        (By.XPATH, '//*[@id="ms-glossary"]/div/table/tbody/tr[' + str(iteration) + ']/td[5]/div')))

    
    status = retrieve_status[0].text
    date = retrieve_date[0].text
    bond_yield = retrieve_yield[0].text

    #if the quantity is over 5 million, it is entered as '5MM+' on the site. This is converted to 5 million so all quantities can be entered as integers
    if retrieve_quantity[0].text == "5MM+":
        quantity = 5000000
    else:
        quantity = retrieve_quantity[0].text

    return date, bond_yield, status, quantity



stop_running = False

#row_iteration keeps track of what is being scraped on the WEB PAGE
row_iteration = 1

#tot_num_rows keeps track of where the data is being entered on the excel spreadsheet
tot_num_rows = 0

#click_num keeps track of the number page that we are currently on
click_num = 1
while stop_running is False:

    date = scrape_finra(row_iteration)[0]
    bond_yield = scrape_finra(row_iteration)[1]
    status = scrape_finra(row_iteration)[2]
    quantity = scrape_finra(row_iteration)[3]

    # row_iteration begins at 1 and works all the way up to 20, unless the code gets stopped because the target_date is met
    row_iteration += 1

    #CODE DOESN'T STOP RUNNING WHEN DATE < TARGET_DATE
    if str(date) < str(target_date):

        stop_running = True

    #as long as the status is 'Trade' and all other fields are non-blanks then write to worksheet
    if status == 'Trade' and date != "" and bond_yield != "" and quantity != "":

        worksheet.write_string(tot_num_rows, 0, str(date))
        worksheet.write_number(tot_num_rows, 1, float(bond_yield))
        worksheet.write_number(tot_num_rows, 2, int(quantity))
        tot_num_rows += 1
        print(date, bond_yield, quantity)




    if row_iteration == 20:
        click_num += 1
        WebDriverWait(browser, 60).until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, "input.qs-ui-ipt.qs-pageutil-input"))).clear()
        WebDriverWait(browser, 60).until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, "input.qs-ui-ipt.qs-pageutil-input"))).send_keys(str(click_num))
        WebDriverWait(browser, 60).until(EC.element_to_be_clickable(
            (By.CSS_SELECTOR, '#ms-glossary > div > table > tfoot > tr > td > div > a.qs-pageutil-go'))).click()
        row_iteration = 0
        time.sleep(5)


workbook.close()

一切正常,除了击中时

if str(date) < str(target_date):

    stop_running = True

尽管我从网页返回的日期小于我的target_date(在这种情况下,target_date是10/2/2020),但代码仍将继续运行。奇怪的是,如果我更改了上面的代码片段,使它成为==而不是<,它将完全按计划工作,并且无论target_date是什么都将停止。

为什么<符号不能停止我的代码运行并打印到我的Excel工作表?

非常感谢所有帮助!

此致

罗斯

0 个答案:

没有答案