我将任意数量的地址推送到trulia搜索栏中后,都会不断出现此错误。
selenium.common.exceptions.WebDriverException: Message: TypeError: curContainer.frame.document.documentElement is null
我不确定此错误是什么意思,或者如何只是简单地忽略它。这是我的代码,仅供参考。
from selenium import webdriver
from selenium.webdriver.remote import webelement
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import time
from bs4 import BeautifulSoup
import os
from datetime import datetime
from selenium.webdriver import ActionChains
import random
input_file = ".\\pa-property-value-tools\\input\\active_assets_RETRY.xlsx"
input_df = pd.read_excel(input_file)
input_df['Address'] = input_df['Address'].astype(str)
output_df = pd.DataFrame(columns=['Account','Address', 'trulia_estimate'])
driver = webdriver.Firefox(executable_path = 'C:\\Users\\Morgan.weiss\\Downloads\\geckodriver-v0.24.0-win64\\geckodriver.exe')
#driver = webdriver.Chrome('C:\\Users\\Morgan.weiss\\Downloads\\chromedriver_win32\\chromedriver.exe')
# actionChains = ActionChains(driver)
def append_date_timestamp(filepath, extension):
return (
filepath + "-" + datetime.now().strftime("%Y-%m-%d %H-%M-%S") + "." + extension
)
def get_trulia_estimate(address):
price = dict()
price['estimate'] = 'N/A'
price['listing'] = 'N/A'
driver.get('https://www.trulia.com/')
print(address)
driver.find_element_by_id('homepageSearchBoxTextInput').clear()
driver.find_element_by_id('homepageSearchBoxTextInput').send_keys(address)
driver.find_element_by_css_selector("button[data-auto-test-id='searchButton']").click()
time.sleep(3)
soup = BeautifulSoup(driver.page_source, 'html.parser')
try:
trulia_est_text = soup.select("span.Text__TextBase-sc-1cait9d-0.OmRik")
if trulia_est_text[0].text == 'Trulia Estimate':
trulia_est = soup.select("div.Text__TextBase-sc-1cait9d-0-div.Text__TextContainerBase-sc-1cait9d-1.hlvKRM")
if price['estimate']:
price['estimate'] = trulia_est[0].text
else:
listing_price = soup.select("div.Text__TextBase-sc-1cait9d-0-div.Text__TextContainerBase-sc-1cait9d-1.hlvKRM")
if price['listing']:
price['listing'] = listing_price[0].text
return price
except (IndexError,TypeError):
return price
outputfile = append_date_timestamp(".\\pa-property-value-tools\\output\\trulia", "csv")
count = 0
wait_after = 100
for row in input_df.itertuples():
count += 1
price = get_trulia_estimate(row.Address)
output_df = output_df.append({
'Account': row.Account,
'Address': row.Address,
'trulia_estimate':price["estimate"],
'trulia_listing': price["listing"]
},
ignore_index=True,
)
if count % wait_after == 0:
# if file does not exist write header
if not os.path.isfile(outputfile):
output_df.to_csv(outputfile, index=False)
else: # else it exists so append without writing the header
output_df.to_csv(outputfile, mode='a', index=False, header=False)
output_df = pd.DataFrame(columns=['Account','Address', 'trulia_estimate', 'trulia_listing'])
print("Waiting between 3 minutes and 7 minutes " + str(wait_after) + " calls")
time.sleep(random.randint(180,420))
time.sleep(random.randint(3,7))
if count % wait_after > 0:
output_df.to_csv(outputfile, mode='a', index=False, header=False)
我试图忽略TypeError,但它似乎不起作用,任何建议都将不胜感激。