我正在尝试自动化访问 http://50.17.237.182/PIM/ 并从中获取数据。
如果输入类似 1355025 的值,页面会在右侧窗格中加载相应的文本。
这在 Firefox 上可以正常工作,但在 PhantomJS 上,即使我加入了 WebDriverWait 之类的等待,所需的元素也根本没有加载。下面是完整的代码。
from time import sleep
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
def init_phantomjs_driver(*args, **kwargs):
    """Register browser-like request headers on the PhantomJS capabilities.

    Every header is written into the shared
    ``webdriver.DesiredCapabilities.PHANTOMJS`` dict under a
    ``phantomjs.page.customHeaders.<name>`` key, so the change is global to
    the process (presumably so that PhantomJS drivers created afterwards
    pick the headers up -- confirm against the Selenium version in use).

    No driver is created or returned here; ``*args`` and ``**kwargs`` are
    accepted but currently unused.
    """
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        # Was 'zen-US', a typo for the intended 'en-US' language tag.
        'Accept-Language': 'en-US,en;q=0.9,ur;q=0.8',
        'Referer': 'http://propertymap.sfplanning.org/',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.2; WOW64; rv:47.0) Gecko/20100101 Firefox/47.0',
        'Upgrade-Insecure-Requests': '1',
        'Connection': 'keep-alive',
    }
    capabilities = webdriver.DesiredCapabilities.PHANTOMJS
    for key, value in headers.items():
        capabilities['phantomjs.page.customHeaders.{}'.format(key)] = value
    # NOTE: an alternative that only overrides the user agent through
    # 'phantomjs.page.settings.userAgent' was tried and left disabled.
    # NOTE: driver creation (webdriver.PhantomJS(*args, **kwargs)) is also
    # disabled here; the caller constructs the driver itself.
if __name__ == '__main__':
    # Scrape the '.reportData' entries shown for a single APN lookup.
    # Alternative target (disabled): 'http://propertymap.sfplanning.org/'
    url = 'http://50.17.237.182/PIM/'
    input_apn = '1355025'

    # init_phantomjs_driver()
    driver = webdriver.PhantomJS(executable_path='/Setups/phantomjs-1.9.8-macosx/bin/phantomjs')
    # driver = webdriver.Firefox()
    try:
        driver.set_window_size(1400, 1000)
        driver.get(url)
        sleep(2)

        # find_element raises NoSuchElementException when the element is
        # missing, so no truthiness check on the result is needed.
        driver.find_element(By.ID, 'addressInput').send_keys(input_apn)
        sleep(5)
        driver.find_element(By.ID, 'findButton').click()
        sleep(10)

        # Raises TimeoutException if the container is still absent after 20s.
        WebDriverWait(driver, 20).until(
            EC.presence_of_element_located((By.ID, 'map_canvas_container'))
        )
        driver.save_screenshot('page.png')
        html = driver.page_source
    finally:
        # Always shut the PhantomJS process down, even when a lookup or the
        # wait above fails.
        driver.quit()

    soup = BeautifulSoup(html, 'lxml')
    entries = soup.select('.reportData')
    print(entries)
保存的屏幕截图返回: