如何在页面加载完成后打印页面源码?
执行 print driver.page_source.encode('utf-8') 时,
输出的似乎是首页,而不是搜索结果页。
import sys
import time
import getopt
from pyvirtualdisplay import Display
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
def start_display():
    """Start a non-visible 800x600 virtual display and return it.

    Returns:
        The started ``Display`` object; the caller stops it when done.
    """
    virtual_screen = Display(visible=0, size=(800, 600))
    virtual_screen.start()
    return virtual_screen
def init_driver():
    """Create a Firefox WebDriver with a 5-second explicit wait attached.

    Returns:
        The driver, with the ``WebDriverWait`` helper stored on its
        ``wait`` attribute.
    """
    browser = webdriver.Firefox()
    # Keep the wait helper on the driver so callers can write
    # ``driver.wait.until(...)`` without building their own.
    browser.wait = WebDriverWait(browser, 5)
    return browser
def lookup(driver, argv):
    """Run a Google search and print the HTML of the results page.

    Args:
        driver: Selenium WebDriver carrying a ``wait`` attribute whose
            ``until`` method blocks on an expected condition (this is what
            ``init_driver`` returns).
        argv: Command-line arguments without the program name; the search
            terms are read from ``--query``.

    Raises:
        getopt.GetoptError: If ``argv`` contains an unrecognised option.
    """
    driver.get('http://www.google.dk')

    query = ''
    opts, _ = getopt.getopt(argv, 'hg:d', ['query='])
    for opt, arg in opts:
        if opt == '--query':
            query = arg

    try:
        box = driver.wait.until(EC.presence_of_element_located(
            (By.NAME, 'q')))
        box.send_keys(query)
        button = driver.wait.until(EC.element_to_be_clickable(
            (By.NAME, 'btnG')))
        button.click()
    except TimeoutException:
        sys.stderr.write('Box or Button not found in google.com\n')
        return

    try:
        # click() returns as soon as the click is dispatched; the results
        # arrive afterwards. Printing page_source right away therefore dumps
        # the landing page, so block until the results marker is present.
        driver.wait.until(EC.presence_of_element_located(
            (By.ID, 'resultStats')))
    except TimeoutException:
        sys.stderr.write('Search results not loaded in google.com\n')
        return

    # Single-argument print(...) is identical to the Python 2 print statement.
    print(driver.page_source.encode('utf-8'))
if __name__ == '__main__':
    # Script entry point: start the virtual display, open the browser, run
    # the search, and always tear both down again, even when a step raises,
    # so no browser or display is left running.
    display = start_display()
    try:
        driver = init_driver()
        try:
            lookup(driver, sys.argv[1:])
            # Keep the browser open for five more seconds before quitting.
            time.sleep(5)
        finally:
            driver.quit()
    finally:
        display.stop()
答案 0 :(得分:1)
如果页面在加载完成(page load)之后仍会动态更新内容,则应打印 body 元素的 innerHTML,而不是 page_source。
见下面的代码:
try:
    # Wait for the search box, then type the query into it.
    search_box = driver.wait.until(
        EC.presence_of_element_located((By.NAME, 'q')))
    search_box.send_keys(query)

    # Wait until the search button can be clicked, then click it.
    search_button = driver.wait.until(
        EC.element_to_be_clickable((By.NAME, 'btnG')))
    search_button.click()

    # Fixed 10-second pause before reading the page.
    time.sleep(10)

    # Print the current innerHTML of <body> instead of page_source.
    body = driver.find_element_by_tag_name("body")
    markup = body.get_attribute("innerHTML")
    print(markup.encode('utf-8'))
except TimeoutException:
    sys.stderr.write('Box or Button not found in google.com\n')
答案 1 :(得分:1)
您需要在点击搜索按钮 btnG 之后添加一个显式等待,
否则脚本不会等到 XHR 请求完成就直接打印页面。
try:
box = driver.wait.until(EC.presence_of_element_located(
(By.NAME, 'q')))
box.send_keys(query)
button = driver.wait.until(EC.element_to_be_clickable(
(By.NAME, 'btnG')))
button.click()
driver.wait.until(EC.presence_of_element_located(
(By.ID, 'resultStats')))
print driver.page_source.encode('utf-8')