Selenium:在页面加载完成后打印页面

时间:2016-02-16 09:50:51

标签: python selenium

如何在页面加载完成之后再打印页面内容?

执行 print driver.page_source.encode('utf-8') 时,输出的似乎是最初加载的首页,而不是搜索结果页。

import sys
import time
import getopt
from pyvirtualdisplay import Display
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException

def start_display():
    """Create, start and return a non-visible 800x600 virtual display.

    The caller owns the returned object and is responsible for calling
    ``stop()`` on it once the browser session is finished.
    """
    virtual_screen = Display(size=(800, 600), visible=0)
    virtual_screen.start()
    return virtual_screen

def init_driver():
    """Launch Firefox and return the driver with a ready-made explicit wait.

    The returned driver carries an extra ``wait`` attribute: a
    ``WebDriverWait`` bound to this driver with a 5 second timeout, so
    callers can write ``driver.wait.until(...)``.
    """
    browser = webdriver.Firefox()
    explicit_wait = WebDriverWait(browser, 5)
    browser.wait = explicit_wait
    return browser

def lookup(driver, argv):
    driver.get('http://www.google.dk')

    query = ''

    opts, args = getopt.getopt(argv, 'hg:d', ['query='])
    for opt, arg in opts:
        if opt == '--query':
            query = arg

    try:
        box = driver.wait.until(EC.presence_of_element_located(
            (By.NAME, 'q')))
        box.send_keys(query)
        button = driver.wait.until(EC.element_to_be_clickable(
            (By.NAME, 'btnG')))
        button.click()
        print driver.page_source.encode('utf-8')

    except TimeoutException:
        print>>sys.stderr, 'Box or Button not found in google.com'

if __name__ == '__main__':
    # Script entry point: start the virtual display, run one search, then
    # tear everything down. The try/finally nesting guarantees the browser
    # and the display are released even when start-up or the lookup raises
    # (for example getopt.GetoptError on a bad command line).
    display = start_display()
    try:
        driver = init_driver()
        try:
            lookup(driver, sys.argv[1:])
            # Pause kept from the original script before shutting down;
            # presumably to let the browser settle -- TODO confirm it is
            # still needed.
            time.sleep(5)
        finally:
            driver.quit()
    finally:
        display.stop()

2 个答案:

答案 0 :(得分:1)

如果您的应用程序在页面加载完成(page load 事件)之后还会更新内容,则必须打印页面 body 元素的内容,而不是 page_source。

见下面的代码:

try:
    box = driver.wait.until(EC.presence_of_element_located(
        (By.NAME, 'q')))
    box.send_keys(query)
    button = driver.wait.until(EC.element_to_be_clickable(
        (By.NAME, 'btnG')))
    button.click()
    time.sleep(10)
    print driver.find_element_by_tag_name("body").get_attribute("innerHTML").encode('utf-8')

except TimeoutException:
    print>>sys.stderr, 'Box or Button not found in google.com'

答案 1 :(得分:1)

您需要在点击搜索按钮 btnG 之后添加一个等待,否则脚本不会等到 XHR 请求完成就直接打印页面。

# NOTE(review): excerpt only -- the except clause that closes this try is
# not shown here; presumably it is the same TimeoutException handler as in
# the question.
try:
    # Wait for the search field, then type the query into it.
    box = driver.wait.until(EC.presence_of_element_located(
        (By.NAME, 'q')))
    box.send_keys(query)
    # Wait until the search button can be clicked, then submit.
    button = driver.wait.until(EC.element_to_be_clickable(
        (By.NAME, 'btnG')))
    button.click()

    # Block until the element with id 'resultStats' is present, i.e. the
    # results have been loaded, before reading the page.
    driver.wait.until(EC.presence_of_element_located(
            (By.ID, 'resultStats')))

    # Dump the page markup as UTF-8 bytes.
    print driver.page_source.encode('utf-8')