Python Selenium - 元素未附加到页面文档

时间:2016-07-19 22:22:16

标签: python parsing selenium selenium-webdriver

我试图解析脸书Facebook粉丝并在行中获得例外

driver.get("{}".format(url.get_attribute("href")))
  

selenium.common.exceptions.StaleElementReferenceException:消息:   陈旧元素引用:元素未附加到页面文档

程序切换到循环的第二个元素

#coding: utf-8
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
import time
import random
from PIL import Image
import csv
import string
import re

opts = Options()
opts.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36")
#driver = webdriver.Chrome(executable_path='/Users/sweeetopium/PycharmProjects/education_py/education_py/chromedriver', chrome_options=opts)
driver = webdriver.Chrome(executable_path='/Users/kr/PycharmProjects/education_py/chromedriver',chrome_options=opts)


user_login = ''
user_pass = ''

def login_to_fb():
    driver.set_window_size(1280,800)
    driver.get("https://facebook.com/login/")
    driver.find_element_by_id('email').send_keys(user_login)
    time.sleep(1)
    driver.find_element_by_id('pass').send_keys(user_pass)
    time.sleep(3)
    driver.find_element_by_id('loginbutton').send_keys(Keys.RETURN)
    time.sleep(5)
    driver.get("https://www.facebook.com/page")
    time.sleep(15)
    driver.find_element_by_tag_name('body').click()
    time.sleep(5)
    for i in xrange(10):
        driver.find_element_by_tag_name('body').send_keys(Keys.PAGE_DOWN)
        time.sleep(1)
        print(i)
    time.sleep(10)
    all_likes_urls = driver.find_elements_by_xpath("//a[contains(@href, '/ufi/')]")
    return all_likes_urls

def open_post_likes():
    all_likers = []
    for url in login_to_fb():
        time.sleep(10)
        driver.get("{}".format(url.get_attribute("href")))
        print("ok1")
        time.sleep(5)
        driver.find_element_by_tag_name("body").click()
        time.sleep(2)
        print("ok2")
        likers = driver.find_elements_by_xpath("//ul[contains(@class, 'uiList')]/li/div/ul/li/div/a")
        print("ok3")
        for liker in likers:
            print liker.get_attribute("href")
    return all_likers

print open_post_likes()

我做错了什么?

1 个答案:

答案 0 :(得分:1)

导航到for循环中的另一个页面后,您不再在同一页面上。如果您导航回到在for循环的每个迭代结束时从all_likes_urls获取def open_post_likes(): all_likers = [] for url in login_to_fb(): time.sleep(10) driver.get("{}".format(url.get_attribute("href"))) print("ok1") time.sleep(5) driver.find_element_by_tag_name("body").click() time.sleep(2) print("ok2") likers = driver.find_elements_by_xpath("//ul[contains(@class, 'uiList')]/li/div/ul/li/div/a") print("ok3") for liker in likers: print liker.get_attribute("href") driver.back() ##go back to the previous page ## driver.get("https://www.facebook.com/page") ## Or go to the original page where u fetched urls from return all_likers 的原始页面,则应该解决此问题,即

<?php 
    header("Content-Type: application/octet-stream");
    $name = $_GET['name'];
    header("Content-Disposition: attachment; filename=\"$name\"");
    $path = str_replace(' ', '%20', $_GET['path']);
    readfile($path);
    exit;
?>