导航到新网页并访问其内容

时间:2017-08-18 08:07:06

标签: python selenium

我想从网页导航到新页面并访问其内容,然后再次返回原始页面并访问原始页面的内容。但它会引发错误。

我能够导航到新页面并访问其内容,而且我能够导航到原始页面但无法访问原始页面的内容。

以下是我要解析的webpage

以下是我一直在处理的代码: -

import sys
import csv
import os
import time
import urllib

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains

url = 'https://www.amazon.com/s/ref=sr_pg_1?sort=salesrank&ie=UTF8&rh=n%3A133140011&page=1&unfiltered=1'
driver = webdriver.Chrome();
driver.maximize_window() #For maximizing window
driver.get(url);
driver.implicitly_wait(3) #gives an implicit wait for 3 seconds
while driver.execute_script("return document.readyState") != 'complete':
    pass;
elem =driver.find_elements_by_xpath("/html/body/div[1]/div[2]/div/div[3]/div[2]/div/div[4]/div[1]/div/ul/li");
for i,item in enumerate(elem):
    temp = 0; #temp =0 refers it is best seller;
    try:
        if item.find_element_by_xpath("div/div[1]/div/a/span[1]/span").text.encode('utf-8') == "Best Seller":
            print "Best Seller :- Yes";
    except:
        print "Best Seller :- No"
        temp = 1;
    if temp == 0:

        print "Name of Book :- %s" %(item.find_element_by_xpath("div/div[2]/div/div[2]/div[1]/div[1]/a/h2").text.encode('utf-8'));
        #enter.write("Name of Book :- %s\n" %(item.find_element_by_xpath("div/div[2]/div/div[2]/div[1]/div[1]/a/h2").text.encode('utf-8')))

        print "Release Date :- %s" %(item.find_element_by_xpath("div/div[2]/div/div[2]/div[1]/div[1]/span[3]").text.encode('utf-8'));
        #enter.write("Release Date :- %s\n" %(item.find_element_by_xpath("div/div[2]/div/div[2]/div[1]/div[1]/span[3]").text.encode('utf-8')));

        print "Author :- %s" %(item.find_element_by_xpath("div/div[2]/div/div[2]/div[1]/div[2]/span[2]").text.encode('utf-8'));
        #enter.write("Author :- %s\n" %(item.find_element_by_xpath("div/div[2]/div/div[2]/div[1]/div[2]/span[2]").text.encode('utf-8')));

        print "URL-Page of the book :- %s" %(item.find_element_by_xpath("div/div[2]/div/div[2]/div[1]/div[1]/a").get_attribute("href"));
        #enter.write("URL-Page of the book :- %s\n" %(item.find_element_by_xpath("div/div[2]/div/div[2]/div[1]/div[1]/a").get_attribute("href")));          
        new_url = item.find_element_by_xpath("div/div[2]/div/div[2]/div[1]/div[1]/a").get_attribute("href") 

        driver.get(new_url);
        while driver.execute_script("return document.readyState") != 'complete':
            pass;
        for nums in range(1,10):    
            if driver.find_element_by_xpath("""//*[@id="productDetailsTable"]/tbody/tr/td/div/ul/li[%s]/b"""%(nums)).text == 'ASIN:':
                print driver.find_element_by_xpath("""//*[@id="productDetailsTable"]/tbody/tr/td/div/ul/li[%s]"""%(nums)).text.encode('utf-8');
                break;
        print "No. of 5 star ratings :- %s"%(driver.find_element_by_xpath("""//*[@id="histogramTable"]/tbody/tr[1]/td[3]/a""").text.encode('utf-8'));
        print "No. of 4 star ratings :- %s"%(driver.find_element_by_xpath("""//*[@id="histogramTable"]/tbody/tr[2]/td[3]/a""").text.encode('utf-8'));   
        print "No. of 3 star ratings :- %s"%(driver.find_element_by_xpath("""//*[@id="histogramTable"]/tbody/tr[3]/td[3]/a""").text.encode('utf-8'));
        print "No. of 2 star ratings :- %s"%(driver.find_element_by_xpath("""//*[@id="histogramTable"]/tbody/tr[4]/td[3]/a""").text.encode('utf-8'));
        print "No. of 1 star ratings :- %s"%(driver.find_element_by_xpath("""//*[@id="histogramTable"]/tbody/tr[5]/td[3]/a""").text.encode('utf-8'));
        driver.back();
    else:
        flag = item.find_element_by_xpath("div/div/div/div[2]")
        print "Name of Book :- %s" %(flag.find_element_by_xpath("div[1]/div[1]/a/h2").text.encode('utf-8'));
        print "Release Date :- %s" %(flag.find_element_by_xpath("div[1]/div[1]/span[3]").text.encode('utf-8'));
        print "Author :- %s" %(flag.find_element_by_xpath("div[1]/div[2]/span[2]").text.encode('utf-8'));
        print "URL-Page of the book :- %s" %(flag.find_element_by_xpath("div[1]/div[1]/a").get_attribute("href"));          new_url = flag.find_element_by_xpath("div[1]/div[1]/a").get_attribute("href") 

        driver.get(new_url);
        while driver.execute_script("return document.readyState") != 'complete':
            pass;
        for nums in range(1,10):    
            if driver.find_element_by_xpath("""//*[@id="productDetailsTable"]/tbody/tr/td/div/ul/li[%s]/b"""%(nums)).text.encode('utf-8') == 'ASIN:':
                print driver.find_element_by_xpath("""//*[@id="productDetailsTable"]/tbody/tr/td/div/ul/li[%s]"""%(nums)).text.encode('utf-8');
                break;
        print "No. of 5 star ratings :- %s"%(driver.find_element_by_xpath("""//*[@id="histogramTable"]/tbody/tr[1]/td[3]/a""").text.encode('utf-8'));
        print "No. of 4 star ratings :- %s"%(driver.find_element_by_xpath("""//*[@id="histogramTable"]/tbody/tr[2]/td[3]/a""").text.encode('utf-8'));   
        print "No. of 3 star ratings :- %s"%(driver.find_element_by_xpath("""//*[@id="histogramTable"]/tbody/tr[3]/td[3]/a""").text.encode('utf-8'));
        print "No. of 2 star ratings :- %s"%(driver.find_element_by_xpath("""//*[@id="histogramTable"]/tbody/tr[4]/td[3]/a""").text.encode('utf-8'));
        print "No. of 1 star ratings :- %s"%(driver.find_element_by_xpath("""//*[@id="histogramTable"]/tbody/tr[5]/td[3]/a""").text.encode('utf-8'));       

        driver.back();

它给我以下错误

  

第57行,即

     

flag = item.find_element_by_xpath(“div / div / div / div [2]”)selenium.common.exceptions.StaleElementReferenceException:Message:   陈旧元素引用:元素未附加到页面文档

有人可以帮我吗?

1 个答案:

答案 0 :(得分:0)

执行此操作后,您的问题就是driver.back();。您在代码中有对象引用的每个元素都将变得陈旧且无效。你需要重新创建该对象。

因此,您应该以循环内部的方式重新创建循环

elem =driver.find_elements_by_xpath("/html/body/div[1]/div[2]/div/div[3]/div[2]/div/div[4]/div[1]/div/ul/li");

你的循环不应该是

for i,item in enumerate(elem):

它应该是一个数字循环,在循环内部做一个索引关联

item = elem[i]