从网站抓取数据时,每次运行得到的结果都不一样

时间:2016-10-03 08:38:02

标签: python-2.7 beautifulsoup data-cleaning

在这段代码中,我试图获取手机的某些属性,但无法获取到这些信息,尽管这些属性确实存在于对应网址的页面中。

import requests,re
from bs4 import BeautifulSoup
from time import sleep
import urllib2

def demo(url):
    """Download a product page and print the review snippets found in it.

    The page at ``url`` is fetched with ``requests`` (retrying every 15
    seconds while a ``ConnectionError`` is raised), parsed with
    BeautifulSoup, and searched for:

    * the "pros" list (``ul.for_list_overview``) -- collected, but only a
      failure message is ever printed for it;
    * the "cons" list (``ul.against_list_overview``) -- printed;
    * text inside the ``overview_specs_green_box display_none`` div that
      mentions ``camera``, ``processor``, ``battery`` or ``display`` --
      each printed in that order;
    * the verdict div (matched by its inline ``style``) -- printed, or
      ``na`` when no matching text is found.

    Whenever an expected element is missing, a short "not available"
    message is printed instead.

    :param url: address of the page to scrape.
    :returns: ``None``; all results are written to stdout.
    """
    header = {'user-agent': 'Mozilla/5.0 (Windows NT 6.2; Win64; x64)', }
    pro = con = ""

    while True:
        try:
            # The headers must be passed by keyword.  The second positional
            # argument of requests.get() is ``params``, so the previous
            # string "headers=header" was sent as a URL query string and
            # the custom User-Agent was never applied.
            html = requests.get(url, headers=header).text
            break
        except requests.exceptions.ConnectionError:
            print(" connection error")
            sleep(15)

    soup = BeautifulSoup(html, "html.parser")

    # --- pros -----------------------------------------------------------
    # A missing <ul> or <span> yields None, and the attribute access on it
    # raises AttributeError, which is how "not found" is detected below.
    try:
        pros = soup.find("ul", attrs={"class": "for_list_overview"})
        for item in pros.find_all('li'):
            pro += item.find('span').contents[0] + "\n"
    except AttributeError:
        print("pro not found")

    # --- cons -----------------------------------------------------------
    try:
        cons = soup.find("ul", attrs={"class": "against_list_overview"})
        for item in cons.find_all('li'):
            con += item.find('span').contents[0] + "\n"
        print(con)
    except AttributeError:
        print("con not available")

    # --- per-feature reviews -------------------------------------------
    # All four reviews come from the same container; when it is missing,
    # ``dac`` is None and every keyword reports "not available".
    dac = soup.find("div", attrs={"class": "overview_specs_green_box display_none"})
    for keyword in ('camera', 'processor', 'battery', 'display'):
        try:
            print("".join(dac.find_all(text=re.compile(keyword))))
        except AttributeError:
            print(keyword + " review not available")

    # --- verdict --------------------------------------------------------
    try:
        vid = soup.find("div", attrs={"style": "font-weight:400 !important;color: #3c3c3c;"})
        # Keeps every text node containing the letter "a".
        verdict = "".join(vid.find_all(text=re.compile('a')))
        if not verdict:
            verdict = 'na'
        print(verdict)
    except AttributeError:
        print("verdict Attribute Error")



# Product pages to scrape, processed in the order listed.
url = [
    "http://www.91mobiles.com/xiaomi-redmi-note-3-32gb-price-in-india",
    "http://www.91mobiles.com/blackberry-priv-price-in-india",
    "http://www.91mobiles.com/oneplus-3-price-in-india",
    "http://www.91mobiles.com/coolpad-note-5-price-in-india",
    "http://www.91mobiles.com/vivo-v3-max-price-in-india",
    "http://www.91mobiles.com/oppo-f1s-price-in-india",
]

# Scrape each page and print a two-line separator after its output.
for page_url in url:
    demo(page_url)
    print("###################################################################################################")
    print("###################################################################################################")

每次运行代码时输出都会改变。

0 个答案:

没有答案