Python 网页抓取无法获取第二页

时间:2017-10-10 22:59:03

标签: python web-scraping scrapy web-crawler

我的 Python 代码可以抓取第一页的网址,但无法抓取第二页。我想从多个页面获取标题、地址和电话号码。

from __future__ import unicode_literals
import csv

import requests
from scrapy.selector import Selector
import scrapy
def fetch_page(url, timeout=10):
    """Download ``url`` with an HTTP GET and return the response body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before ``requests`` gives up.
            Without a timeout, ``requests.get`` can block indefinitely on an
            unresponsive server and hang the whole scrape.

    Returns:
        The decoded response body (``Response.text``).

    Raises:
        requests.exceptions.Timeout: If the server does not respond within
            ``timeout`` seconds.
    """
    response = requests.get(url, timeout=timeout)
    return response.text

def upso_list_from_listpage(url):
    """Return the ``href`` of every ``.title_list`` element on a listing page.

    The page at ``url`` is downloaded with ``fetch_page`` and parsed with a
    scrapy ``Selector``; the result of ``extract()`` on the matching
    ``href`` attributes is returned unchanged.
    """
    page_source = fetch_page(url)
    detail_links = Selector(text=page_source).css('.title_list::attr(href)')
    return detail_links.extract()

def upso_from_page(url):
    """Scrape the title, address and phone fields from one detail page.

    Returns a dict with keys ``'title'``, ``'address'`` and ``'phone'``;
    each value is whatever ``extract()`` yields for the matching CSS
    selector (``h1``, ``address`` and ``.mt1`` text nodes respectively).
    """
    sel = Selector(text=fetch_page(url))
    # Output key -> CSS query, evaluated in this order.
    css_by_field = (
        ('title', 'h1::text'),
        ('address', 'address::text'),
        ('phone', '.mt1::text'),
    )
    return {field: sel.css(query).extract() for field, query in css_by_field}
def upso_final(page=1):
    """Scrape every listing linked from one results page.

    Args:
        page: Value substituted for the ``page`` query parameter of the
            listing URL.

    Returns:
        A list with one ``upso_from_page`` result per link returned by
        ``upso_list_from_listpage`` for that page.
    """
    url_template = (
        "http://yp.koreadaily.com/list/list.asp?page={0}"
        "&bra_code=SF&cat_code=S5050A&strChar=7&searchField=&txtAddr="
        "&txtState=&txtZip=&txtSearch=&sort=N"
    )
    results = []
    for detail_url in upso_list_from_listpage(url_template.format(page)):
        results.append(upso_from_page(detail_url))
    return results


from pprint import pprint

# upso_final() scrapes exactly one listing page (page=1 by default), so a
# single call never reaches page 2. Walk the page numbers explicitly and
# collect the results of each page.
# MAX_PAGES is an arbitrary safety cap, not a known page count: it bounds the
# loop in case the site keeps returning listings for out-of-range page
# numbers. NOTE(review): adjust once the real number of pages is known.
MAX_PAGES = 50

all_upsos = []
for page in range(1, MAX_PAGES + 1):
    upsos = upso_final(page)
    if not upsos:
        # A page with no listing links is treated as the end of the results.
        break
    all_upsos.extend(upsos)

pprint(all_upsos)

我的代码能从第一页提取 15 条列表项,但不会继续抓取第二页。

我需要帮助。谢谢

0 个答案:

没有答案