import requests
from bs4 import BeautifulSoup
'''
A web crawler for eBay that collects the data of every single item listing.
'''
def ebay_spider(max_pages):
    """Crawl eBay UK Apple-laptop search pages and process each listing.

    For every result page from 1 through ``max_pages`` the page is fetched,
    each ``<a class="vip">`` anchor is collected, and the anchor's target URL
    is handed to ``get_single_item_data``.

    Args:
        max_pages: Number of the last result page to visit (inclusive).
            Nothing is fetched when it is less than 1.
    """
    # Imported locally so this function carries its own dependency.
    from urllib.parse import urljoin

    base_url = 'http://www.ebay.co.uk'
    for page in range(1, max_pages + 1):
        url = base_url + '/sch/Apple-Laptops/111422/i.html?_pgn=' + str(page)
        response = requests.get(url)
        # Name the parser explicitly: without it bs4 warns that no parser was
        # specified and may choose a different one depending on what is
        # installed on the machine.
        soup = BeautifulSoup(response.text, 'html.parser')
        for link in soup.find_all('a', {'class': 'vip'}):
            href = link.get('href')
            if not href:
                # An anchor without an href gives us nothing to follow.
                continue
            # urljoin keeps absolute links as they are and resolves relative
            # ones against the site root, instead of blindly prefixing.
            get_single_item_data(urljoin(base_url, href))
def get_single_item_data(item_url):
    """Fetch one item page and print the text of its title heading(s).

    Args:
        item_url: URL of the item page to download.
    """
    response = requests.get(item_url)
    # Name the parser explicitly so bs4 does not warn about guessing one and
    # the result does not depend on which parsers happen to be installed.
    soup = BeautifulSoup(response.text, 'html.parser')
    for heading in soup.find_all('h1', {'id': 'itemTitle'}):
        print(heading.string)
# Script entry: crawl the first three result pages.
ebay_spider(max_pages=3)
错误信息如下:http://imgur.com/403a6N8
编辑:抱歉各位,之前的标题和标签有误,现在已全部修正。
我尝试过解决这个问题,但似乎不起作用。有什么提示或答案可以帮我解决它吗?
答案 0 :(得分:1)
创建 BeautifulSoup 对象时,请把这一行:
soup = BeautifulSoup(plain_text)
改为:
soup = BeautifulSoup(plain_text, 'html.parser')
注意:您的问题与 bs4 模块有关,而不是 requests 模块。
答案 1 :(得分:0)
这与 requests 模块完全无关。正如 Jean-Francois 所说,按照错误信息的提示去做即可。
soup = BeautifulSoup(plain_text,"html.parser",markup_type=markup_type)