我编写了以下代码,用于从 https://www.1800wheelchair.com/category/369/transport-wheelchairs/?p=3 页面上的每个产品中抓取数据,但似乎有问题。代码不报任何错误,却也没有给出预期的输出。我猜测是我提取各产品链接的方式不对,所以输出面板中什么也没有打印。我已经花了很长时间试图解决这个问题,但一直没有成功。
import requests
import xlsxwriter
from bs4 import BeautifulSoup
def cpap_spider(max_pages):
    """Print the product-page URL of every product on the listing pages.

    Fetches listing pages 1 through ``max_pages`` of the transport-wheelchairs
    category and prints the ``href`` of the link found inside each
    ``<h2 class="product-name">`` heading.

    Args:
        max_pages: Number of the last listing page to fetch (pages start at 1).

    Raises:
        requests.HTTPError: If a listing page answers with an error status,
            so a blocked request is reported instead of silently printing
            nothing.
    """
    # The site reportedly only returns the expected listing when the request
    # carries a browser-like User-Agent header.
    headers = {
        "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:78.0) Gecko/20100101 Firefox/78.0"
    }
    for page in range(1, max_pages + 1):
        url = "https://www.1800wheelchair.com/category/369/transport-wheelchairs/?p=" + str(page)
        response = requests.get(url, headers=headers, timeout=30)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        for heading in soup.find_all("h2", {"class": "product-name"}):
            anchor = heading.find("a")
            # Skip headings without a usable link instead of raising TypeError.
            if anchor is None or not anchor.get("href"):
                continue
            href = anchor["href"]
            # TODO: once the links print correctly, write the product title to
            # column 0 of the worksheet and call each_item(href) here.
            print(href)
def each_item(item_url):
    """Write one product's specification table into worksheet row ``row_i``.

    Downloads the product page, looks for the specifications table and, for
    each table row, treats the first non-empty cell as a column name and the
    following cells as values. A column name seen for the first time is
    appended to ``cols_names`` and written to the header row (row 0). Values
    go to row ``row_i`` under their column (offset by one, since column 0 is
    the "Title" column). ``row_i`` is advanced once the table has been
    written.

    Args:
        item_url: URL of the product page to read.

    Returns:
        None. Returns early, without advancing ``row_i``, when the page has
        no specifications table.
    """
    global cols_names, row_i
    # The site reportedly only returns the expected page when the request
    # carries a browser-like User-Agent header.
    headers = {
        "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:78.0) Gecko/20100101 Firefox/78.0"
    }
    response = requests.get(item_url, headers=headers, timeout=30)
    soup = BeautifulSoup(response.text, 'html.parser')
    # The class filter must not carry a trailing space: BeautifulSoup compares
    # it against whitespace-split class tokens, so "specifications " would
    # never match.
    table = soup.find("table", {"class": "specifications"})
    if table is None:
        return
    for row in table.find_all('tr'):
        # Reset per row so a value is never filed under the previous row's
        # column when this row's name cell is empty.
        column = None
        for position, cell in enumerate(row.select('td')):
            text = cell.text.strip()
            if not text:
                continue
            if text.endswith(":"):
                text = text[:-1]
            if position == 0:
                # First cell holds the column name.
                try:
                    column = cols_names.index(text)
                except ValueError:
                    cols_names.append(text)
                    column = len(cols_names) - 1
                    worksheet.write(0, column + 1, text)
                continue
            if column is None:
                # No column name was found for this row; nothing to file under.
                continue
            worksheet.write(row_i, column + 1, text)
    row_i += 1
# Specification names seen so far; each_item maps a name's index + 1 to its
# worksheet column.
cols_names = []
# NOTE: each_item assigns its own local variable of this name, so this
# module-level value is not read by the functions shown here.
cols_names_i = 0
# Worksheet row that each_item writes to; row 0 is the header row.
row_i = 1
workbook = xlsxwriter.Workbook('st.xlsx')
worksheet = workbook.add_worksheet()
try:
    worksheet.write(0, 0, "Title")
    cpap_spider(3)
finally:
    # Close the workbook even if the crawl raises.
    workbook.close()
答案 0 :(得分:0)
要获得正确的结果,请在请求中设置 User-Agent HTTP 标头:
import requests
from bs4 import BeautifulSoup
# Fetch one listing page with a browser-like User-Agent and print a numbered
# list of the product links found on it.
url = 'https://www.1800wheelchair.com/category/369/transport-wheelchairs/?p=3'
headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:78.0) Gecko/20100101 Firefox/78.0'
}
response = requests.get(url, headers=headers)
soup = BeautifulSoup(response.content, 'html.parser')
# Product links are the anchors carrying itemprop="url" and the "button" class.
product_links = soup.select('a[itemprop="url"].button')
for number, link in enumerate(product_links, 1):
    print('{:<3} {}'.format(number, link['href']))
打印:
1 https://www.1800wheelchair.com/product/22-bariatric-aluminum-transport-chair/
2 https://www.1800wheelchair.com/product/lightweight-bariatric-transport-chair-63523/
3 https://www.1800wheelchair.com/product/medline-bariatric-transport-chair-with-12-rear-wheels/
4 https://www.1800wheelchair.com/product/karman-t-900-extra-wide-transport-wheelchair/
5 https://www.1800wheelchair.com/product/excel-freedom-plus-bariatric-transport-chair/
6 https://www.1800wheelchair.com/product/karman-removable-arm-transport-chair/