如何缩短此代码? 我敢肯定,有一种更聪明的方式来编写代码。 工作正常,很丑。
这是一种在电子商务网站上抓取商品数量的方法。
import requests
from bs4 import BeautifulSoup
import re
url1 = "https://us.pandora.net/en/charms/?sz=30&start={}&format=page-element"
url2 = "https://us.pandora.net/en/bracelets/?sz=30&start={}&format=page-element"
url3 = "https://us.pandora.net/en/rings/?sz=30&start={}&format=page-element"
url4 = "https://us.pandora.net/en/necklaces/?sz=30&start={}&format=page-element"
url5 = "https://us.pandora.net/en/earrings/?sz=30&start={}&format=page-element"
#res = requests.get(link.format(url1),headers={"User-Agent":"Mozilla/5.0"})
soup1 = BeautifulSoup(requests.get(url1.format(0)).text, 'lxml')
soup2 = BeautifulSoup(requests.get(url2.format(0)).text, 'lxml')
soup3 = BeautifulSoup(requests.get(url3.format(0)).text, 'lxml')
soup4 = BeautifulSoup(requests.get(url4.format(0)).text, 'lxml')
soup5 = BeautifulSoup(requests.get(url5.format(0)).text, 'lxml')
total_items1 = ''.join(re.findall(r'\d', soup1.select_one('span.products-count').text))
total_items2 = ''.join(re.findall(r'\d', soup2.select_one('span.products-count').text))
total_items3 = ''.join(re.findall(r'\d', soup3.select_one('span.products-count').text))
total_items4 = ''.join(re.findall(r'\d', soup4.select_one('span.products-count').text))
total_items5 = ''.join(re.findall(r'\d', soup5.select_one('span.products-count').text))
#categories = [tag['title'].strip() for tag in soup.select('.refinement-link[title]')
#total_items_sale1 = ''.join(re.findall(r'\d', soup1.select_one('.grid-tile .price-standard')))
#total_items_sale1
#total_items_sale1
#total_items_sale1
#total_items_sale1
#print('Categories:')
#for category in categories:
#print('\t{}'.format(category))
print('\nTotal Charms: {}'.format(total_items1))
print('\nTotal Bracelets: {}'.format(total_items2))
print('\nTotal Rings: {}'.format(total_items3))
print('\nTotal Necklaces: {}'.format(total_items4))
print('\nTotal Earrings: {}'.format(total_items5))