我正在尝试使用 Python / BeautifulSoup 解析一个网页。以下是我的代码。我想知道:能否不借助两个列表就得到排序后的字典?或者有没有更高效的做法?
import requests
from bs4 import BeautifulSoup
import operator
# Scrape the sponsor-video page and report each company's vote count,
# first in dict order and then sorted by votes, highest first.

# A timeout keeps the script from hanging on a stalled connection, and
# raise_for_status() stops us from parsing an HTTP error page as data.
req = requests.get('https://www.firstchampionship.org/sponsorvideos', timeout=30)
req.raise_for_status()
soup = BeautifulSoup(req.text, "lxml")

# Company names with non-ASCII characters dropped. Decoding back to text
# keeps the keys as strings, so the "->" concatenation below also works on
# Python 3 (where encode() alone would leave bytes).
companies = [
    company.text.encode('ascii', 'ignore').decode('ascii')
    for company in soup.find_all("div", {"class": "views-field views-field-title"})
]

# The vote count is the first space-separated token of each rate-info block.
votes = [
    int(vote.text.split(" ")[0])
    for vote in soup.find_all("div", {"class": "rate-info"})
]

# Pair names and counts by position.
# NOTE(review): assumes the page emits title blocks and rate-info blocks in
# the same order, one per company -- confirm against the page markup.
all_in_one = dict(zip(companies, votes))

# items() exists on both Python 2 and 3; iteritems() is Python 2 only.
for key, value in all_in_one.items():
    print(key + "->" + str(value))

sorted_x = sorted(all_in_one.items(), key=operator.itemgetter(1), reverse=True)
print(sorted_x)
我的最终输出如下。格式为:公司名称 -> 票数
[('Analog Devices', 7227), ('Bechtel', 6797), ('NVIDIA', 436), ('Qualcomm Incorporated', 349), ('ViaSat', 292), ('BOSCH', 201), ('Nokia Bell Labs', 124), ('Walt Disney Imagineering', 119), ('Google, Inc.', 113), ('NI', 109), ('FedEx', 100), ('NASA', 97), ('The Boeing Company', 86), ('U.S. Air Force', 83), ('FIRST', 74), ('3M Company', 73), ('Twitch', 73), ('Baxter', 70), ('Rockwell Automation', 68), ('Booz Allen Hamilton', 68), ('NRG', 66), ('Mouser Electronics', 63), ('IBM Corporation', 63), ('John Deere', 63), ('Motorola Solutions', 62), ('Delphi', 62), ('Boston Scientific', 60), ('Texas Instruments', 59), ('The Dow Chemical Co.', 59), ('PTC', 59), ('Xerox', 58), ('Southwest Airlines', 57), ('GM', 55), ('Vulcan Spring', 53), ('Rockwell Collins', 52), ('Festo', 52), ('Monsanto', 50), ('LEGO Education', 39)]
答案 0 :(得分:0)
您可以先找到公司标题节点,取其父节点,再查找其后第一个投票节点,把公司名称和票数组成一个 dict 追加到列表中,最后对该列表排序。
import requests
from bs4 import BeautifulSoup
from operator import itemgetter
# Build one {"Company", "Votes"} record per sponsor and print the records
# sorted by company name.

# A timeout keeps the script from hanging on a stalled connection, and
# raise_for_status() stops us from parsing an HTTP error page as data.
req = requests.get('https://www.firstchampionship.org/sponsorvideos', timeout=30)
req.raise_for_status()
soup = BeautifulSoup(req.text, "lxml")

companies = []
for company in soup.find_all("div", {"class": "views-field views-field-title"}):
    # Starting from the title's parent, take the next rate-info block in
    # document order as this company's vote block.
    rate_info = company.parent.find_next("div", class_="rate-info")
    companies.append({
        # Drop non-ASCII characters, then decode so the name is text rather
        # than bytes on Python 3.
        "Company": company.get_text(strip=True).encode('ascii', 'ignore').decode('ascii'),
        # First space-separated token of the vote text; kept as a string.
        "Votes": rate_info.get_text(strip=True).split(' ')[0],
    })

print(sorted(companies, key=itemgetter('Company')))
输出:
[{'Company': '3M Company', 'Votes': u'76'}, {'Company': 'Analog Devices', 'Votes': u'7282'} ...
答案 1 :(得分:0)
您可以使用 collections 模块中的 Counter。
# The standard-library module is "collections"; "collection" does not exist.
from collections import Counter

from bs4 import BeautifulSoup
import requests

# Tally votes per sponsor and print them from most to fewest votes.

# A timeout keeps the script from hanging on a stalled connection, and
# raise_for_status() stops us from parsing an HTTP error page as data.
req = requests.get('https://www.firstchampionship.org/sponsorvideos', timeout=30)
req.raise_for_status()
soup = BeautifulSoup(req.text, "lxml")

counts = Counter()
companies = soup.find_all("div", {"class": "views-field views-field-title"})
votes = soup.find_all("div", {"class": "rate-info"})

# Pair title and vote blocks by position.
# NOTE(review): assumes both lists come back in the same order, one entry
# per company -- confirm against the page markup.
for company, vote in zip(companies, votes):
    # Drop non-ASCII characters, then decode so the name prints as text
    # rather than as a bytes repr on Python 3.
    company_name = company.text.encode('ascii', 'ignore').decode('ascii')
    # The vote count is the first space-separated token of the block text.
    vote_count = int(vote.text.split(" ")[0])
    counts[company_name] = vote_count

# most_common() yields (name, count) pairs ordered by count, highest first.
for company_name, vote_count in counts.most_common():
    print(company_name, vote_count)