from requests import get
from bs4 import BeautifulSoup
from urllib.request import Request, urlopen
url = "http://www.metacritic.com/browse/games/score/metascore/year/pc/filtered?sort=desc&year_selected=2018"
尝试在页面上获取所有游戏名称,元数据和用户分数
这是我到目前为止所做的:
# not sure about this but it works (I was getting blocked by something and this the way I found around it)
req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
web_byte = urlopen(req).read()
webpage = web_byte.decode('utf-8')
#this grabs the all the text from the page
html_soup = BeautifulSoup(webpage, 'lxml')
game_containers = html_soup.find_all("div", class_="product_item product_title")
game_names = html_soup.find_all("div", class_="product_item product_title")
game_metascores_p = html_soup.find_all("div", class_="metascore_w small game positive")[0].text.strip()
game_metascores_m = html_soup.find_all("div", class_="metascore_w small game mixed")[0].text.strip()
game_user_s = html_soup.find_all("span", class_="data textscore textscore_favorable")[0].text.strip()
#lists to store the data
names = []
metascores = []
userscores = []
#Extract data from each game
for games in game_containers:
name = games.find("div", class_="product_item product_title")
print(name)
metascore = games.find("div", class_="product_item product_title")[0].text.strip() or games.find_all("div", class_="metascore_w small game mixed")[0].text.strip()
print(meta_score)
我知道此代码存在多个问题
是“name”和“metascore”不会返回我正在寻找的信息
也不确定如何结合game_metascores_p和game_metascores_m才能正常工作(我希望游戏名称与game_metascores_p和game_metascores_s相关联,如果有一个或另一个)
任何帮助将不胜感激
这就是游戏和metascore印刷的内容:
对于游戏然后是metascore(它重复100x):
None
[]
我想要第一个(依此类推):
In to the Breach
89
答案 0 :(得分:0)
我明白了!
这段代码给了我想要的东西
# not sure about this but it works (I was getting blocked by something and this the way I found around it)
req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
web_byte = urlopen(req).read()
webpage = web_byte.decode('utf-8')
#this grabs the all the text from the page
html_soup = BeautifulSoup(webpage, 'html5lib')
#this is for selecting all the games in from 1 to 100 (the list of them)
game_containers = html_soup.find_all("div", class_="product_item product_title")
# print(game_containers)
game_names = html_soup.find_all("div", class_="product_item product_title")[0].text.strip()
game_metascores_p = html_soup.find_all("div", class_="metascore_w small game positive")[0].text.strip()
game_metascores_m = html_soup.find_all("div", class_="metascore_w small game mixed")[0].text.strip()
game_user_s = html_soup.find_all("span", class_="data textscore textscore_favorable")[0].text.strip()
# print(game_names)
# print(game_metascores_m or game_metascores_p)
# print(game_user_s)
#lists to store the data
names = []
metascores = []
userscores = []
#Extract data from each game
for games in game_containers:
# game_names = html_soup.find_all("div", class_="product_item product_title")[0].text.strip()
# game_metascores_p = html_soup.find_all("div", class_="metascore_w small game positive")[0].text.strip()
# game_metascores_m = html_soup.find_all("div", class_="metascore_w small game mixed")[0].text.strip()
# game_user_s = html_soup.find_all("span", class_="data textscore textscore_favorable")[0].text.strip()
name = games.find()
names.append(name.text.strip())
# metascore = games.find("div", class_="product_item product_title") or games.find_all("div", class_="metascore_w small game mixed")
# print(metascore)
# print(game_metascores_p)
没有给我metascore思想,我不知道为什么