我正在尝试抓取此页面:https://1xbet.cm/en/line/Football/1536237-FIFA-World-Cup-2018/,因为上面有足球比赛的赔率。但是当我尝试用 BeautifulSoup 按 class(CSS 类名)查找相关元素时,却什么也没有得到。有人可以解释为什么我什么都没找到吗?
class GetData:
    """Download a page and parse the HTML of the response with BeautifulSoup."""

    def __init__(self, url):
        """Fetch *url* and build ``self.soup`` from the response text.

        Args:
            url: Address of the page to download.
        """
        self.url = url
        # A timeout keeps the call from blocking forever if the server stalls.
        r = requests.get(url, timeout=10)
        self.soup = BeautifulSoup(r.text, "lxml")

    def do_smth(self):
        """Print every ``div`` with class ``bets_content`` found in the soup."""
        # Only the HTML returned by the server is searched; elements that the
        # page builds afterwards with JavaScript are not part of it, so the
        # printed list can be empty.
        content = self.soup.find_all("div", class_="bets_content")
        print(content)
if __name__ == '__main__':
    # Guarded so that importing this snippet does not trigger a network
    # request; running it as a script behaves as before.
    url = 'https://1xbet.cm/en/line/Football/1536237-FIFA-World-Cup-2018/'
    gd = GetData(url)
    gd.do_smth()
答案 0 :(得分:1)
我认为 BeautifulSoup 不能帮助您从该网站上抓取数据,因为该网站使用 VueJS 作为 JavaScript 框架,页面的最终内容是通过网站的 API / Web 服务获取数据后在浏览器中渲染出来的。
因此,为了获取数据,您可以直接解析API / Web服务并获得所需的内容。
以下是使用 requests 和 re 模块的示例:
import re
import requests
class GetData:
    """Read team and odds ("cotes") data from the 1xbet LineFeed JSON endpoints.

    The numeric id embedded in ``main_url`` is substituted into the endpoint
    URL templates used by ``get_teams_info`` and ``get_teams_cotes``.
    """

    # Seconds to wait for the server; without a timeout ``requests.get`` can
    # block indefinitely.
    timeout = 10

    def __init__(self):
        """Set the page URL and the browser-like headers sent with each request."""
        self.main_url = 'https://1xbet.cm/en/line/Football/1536237-FIFA-World-Cup-2018/'
        # NOTE(review): advertising 'br' assumes a Brotli decoder is available
        # to requests/urllib3 -- confirm in the target environment.
        self.headers = {
            'accept-encoding': 'gzip, deflate, br',
            'accept-language': 'fr-FR,fr;q=0.9,en-US;q=0.8,en;q=0.7',
            'referer': 'https://1xbet.cm/en/line/Football/1536237-FIFA-World-Cup-2018/',
            'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.79 Safari/537.36'
        }

    def read(self, url):
        """Return the decoded JSON body of a GET request to *url*.

        Raises:
            Exception: If the response status code is not 200.
        """
        with requests.get(url, headers=self.headers, timeout=self.timeout) as response:
            if response.status_code != 200:
                raise Exception('Got error: {}'.format(response.status_code))
            return response.json()

    def pretty_print(self, msg, data):
        """Print *msg* as a title, then *data*, then a 40-character ``#`` rule."""
        print(msg + ' :')
        print(data)
        print('#' * 40)

    def get_teams_id(self, url):
        """Return *url* with its ``{}`` placeholder filled with the id from ``main_url``.

        The id is the first run of digits in ``main_url`` that follows a ``/``
        and precedes a ``-``.

        Raises:
            ValueError: If ``main_url`` contains no such id.
        """
        match = re.search(r'/(\d+)-', self.main_url)
        if match is None:
            raise ValueError("Cannot parse Teams ID")
        return url.format(match.group(1))

    @staticmethod
    def _format_teams(values):
        """Render team dicts two per line as ``Name(id) VS Name(id)``."""
        pairs = (values[k:k + 2] for k in range(0, len(values), 2))
        return '\n'.join(
            ' VS '.join(
                '{}({})'.format(team.get('N'), team.get('I')) for team in pair
            )
            for pair in pairs
        )

    @staticmethod
    def _format_event(event):
        """Render one event dict: its 'L', 'O1', 'O2' and first/last 'E' entries."""
        # A missing or empty 'E' list is shown as None instead of crashing.
        cotes = event.get('E') or []
        first = cotes[0] if cotes else None
        last = cotes[-1] if cotes else None
        return '{}\n{} VS {}\nCotes: [{}, ..., {}]'.format(
            event.get('L'),
            event.get('O1'),
            event.get('O2'),
            first,
            last,
        )

    def get_teams_info(self, pretty_print=False):
        """Fetch the GetChampTeams payload and return it unchanged.

        Args:
            pretty_print: When true, also print the teams found under the
                payload's ``'Value'`` key, paired two per line.
        """
        teams_url = 'https://1xbet.cm/LineFeed/GetChampTeams?id={}&lng=en'
        data = self.read(self.get_teams_id(teams_url))
        if pretty_print:
            # ``or []`` also covers an explicit JSON null under 'Value'.
            values = data.get('Value') or []
            self.pretty_print('Teams Info', self._format_teams(values))
        return data

    def get_teams_cotes(self, pretty_print=False):
        """Fetch the Get1x2_VZip payload and return it unchanged.

        Args:
            pretty_print: When true, also print one summary per entry found
                under the payload's ``'Value'`` key.
        """
        cotes_url = 'https://1xbet.cm/LineFeed/Get1x2_VZip?champs={}&count=50&lng=en&tf=1500000&mode=4'
        data = self.read(self.get_teams_id(cotes_url))
        if pretty_print:
            # A missing or null 'Value' prints nothing, matching get_teams_info.
            for event in data.get('Value') or []:
                self.pretty_print('Events & Cotes', self._format_event(event))
        return data
if __name__ == '__main__':
    app = GetData()
    # The returned payloads are not needed here; pretty_print=True makes each
    # call print its own summary.
    app.get_teams_info(pretty_print=True)
    app.get_teams_cotes(pretty_print=True)
如果运行此代码,您将得到与以下内容类似的输出:
Teams Info :
Belgium(12609) VS Croatia(12739)
England(12763) VS France(12771)
########################################
Events & Cotes :
FIFA World Cup 2018
France VS Belgium
Cotes: [{'T': 1, 'G': 1, 'C': 2.58}, ..., {'T': 181, 'G': 19, 'C': 2.125}]
########################################
Events & Cotes :
FIFA World Cup 2018
Croatia VS England
Cotes: [{'T': 1, 'G': 1, 'C': 3.64}, ..., {'T': 181, 'G': 19, 'C': 1.805}]
########################################
现在轮到您来解析这些数据并提取所需的内容了。另外,请善待该网站,不要过于频繁地发送请求。