我正在尝试网络图片website,它是一家名为matchpoint.com的博彩公司。我的目标是有一个电子表格,我可以在不用访问他们的网站的情况下查看赔率。
我的代码打印赔率和下注文本。但它并没有对赔率进行排序,因此它与下注文本相匹配。我在下面附上了一张图片,您可以看到我正在抓取的页面。对于第一个下注文本有三个赔率,而另一个下注有两个赔率附加到文本。我花了3天时间搜索stackoverflow,但我还没能从这里继续前进。
在
"""
Soccer match results scraping object.
"""
from bs4 import BeautifulSoup
import requests
class scraper():
def __init__(self):
print('__init__ called')
def parsedata(self):
self.url = "https://www.cashpoint.dk/en/?r=bets/xtra&group=461392&game=312004790"
self.res = requests.get(self.url)
self.soup = BeautifulSoup(self.res.text, "lxml")
self.table = self.soup.find_all("table", class_="sportbet_extra_list_table")
for self.items in self.table:
self.odds = self.items.find("div", class_="sportbet_content_rate_right").get_text().strip()
print(self.odds)
self.text = self.items.find("td", class_="sportbet_extra_c1").get_text().strip()
print(self.text)
Scrape = scraper()
Scrape.parsedata()
输出:
__init__ called
1,38
Hvem vinder kampen?
1,09
Dobbeltchance
1,12
Head to Head (penge tilbage ved uafgjort)
2,17
Handicap 0:1
4,25
Handicap 0:2
8,60
etc
答案 0 :(得分:0)
您应该尝试使用find_all
来提取所有赔率,而不是find
。
all_odds = self.items.find_all("div", class_="sportbet_content_rate_right").get_text().strip()
self.odds = [x.get_text().strip() for x in all_odds]
print(self.odds)
答案 1 :(得分:0)
"""
Soccer match results scraping object.
"""
from bs4 import BeautifulSoup
import requests
import csv
import pandas as pd
url = "https://www.cashpoint.dk/da/?r=bets/xtra&group=467265&game=312021291"
class scraper():
def __init__(self):
print('__init__ called')
self.gamesList = []
def Cashpoint(self, url):
self.r = requests.get(url)
self.soup = BeautifulSoup(self.r.text, "lxml")
self.content = self.soup.find_all("div", class_="sportbet_extra_content")
for self.getTable in self.content:
self.oneTable = self.getTable.find_all("table", class_="sportbet_extra_list_table")
self.btextCount = len(self.oneTable) # Amount of tables
with open('names.csv', 'w', encoding='utf-8', newline='\n') as csvfile:
writer = csv.writer(csvfile)
headers = ['btext']
writer.writerow(headers)
for x in range(self.btextCount): # Bettext count.
# One Bettext, three odds per loop.
# Bettext
self.rawText = self.oneTable[x].find_all("td", class_="sportbet_extra_c1")
for self.getText in self.rawText:
self.getme = self.getText.get_text().strip().split(' ')[0]
self.txtList = [self.getme]
print(self.txtList)
# Odds
self.oddsTable = self.oneTable[x].find_all("div", class_="sportbet_content_rate_right")
for self.oddsRaw in self.oddsTable:
self.odds = self.oddsRaw.get_text().strip()
self.newodds = [self.odds]
print(self.newodds)
d = {'Bettext': [2, 52123321], 'Odds': [3, 4]}
df = pd.DataFrame(data=d)
print(df)
# print(self.gamesList)
# csvfile.close()
# print("CSVFILE CLOSED")
scrape = scraper()
scrape.Cashpoint(url)