我正在尝试进行一些网络抓取操作,以下载欧洲百万富翁的所有结果,但现在仍然存在错误。我使用指定模块的jupyter和python 3。仅使用一个链接,代码就可以正常工作,但是现在我添加了一个循环以及一些修改和rip xD
import bs4
from urllib.request import urlopen as uReq
from bs4 import BeautifulSoup as soup
years = list(range(2004,2018))
for year in years:
my_urls = ('https://www.euro-millions.com/pt/arquivo-de-resultados-' + str(year),)
my_url = my_urls[0]
for my_url in my_urls:
Client = uReq(my_url)
html = Client.read
Client.close()
euro = soup(html, "html")
containers = euro.findAll("div",{"class":"archives"})
print(containers)
container = containers[0]
for container in containers:
data = container.a["href"].replace('/pt/resultados/','') #Usamos os [] como num dicionario, .strip tbm retira o lixo sometimes
bolasN = container.ul.findAll("li",{"class":"ball"})
bolasS = container.ul.findAll("li",{"class":"lucky-star"})
bola1 = bolasN[0].text
bola2 = bolasN[1].text
bola3 = bolasN[2].text
bola4 = bolasN[3].text
bola5 = bolasN[4].text
star1 = bolasS[0].text
star2 = bolasS[1].text
TUDO = [data, bola1, bola2, bola3, bola4, bola5, star1, star2]
print(TUDO)
回溯:
TypeError Traceback (most recent call last)
<ipython-input-31-b11e2044b5ea> in <module>
12 html = Client.read
13 Client.close()
---> 14 euro = soup(html, "html")
15 containers = euro.findAll("div",{"class":"archives"})
16 print(containers)
/usr/local/lib/python3.5/dist-packages/bs4/__init__.py in __init__(self, markup, features, builder, parse_only, from_encoding, exclude_encodings, **kwargs)
244 if hasattr(markup, 'read'): # It's a file-type object.
245 markup = markup.read()
--> 246 elif len(markup) <= 256 and (
247 (isinstance(markup, bytes) and not b'<' in markup)
248 or (isinstance(markup, str) and not '<' in markup)
TypeError: object of type 'method' has no len()
答案 0 :(得分:0)
尝试直接传递HTML文本
soup = BeautifulSoup(html.text)
答案 1 :(得分:0)
标记错误,应为“ html.parser”或“ lxml”
import requests
from bs4 import BeautifulSoup as soup
years = list(range(2004,2018))
for year in years:
my_urls = ('https://www.euro-millions.com/pt/arquivo-de-resultados-' + str(year),)
my_url = my_urls[0]
for my_url in my_urls:
Client = requests.get(my_url)
euro = soup(Client.content, "html.parser")
containers = euro.findAll("div",{"class":"archives"})
#print(containers)
container = containers[0]
for container in containers:
data = container.a["href"].replace('/pt/resultados/','') #Usamos os [] como num dicionario, .strip tbm retira o lixo sometimes
bolasN = container.ul.findAll("li",{"class":"ball"})
bolasS = container.ul.findAll("li",{"class":"lucky-star"})
bola1 = bolasN[0].text
bola2 = bolasN[1].text
bola3 = bolasN[2].text
bola4 = bolasN[3].text
bola5 = bolasN[4].text
star1 = bolasS[0].text
star2 = bolasS[1].text
TUDO = [data, bola1, bola2, bola3, bola4, bola5, star1, star2]
print(TUDO)
输出:
['29-12-2017', '4', '8', '22', '23', '48', '1', '12']
['26-12-2017', '4', '17', '30', '43', '44', '2', '10']
['22-12-2017', '5', '24', '30', '31', '43', '3', '6']
['19-12-2017', '8', '15', '30', '38', '46', '4', '7']
['15-12-2017', '25', '30', '31', '42', '50', '2', '11']
['12-12-2017', '20', '37', '39', '44', '50', '4', '8']
['08-12-2017', '4', '22', '30', '32', '34', '3', '4']
['05-12-2017', '11', '36', '43', '44', '48', '2', '7']
['01-12-2017', '5', '24', '29', '35', '46', '11', '12']
['28-11-2017', '1', '6', '12', '18', '42', '2', '7']
['24-11-2017', '19', '24', '28', '30', '50', '3', '10']
['21-11-2017', '2', '10', '14', '28', '31', '5', '7']
['17-11-2017', '20', '26', '35', '36', '42', '5', '12']
['14-11-2017', '14', '16', '39', '40', '41', '8', '10']
['10-11-2017', '13', '22', '29', '36', '37', '1', '9']
['07-11-2017', '7', '19', '20', '37', '41', '2', '12']
['03-11-2017', '5', '12', '17', '33', '41', '4', '9']
['31-10-2017', '1', '12', '36', '43', '46', '3', '5']
['27-10-2017', '3', '16', '23', '32', '39', '1', '4']
['24-10-2017', '9', '11', '13', '27', '33', '7', '10']
['20-10-2017', '4', '17', '23', '27', '30', '3', '8']
['17-10-2017', '13', '17', '19', '26', '36', '2', '3']
['13-10-2017', '23', '29', '37', '45', '50', '5', '11']
['10-10-2017', '4', '21', '34', '36', '37', '3', '6']
['06-10-2017', '1', '9', '15', '19', '25', '1', '7']
['03-10-2017', '6', '24', '32', '48', '50', '1', '5']
['29-09-2017', '7', '18', '19', '32', '48', '3', '7']
['26-09-2017', '1', '29', '40', '41', '48', '6', '12']
['22-09-2017', '6', '11', '31', '39', '42', '1', '3']
['19-09-2017', '1', '8', '21', '30', '45', '2', '3']
['15-09-2017', '13', '18', '37', '44', '49', '9', '12']
['12-09-2017', '10', '17', '27', '29', '35', '4', '11']
['08-09-2017', '9', '24', '42', '47', '49', '1', '5']
['05-09-2017', '6', '9', '18', '28', '29', '1', '9']
['01-09-2017', '3', '7', '8', '14', '49', '5', '8']
['29-08-2017', '4', '12', '15', '32', '38', '1', '5']
['25-08-2017', '1', '5', '7', '15', '47', '9', '12']
['22-08-2017', '3', '10', '12', '17', '27', '3', '5']
['18-08-2017', '2', '24', '39', '42', '45', '2', '8']
['15-08-2017', '10', '14', '30', '35', '46', '4', '10']
['11-08-2017', '18', '28', '39', '46', '48', '5', '12']
['08-08-2017', '15', '25', '26', '40', '41', '4', '5']
['04-08-2017', '29', '30', '36', '40', '41', '2', '9']
['01-08-2017', '14', '21', '24', '29', '30', '8', '10']
['28-07-2017', '5', '9', '29', '31', '41', '2', '4']
['25-07-2017', '12', '14', '43', '44', '48', '2', '11']
['21-07-2017', '1', '8', '9', '26', '49', '5', '9']
['18-07-2017', '1', '25', '27', '41', '45', '5', '7']
['14-07-2017', '11', '14', '20', '21', '47', '7', '10']
['11-07-2017', '14', '22', '26', '42', '50', '8', '10']
['07-07-2017', '11', '20', '35', '37', '45', '3', '6']
['04-07-2017', '10', '22', '25', '37', '49', '5', '8']
['30-06-2017', '17', '35', '39', '47', '50', '6', '8']
['27-06-2017', '9', '17', '21', '28', '45', '1', '3']
['23-06-2017', '3', '4', '21', '31', '38', '3', '7']
['20-06-2017', '11', '18', '26', '43', '44', '8', '10']
['16-06-2017', '15', '17', '38', '41', '42', '9', '12']
['13-06-2017', '3', '12', '22', '27', '49', '4', '11']
['09-06-2017', '9', '20', '27', '39', '43', '10', '11']
['06-06-2017', '20', '22', '25', '37', '40', '3', '7']
['02-06-2017', '8', '10', '24', '33', '42', '3', '9']
['30-05-2017', '7', '12', '27', '38', '48', '6', '9']
['26-05-2017', '5', '7', '26', '36', '39', '2', '10']
['23-05-2017', '8', '15', '25', '27', '42', '1', '4']
['19-05-2017', '9', '11', '12', '19', '30', '4', '9']
['16-05-2017', '8', '11', '15', '20', '30', '3', '8']
['12-05-2017', '2', '20', '28', '29', '44', '3', '9']
['09-05-2017', '8', '12', '16', '22', '26', '6', '7']
['05-05-2017', '3', '7', '30', '35', '43', '1', '3']
['02-05-2017', '6', '19', '23', '25', '27', '11', '12']
['28-04-2017', '14', '20', '25', '30', '39', '2', '8']
['25-04-2017', '9', '11', '19', '32', '43', '3', '9']
['21-04-2017', '2', '13', '16', '22', '49', '4', '5']
['18-04-2017', '17', '22', '31', '38', '45', '5', '12']
['14-04-2017', '4', '14', '20', '23', '33', '6', '10']
['11-04-2017', '5', '21', '22', '31', '49', '2', '8']
['07-04-2017', '2', '10', '19', '35', '50', '6', '7']
['04-04-2017', '1', '9', '24', '33', '34', '2', '6']
['31-03-2017', '17', '24', '26', '28', '45', '4', '12']
['28-03-2017', '9', '13', '31', '33', '46', '6', '10']
['24-03-2017', '2', '17', '21', '27', '34', '5', '9']
['21-03-2017', '1', '20', '23', '44', '47', '4', '11']
['17-03-2017', '6', '10', '19', '29', '36', '3', '9']
['14-03-2017', '3', '5', '21', '36', '44', '3', '6']
['10-03-2017', '31', '36', '38', '47', '49', '8', '11']
['07-03-2017', '6', '37', '41', '48', '50', '4', '5']
['03-03-2017', '2', '11', '29', '30', '47', '1', '12']
['28-02-2017', '10', '20', '31', '35', '42', '2', '12']
['24-02-2017', '2', '4', '13', '22', '43', '8', '9']
['21-02-2017', '13', '19', '41', '45', '49', '3', '4']
['17-02-2017', '19', '25', '33', '36', '48', '2', '9']
['14-02-2017', '2', '10', '24', '40', '44', '3', '10']
['10-02-2017', '7', '21', '26', '35', '43', '2', '9']
['07-02-2017', '4', '10', '31', '38', '44', '8', '10']
['03-02-2017', '3', '4', '15', '46', '50', '5', '9']
['31-01-2017', '3', '4', '17', '23', '44', '6', '9']
['27-01-2017', '17', '20', '28', '45', '48', '5', '9']
['24-01-2017', '1', '5', '7', '17', '23', '3', '8']
['20-01-2017', '10', '17', '27', '31', '49', '3', '5']
['17-01-2017', '4', '16', '25', '43', '47', '2', '10']
['13-01-2017', '3', '7', '16', '26', '50', '4', '7']
['10-01-2017', '2', '11', '29', '35', '44', '4', '9']
['06-01-2017', '10', '14', '18', '21', '49', '9', '11']
['03-01-2017', '19', '23', '27', '34', '49', '1', '11']