Hi, I've just started learning to code through tutorials and am practicing, but I'm getting an error when I try to run my script. My code is below; any help would be appreciated.
Here is my code:
from bs4 import BeautifulSoup as soup
from urllib.request import Request, urlopen
import csv
req = Request('https://www.niche.com/k12/search/best-schools/s/arkansas/?gradeLevel=middle&gradeLevel=high&type=traditional&type=charter&type=magnet&type=private', headers={'User-Agent': 'Mozilla/5.0'})
webpage = urlopen(req).read()
req.close()
page_soup = soup(webpage, "html.parser")
card = page_soup.findAll("div",{"class":"card"})
csv_file = open('headmasters_scrape.csv', 'w')
csv_writer = csv.writer(csv_file)
csv_writer.writerow(['School', 'Niche_Grade', 'School_Type'])
for cards in card:
    Item_1 = cards.findAll("h2", {"class": "search-result__title"})
    School = Item_1[0].text
    Item_2 = cards.findAll("figure", {"class": "search-result-grade"})
    Niche_Grade = Item_2[0].text
    Item_3 = cards.findAll("li", {"class": "search-result-tagline__item"})
    School_Type = Item_3[0].text
    print("School: " + School)
    print("Niche_Grade: " + Niche_Grade)
    print("School_Type: " + School_Type)
    print()
    csv_writer.writerow([School, Niche_Grade, School_Type])
csv_file.close()
The error I'm getting is:
Traceback (most recent call last):
  File "C:\Users\Amdin\Downloads\Webscrape\Edited_Version.py", line 4, in <module>
    webpage = urlopen(req).read()
  File "C:\Users\Amdin\Anaconda3\lib\urllib\request.py", line 222, in urlopen
    return opener.open(url, data, timeout)
  File "C:\Users\Amdin\Anaconda3\lib\urllib\request.py", line 525, in open
    response = self._open(req, data)
  File "C:\Users\Amdin\Anaconda3\lib\urllib\request.py", line 548, in _open
    'unknown_open', req)
  File "C:\Users\Amdin\Anaconda3\lib\urllib\request.py", line 503, in _call_chain
    result = func(*args)
  File "C:\Users\Amdin\Anaconda3\lib\urllib\request.py", line 1387, in unknown_open
    raise URLError('unknown url type: %s' % type)
urllib.error.URLError: <urlopen error unknown url type: https>
Answer 0 (score: 0):
The snippet below avoids urllib altogether: it fetches the page with the requests library (sending a browser-like User-Agent header), parses it with BeautifulSoup, collects the three fields into lists, and writes them to a CSV with pandas.
from bs4 import BeautifulSoup
import requests
import pandas as pd

# Fetch the page with a browser-like User-Agent so the site serves the full HTML.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:73.0) Gecko/20100101 Firefox/73.0'}
r = requests.get("https://www.niche.com/k12/search/best-schools/s/arkansas/?gradeLevel=middle&gradeLevel=high&type=traditional&type=charter&type=magnet&type=private", headers=headers)
soup = BeautifulSoup(r.text, 'html.parser')

# Collect each field into its own list.
name = []
grade = []
sctype = []
for item in soup.findAll("h2", {'class': 'search-result__title'}):
    name.append(item.text)
for item in soup.select("div[class^=niche__grade]"):
    grade.append(item.text)
for item in soup.findAll("ul", {'class': 'search-result-tagline'}):
    sctype.append(item.next_element.text)

# Zip the parallel lists into rows and write them out with pandas.
data = []
for a, b, c in zip(name, grade, sctype):
    data.append((a, b, c))
df = pd.DataFrame(data, columns=["School", "Niche_Grade", "School_Type"])
df.to_csv('data.csv', index=False)
Output: check online
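If you would rather keep the standard-library csv module from the original attempt instead of pandas, the same rows can be written directly from the name/grade/sctype lists built above. This is only a minimal sketch under that assumption; the filename headmasters_scrape.csv is carried over from the question.

import csv

# Write the scraped fields with the standard-library csv module,
# reusing the lists built by the answer above.
with open('headmasters_scrape.csv', 'w', newline='', encoding='utf-8') as csv_file:
    csv_writer = csv.writer(csv_file)
    csv_writer.writerow(['School', 'Niche_Grade', 'School_Type'])
    csv_writer.writerows(zip(name, grade, sctype))

writerows consumes the zipped tuples directly, so no intermediate data list is needed.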