所以我正在构建一个搜寻器，它可以将数据很好地打印到控制台，甚至可以创建所需的 CSV 文件——但该文件为空。数据和输出文件之间显然存在通信错误，但是我看不到我的错误。我已经使用 PyCharm 进行过检查，但是并没有显示任何内容。
我包含了我的代码(只是更改了我要从其抓取的网站),并且将非常感谢提供指导的每个人!
from random import randint
from typing import List
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
session = requests.Session()
path_to_chromedriver = '/Users/me/Desktop/chromedriver.exe'
browser = webdriver.Chrome(executable_path = path_to_chromedriver)
headers = {"user-agent": "Chrome/67.0.3396.87"}
dns = 'https://www.example.com/'
url = dns + 'specific-profile-page/'
def r_sleep(sleep=None, mylog=None):
length: float = float(randint(2000, 10000)) / 1000
mylog("Safety Random Sleep has started for {0} sec".format(length))
sleep(length)
mylog("Safety Random Sleep is over")
browser.get(url)
elements = browser.find_elements_by_link_text('Profile')
links = []
class CrawledDoctor:
def __init__(self, title, special, partner, street, area, phone, web):
self.title = title
self.special = special
self.partner = partner
self.street = street
self.area = area
self.phone = phone
self.web = web
class DoctorFetcher:
def fetch(self):
doctors: List[CrawledDoctor] = []
while url != "":
print("Current page ", url)
r = requests.get(url, headers=headers)
print(r.status_code)
if r.status_code != 200:
print('request returns invalid status code')
for i in range(len(elements)):
links.append(elements[i].get_attribute('href'))
for link in links:
print('navigating to: ' + link)
new_page = browser.get(link)
# do stuff within that page here...
if new_page is None:
print("Start scraping ...")
soup = BeautifulSoup(browser.page_source, 'lxml', exclude_encodings=["ISO-8859-7"])
for info in soup.select('#profil_name_adresse'):
try:
title = info.select_one('h1').text
print(title)
special = info.select_one('p.grau').text
print(special)
try:
partner = info.find('a.link-black').attrs[
'href'].text
print(partner)
except AttributeError:
partner = print('no partner')
street = info.find('p', {'class': 'grau'}).next_sibling.contents[0]
print(street)
area = info.find('p', {'class': 'grau'}).next_sibling.contents[2]
print(area)
phone = info.find('p', {'class': 'grau'}).next_sibling.next_sibling.next_sibling.contents[0]
print(phone)
web = info.find('p', {'class': 'grau'}).next_sibling.next_sibling.next_sibling.a.text
print(web)
except AttributeError:
print("Keine Angabe")
crawled = CrawledDoctor(title, special, partner, street, area, phone, web)
doctors.append(crawled)
browser.back()
return doctors
import csv
fetcher = DoctorFetcher()
with open('myfile.csv', 'w', newline='') as csvfile:
doctorwriter = csv.writer(csvfile, delimiter=';', quotechar='"', quoting=csv.QUOTE_MINIMAL)
for doctor in fetcher.fetch:
doctorwriter.writerow(
[doctor.title, doctor.special, doctor.partner, doctor.street, doctor.area, doctor.phone, doctor.web])