刮过的数据在控制台中打印,但不在csv文件中打印

时间:2018-08-08 14:17:30

标签: python selenium web-crawler export-to-csv

大家好,SO的忠实粉丝

所以我正在构建一个搜寻器,它可以将数据很好地打印到控制台,甚至可以创建所需的csv文件-但该文件为空。数据和输出文件之间显然存在通信错误,但是我看不到我的错误。 我已经使用PyCharm进行过检查,但是并没有显示任何内容:(。

我包含了我的代码(只是更改了我要从其抓取的网站),并且将非常感谢提供指导的每个人!


from random import randint from typing import List import requests from bs4 import BeautifulSoup from selenium import webdriver session = requests.Session() path_to_chromedriver = '/Users/me/Desktop/chromedriver.exe' browser = webdriver.Chrome(executable_path = path_to_chromedriver) headers = {"user-agent": "Chrome/67.0.3396.87"} dns = 'https://www.example.com/' url = dns + 'specific-profile-page/' def r_sleep(sleep=None, mylog=None): length: float = float(randint(2000, 10000)) / 1000 mylog("Safety Random Sleep has started for {0} sec".format(length)) sleep(length) mylog("Safety Random Sleep is over") browser.get(url) elements = browser.find_elements_by_link_text('Profile') links = [] class CrawledDoctor: def __init__(self, title, special, partner, street, area, phone, web): self.title = title self.special = special self.partner = partner self.street = street self.area = area self.phone = phone self.web = web class DoctorFetcher: def fetch(self): doctors: List[CrawledDoctor] = [] while url != "": print("Current page ", url) r = requests.get(url, headers=headers) print(r.status_code) if r.status_code != 200: print('request returns invalid status code') for i in range(len(elements)): links.append(elements[i].get_attribute('href')) for link in links: print('navigating to: ' + link) new_page = browser.get(link) # do stuff within that page here... if new_page is None: print("Start scraping ...") soup = BeautifulSoup(browser.page_source, 'lxml', exclude_encodings=["ISO-8859-7"]) for info in soup.select('#profil_name_adresse'): try: title = info.select_one('h1').text print(title) special = info.select_one('p.grau').text print(special) try: partner = info.find('a.link-black').attrs[ 'href'].text print(partner) except AttributeError: partner = print('no partner') street = info.find('p', {'class': 'grau'}).next_sibling.contents[0] print(street) area = info.find('p', {'class': 'grau'}).next_sibling.contents[2] print(area) phone = info.find('p', {'class': 'grau'}).next_sibling.next_sibling.next_sibling.contents[0] print(phone) web = info.find('p', {'class': 'grau'}).next_sibling.next_sibling.next_sibling.a.text print(web) except AttributeError: print("Keine Angabe") crawled = CrawledDoctor(title, special, partner, street, area, phone, web) doctors.append(crawled) browser.back() return doctors import csv fetcher = DoctorFetcher() with open('myfile.csv', 'w', newline='') as csvfile: doctorwriter = csv.writer(csvfile, delimiter=';', quotechar='"', quoting=csv.QUOTE_MINIMAL) for doctor in fetcher.fetch: doctorwriter.writerow( [doctor.title, doctor.special, doctor.partner, doctor.street, doctor.area, doctor.phone, doctor.web]) <code> -

0 个答案:

没有答案