感谢您的关注,非常感谢您的帮助。我想抓取一个简单的 Craigslist 搜索结果列表,但这段代码无法正常工作:它返回的是空列表 []。请帮忙看看问题出在哪里,代码如下:
from robobrowser import RoboBrowser
import sys, codecs, locale
import pandas as pd
# Module-level browser session shared by getTrips(); pages are parsed with
# the 'html.parser' backend. history=True presumably keeps the visited-page
# history on the session -- confirm against the RoboBrowser documentation.
browser = RoboBrowser(history=True, parser='html.parser')
def getTrips(website):
    """Collect the listings from a search page and every following page.

    Starting at ``website``, each page is opened with the module-level
    ``browser``, every element with class ``result-info`` is turned into a
    row, and the link whose text is ``'next >'`` is followed until no such
    link (or no ``href`` on it) is found.

    Args:
        website: URL of the first results page.

    Returns:
        A list of dicts with the keys ``"title"``, ``"url"`` and
        ``"website"`` (the URL of the page the row was found on), covering
        all visited pages.
    """
    data = []
    page_url = website
    # Guard against a "next" link that points back to an already visited
    # page, which would otherwise loop forever.
    visited = set()

    while page_url and page_url not in visited:
        visited.add(page_url)
        browser.open(page_url)

        for trip in browser.find_all(class_='result-info'):
            data.append({
                "title": get_title(trip),
                "url": get_url(trip),
                "website": page_url,
            })

        # The original recursed here but threw away the recursive result, so
        # only the first page was ever returned. Walking the pages in a loop
        # keeps every row in the same list.
        next_page = browser.get_link('next >')
        href = next_page.get('href') if next_page is not None else None
        # NOTE(review): _build_url is a private RoboBrowser helper; it is
        # presumably used to resolve a relative href -- confirm.
        page_url = browser._build_url(href) if href else None

    return data
def get_title(trip):
    """Return the listing title text found inside ``trip``.

    Args:
        trip: A parsed element exposing ``find(class_=...)``; getTrips()
            passes the ``result-info`` elements of a results page.

    Returns:
        The ``.text`` of the element whose class is
        ``'result-title hdrlnk'``, or ``"Title not found"`` when there is
        no such element.
    """
    # Search once and reuse the result instead of running the same lookup
    # a second time to read the text.
    title_node = trip.find(class_='result-title hdrlnk')
    if title_node is None:
        return "Title not found"
    return title_node.text
def get_url(trip):
    """Return the ``href`` of the first ``<a>`` element inside ``trip``.

    Args:
        trip: A parsed element exposing ``find('a')``; getTrips() passes
            the ``result-info`` elements of a results page.

    Returns:
        The link's ``href`` value, or ``"URL not found"`` when ``trip``
        contains no ``<a>`` element or the element has no ``href``.
    """
    # The original looked for a 'result-info' element *inside* trip, but
    # trip is that element already, so the check never matched; its success
    # branch also used an undefined name (`item`). Look up the anchor on
    # trip directly instead.
    link = trip.find('a')
    href = link.get('href') if link is not None else None
    if not href:
        return "URL not found"
    return href
# Scrape the photographer search results, echo the rows, and save them
# as a CSV file without the DataFrame index column.
total = []
total += getTrips(
    'https://newyork.craigslist.org/search/bbb?query=photographer&sort=rel'
)
print(total)

df = pd.DataFrame(total)
df.to_csv('photographer_data.csv', index=False)