I ran into this problem when running my script:
(I'm building the script in Spyder, but I get the same error when I try it in a Jupyter Notebook.)
#STEP 3.8 - Get the URL request
LIMIT = 100
radius = 50
url = 'https://api.foursquare-com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, CLIENT_SECRET, VERSION, neighbor_lat, neighbor_long, radius, LIMIT)
#STEP 3.9 - Get request and examine the result
results = requests.get(url).json()
print(results)
ConnectionError: HTTPSConnectionPool(host='api.foursquare-com', port=443): Max retries exceeded with url: /v2/venues/explore?&client_id=xxx&client_secret=xxx&v=20180605&ll=43.806686299999996,-79.19435340000001&radius=500&limit=100 (Caused by NewConnectionError(': Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
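[Errno 11001] getaddrinfo failed means Windows could not resolve the hostname via DNS, and the host in the error is api.foursquare-com, with a hyphen where the Foursquare API hostname normally has a dot (api.foursquare.com). A minimal check using only the standard library, comparing the two spellings:

import socket

#getaddrinfo is the call that raised [Errno 11001]; invoking it directly
#shows which hostnames resolve
for host in ('api.foursquare-com', 'api.foursquare.com'):
    try:
        socket.getaddrinfo(host, 443)
        print(host, '-> resolves')
    except socket.gaierror as error:
        print(host, '-> does not resolve:', error)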
Answer 0 (score: 0)
Try adding a headers parameter to requests.get:
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36'}
page = requests.get(url, headers=headers)
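Applied to the request from the question, that would look like the sketch below; note this only helps if the server is rejecting the default python-requests User-Agent, not if the hostname itself fails to resolve. url is assumed to be defined as in the question.

#same Foursquare request as in the question, now with browser-like headers
results = requests.get(url, headers=headers, timeout=10).json()
print(results)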
Answer 1 (score: 0)
Try using exception handling:
from bs4 import BeautifulSoup
import requests
import sqlite3
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter

#cafeNamesthornbury
def scrapecafes(city, area):
    #url = 'https://www.broadsheet.com.au/melbourne/guides/best-cafes-thornbury' #go to the website
    url = f"https://www.broadsheet.com.au/{city}/guides/best-cafes-{area}"
    response = requests.get(url, timeout=5)
    soup = BeautifulSoup(response.content, "html.parser")

    #scrape the cafe names and clean them
    cafeNames = soup.findAll('h2', attrs={"class": "venue-title"})
    cafeNamesClean = [cafe.text.strip() for cafe in cafeNames]
    print(cafeNamesClean)

    #addresses
    cafeAddresses = soup.findAll(attrs={"class": "address-content"})
    cafeAddressesClean = [address.text for address in cafeAddresses]
    print(cafeAddressesClean)

    ##geocode addresses with the rate-limited geocoder and zip up for the table
    locator = Nominatim(user_agent="myGeocoder")
    geocode = RateLimiter(locator.geocode, min_delay_seconds=1)
    fortable = []
    for name, address in zip(cafeNamesClean, cafeAddressesClean):
        location = geocode(address)
        if location is not None:  #skip addresses Nominatim cannot resolve
            fortable.append((name, address, location.latitude, location.longitude))
    print(fortable)

    ##connect to database
    sqliteConnection = None
    try:
        sqliteConnection = sqlite3.connect('25july_database.db')
        cursor = sqliteConnection.cursor()
        print("Database created and successfully connected to 25july_database")
        sqlite_select_Query = "select sqlite_version();"
        cursor.execute(sqlite_select_Query)
        record = cursor.fetchall()
        print("SQLite Database Version is: ", record)
        cursor.close()
    except sqlite3.Error as error:
        print("Error while connecting to sqlite", error)

    #create table
    try:
        sqlite_create_table_query = '''CREATE TABLE IF NOT EXISTS scraper (
            name TEXT NOT NULL,
            address TEXT NOT NULL,
            latitude FLOAT NOT NULL,
            longitude FLOAT NOT NULL
        );'''
        cursor = sqliteConnection.cursor()
        print("Successfully connected to SQLite")
        cursor.execute(sqlite_create_table_query)
        sqliteConnection.commit()
        print("SQLite table created")
    except sqlite3.Error as error:
        print("Error while creating a sqlite table", error)

    ##enter data into table
    try:
        sqlite_insert_name_param = """INSERT INTO scraper
            (name, address, latitude, longitude)
            VALUES (?,?,?,?);"""
        cursor.executemany(sqlite_insert_name_param, fortable)
        sqliteConnection.commit()
        print("Total", cursor.rowcount, "records inserted successfully into table")
        cursor.close()
    except sqlite3.Error as error:
        print("Failed to insert data into sqlite table", error)
    finally:
        if sqliteConnection:
            sqliteConnection.close()
            print("The SQLite connection is closed")

scrapecafes('melbourne', 'thornbury')
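The script above only catches sqlite3.Error; the ConnectionError from the question is raised by requests before any database work happens. A minimal sketch of handling that too (the function name and messages are illustrative):

import requests

def fetch(url):
    try:
        response = requests.get(url, timeout=5)
        response.raise_for_status()  #turn 4xx/5xx responses into exceptions
        return response
    except requests.exceptions.ConnectionError as error:
        #DNS failures such as [Errno 11001] getaddrinfo failed land here
        print("Connection failed:", error)
    except requests.exceptions.HTTPError as error:
        print("HTTP error:", error)
    return None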
Answer 2 (score: 0)
I ran into this problem when running a script:
(I'm building the script in PyCharm, but when I tried the upload (json url, requests get, import pTable) I hit the same error.)
requests.exceptions.ConnectionError: HTTPSConnectionPool(host='api.kawalkorona.com', port=443): Max retries exceeded with url: /indonesia/ (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x03FFFEE0>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
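If the hostname is spelled correctly and the failure is intermittent (flaky DNS or network rather than a typo), one option is to retry with backoff. A minimal sketch using the urllib3 retry support built into requests, with illustrative parameter values:

import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

session = requests.Session()
#retry failed connections a few times, with exponential backoff between attempts
retries = Retry(total=3, connect=3, backoff_factor=1)
session.mount('https://', HTTPAdapter(max_retries=retries))

response = session.get('https://api.kawalkorona.com/indonesia/', timeout=5)
print(response.json())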