I ran into this problem when running my script:
(I'm building the script in Spyder, but I get the same error when I try it in a Jupyter Notebook.)
#STEP 3.8 - Get the URL request
LIMIT = 100
radius = 50
url = 'https://api.foursquare-com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, CLIENT_SECRET, VERSION, neighbor_lat, neighbor_long, radius, LIMIT)
#STEP 3.9 - Get request and examine the result
results = requests.get(url).json()
print(results)
ConnectionError: HTTPSConnectionPool(host='api.foursquare-com', port=443): Max retries exceeded with url: /v2/venues/explore?&client_id=xxx&client_secret=xxx&v=20180605&ll=43.806686299999996,-79.19435340000001&radius=500&limit=100 (Caused by NewConnectionError(': Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
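[Errno 11001] getaddrinfo failed means Windows could not resolve the hostname via DNS, and the host in the error is api.foursquare-com, with a hyphen where the Foursquare API hostname normally has a dot (api.foursquare.com). A minimal check using only the standard library, comparing the two spellings:

import socket

#getaddrinfo is the call that raised [Errno 11001]; invoking it directly
#shows which hostnames resolve
for host in ('api.foursquare-com', 'api.foursquare.com'):
    try:
        socket.getaddrinfo(host, 443)
        print(host, '-> resolves')
    except socket.gaierror as error:
        print(host, '-> does not resolve:', error)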
Answer 0 (score: 0)
Try adding a headers parameter to requests.get:
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36'}
page = requests.get(url, headers=headers)
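Applied to the request from the question, that would look like the sketch below; note this only helps if the server is rejecting the default python-requests User-Agent, not if the hostname itself fails to resolve. url is assumed to be defined as in the question.

#same Foursquare request as in the question, now with browser-like headers
results = requests.get(url, headers=headers, timeout=10).json()
print(results)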
Answer 1 (score: 0)
Try using exception handling:
from bs4 import BeautifulSoup
import requests
import sqlite3
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter

#cafeNamesthornbury
def scrapecafes(city, area):
    #url = 'https://www.broadsheet.com.au/melbourne/guides/best-cafes-thornbury' #go to the website
    url = f"https://www.broadsheet.com.au/{city}/guides/best-cafes-{area}"
    response = requests.get(url, timeout=5)
    soup = BeautifulSoup(response.content, "html.parser")

    #scrape the cafe names and clean them
    cafeNames = soup.findAll('h2', attrs={"class": "venue-title"})
    cafeNamesClean = [cafe.text.strip() for cafe in cafeNames]
    print(cafeNamesClean)

    #addresses
    cafeAddresses = soup.findAll(attrs={"class": "address-content"})
    cafeAddressesClean = [address.text for address in cafeAddresses]
    print(cafeAddressesClean)

    ##geocode addresses with the rate-limited geocoder and zip up for the table
    locator = Nominatim(user_agent="myGeocoder")
    geocode = RateLimiter(locator.geocode, min_delay_seconds=1)
    fortable = []
    for name, address in zip(cafeNamesClean, cafeAddressesClean):
        location = geocode(address)
        if location is not None:  #skip addresses Nominatim cannot resolve
            fortable.append((name, address, location.latitude, location.longitude))
    print(fortable)

    ##connect to database
    sqliteConnection = None
    try:
        sqliteConnection = sqlite3.connect('25july_database.db')
        cursor = sqliteConnection.cursor()
        print("Database created and successfully connected to 25july_database")
        sqlite_select_Query = "select sqlite_version();"
        cursor.execute(sqlite_select_Query)
        record = cursor.fetchall()
        print("SQLite Database Version is: ", record)
        cursor.close()
    except sqlite3.Error as error:
        print("Error while connecting to sqlite", error)

    #create table
    try:
        sqlite_create_table_query = '''CREATE TABLE IF NOT EXISTS scraper (
            name TEXT NOT NULL,
            address TEXT NOT NULL,
            latitude FLOAT NOT NULL,
            longitude FLOAT NOT NULL
        );'''
        cursor = sqliteConnection.cursor()
        print("Successfully connected to SQLite")
        cursor.execute(sqlite_create_table_query)
        sqliteConnection.commit()
        print("SQLite table created")
    except sqlite3.Error as error:
        print("Error while creating a sqlite table", error)

    ##enter data into table
    try:
        sqlite_insert_name_param = """INSERT INTO scraper
            (name, address, latitude, longitude)
            VALUES (?,?,?,?);"""
        cursor.executemany(sqlite_insert_name_param, fortable)
        sqliteConnection.commit()
        print("Total", cursor.rowcount, "records inserted successfully into table")
        cursor.close()
    except sqlite3.Error as error:
        print("Failed to insert data into sqlite table", error)
    finally:
        if sqliteConnection:
            sqliteConnection.close()
            print("The SQLite connection is closed")

scrapecafes('melbourne', 'thornbury')
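The script above only catches sqlite3.Error; the ConnectionError from the question is raised by requests before any database work happens. A minimal sketch of handling that too (the function name and messages are illustrative):

import requests

def fetch(url):
    try:
        response = requests.get(url, timeout=5)
        response.raise_for_status()  #turn 4xx/5xx responses into exceptions
        return response
    except requests.exceptions.ConnectionError as error:
        #DNS failures such as [Errno 11001] getaddrinfo failed land here
        print("Connection failed:", error)
    except requests.exceptions.HTTPError as error:
        print("HTTP error:", error)
    return None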
Answer 2 (score: 0)
I ran into this problem when running a script:
(I'm building the script in PyCharm, but when I tried the upload (json url, requests get, import pTable) I hit the same error.)
requests.exceptions.ConnectionError: HTTPSConnectionPool(host='api.kawalkorona.com', port=443): Max retries exceeded with url: /indonesia/ (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x03FFFEE0>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
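If the hostname is spelled correctly and the failure is intermittent (flaky DNS or network rather than a typo), one option is to retry with backoff. A minimal sketch using the urllib3 retry support built into requests, with illustrative parameter values:

import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

session = requests.Session()
#retry failed connections a few times, with exponential backoff between attempts
retries = Retry(total=3, connect=3, backoff_factor=1)
session.mount('https://', HTTPAdapter(max_retries=retries))

response = session.get('https://api.kawalkorona.com/indonesia/', timeout=5)
print(response.json())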