How to handle "connection error not handled correctly"

Time: 2020-04-29 21:01:51

Tags: python error-handling beautifulsoup

Because of the number of calls I'm making to the site (I'm trying to pull all historical data from 2018 to the present), I'm getting the error below, which I assume is caused by the volume of requests.

The error I'm getting is: ('Connection aborted.', TimeoutError(10060, 'A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond', None, 10060, None))

I'm new to coding. I tried sleep(30), but I'm not sure how to do this more responsibly?
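For context, the usual pattern here is to catch the connection error and retry with an increasing delay, rather than sleeping an unconditional 30 seconds on every iteration. Below is a minimal sketch of that idea (the fetch_with_retries helper and its parameters are illustrative, not part of the original script):

import requests
from time import sleep

def fetch_with_retries(url, max_retries=5, base_delay=5):
    """Issue a GET request, backing off exponentially after connection errors."""
    for attempt in range(max_retries):
        try:
            response = requests.get(url, timeout=30)
            response.raise_for_status()
            return response
        except (requests.exceptions.ConnectionError,
                requests.exceptions.Timeout) as err:
            wait = base_delay * (2 ** attempt)  # 5s, 10s, 20s, ...
            print(f"Request failed ({err}); retrying in {wait}s")
            sleep(wait)
    raise RuntimeError(f"Giving up on {url} after {max_retries} attempts")

The full script that produces the error is below.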

from datetime import datetime, date, timedelta
import requests
import re
import csv
import os
import numpy
import pandas as pd
from bs4 import BeautifulSoup as bs
from simplified_scrapy import SimplifiedDoc,req,utils
import pyodbc
from time import sleep

#Write to CSV File
#file = open('harnessresults.csv', 'w', newline='', encoding='utf8')
#writer = csv.writer(file)

#Connect to SQL database
conn = pyodbc.connect("Driver={SQL Server};"
                     "Server=DESKTOP-KOOIS0J;"
                     "Database=Horses;"
                     "Trusted_Connection=yes;",
                     autocommit=True
                     )

mycursor = conn.cursor()

# mycursor.execute("ALTER TABLE horses ADD Venue VARCHAR(255)")


#conn.close()

base_url = "http://www.harness.org.au/racing/results/?firstDate="
base1_url = "http://www.harness.org.au"

webpage_response = requests.get('http://www.harness.org.au/racing/results/?firstDate=')

soup = bs(webpage_response.content, "html.parser")

format = "%d-%m-%y"
delta = timedelta(days=1)
yesterday = datetime.today() - timedelta(days=1)

enddate = datetime(2018, 1, 11)

#prints header in csv
#writer.writerow(['Date1', 'Venue', 'RaceNumber', 'RaceName', 'RaceTitle', 'RaceDistance', 'Place', 'HorseName', 'Prizemoney', 'Row', 'Trainer', 'Driver', 'Margin', 'StartingOdds', 'StewardsComments', 'Scratching', 'TrackRating', 'Gross_Time', 'Mile_Rate', 'Lead_Time', 'First_Quarter', 'Second_Quarter', 'Third_Quarter', 'Fourth_Quarter'])


# Loop one day at a time from enddate up to yesterday
while enddate <= yesterday:
    enddate += timedelta(days=1)
    enddate1 = enddate.strftime("%d-%m-%y")
    new_url = base_url + str(enddate1)
    soup12 = requests.get(new_url)  # one request per day, no error handling
    soup1 = bs(soup12.content, "html.parser")
    table1 = soup1.find('table', class_='meetingListFull')

    tr = table1.find_all('tr', {'class':['odd', 'even']})
    sleep(30)  # fixed 30-second pause between daily result pages
    for tr1 in tr:
        tr2 = tr1.find('a').get_text()
        tr3 = tr1.find('a')['href']
        newurl = base1_url + tr3
        with requests.Session() as s:
            webpage_response = s.get(newurl)
            soup = bs(webpage_response.content, "html.parser")
            #soup1 = soup.select('.content')
            results = soup.find_all('div', {'class':'forPrint'})
            resultsv2 = soup.find_all('table', {'class':'raceFieldTable'})


            #writer.writerow(['Date1', 'Venue', 'RaceNumber', 'RaceTitle', 'RaceDistance', 'Place', 'HorseName', 'Prizemoney', 'Row1', 'HorseNumber', 'Trainer', 'Driver', 'Margin', 'StartingOdds', 'StewardsComments', 'Scratching', 'TrackRating', 'Gross_Time', 'Mile_Rate', 'Lead_Time', 'First_Quarter', 'Second_Quarter', 'Third_Quarter', 'Fourth_Quarter'])

            for race in results:
                race_number = race.find('td', class_='raceNumber').get_text()
                race_name1 = race.find('td', class_='raceTitle').get_text()
                race_title1 = race.find('td', class_='raceInformation').get_text()
                race_title1 = ' '.join(race_title1.split())
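Since the script already uses requests.Session for the per-race pages, another commonly suggested option is to mount urllib3's Retry on the session so that failed requests are retried transparently. A minimal, self-contained sketch (the retry counts and delays are illustrative):

import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

retries = Retry(total=5,           # up to 5 retries per request
                backoff_factor=2,  # increasing delay between attempts
                status_forcelist=[429, 500, 502, 503, 504])

session = requests.Session()
session.mount('http://', HTTPAdapter(max_retries=retries))
session.mount('https://', HTTPAdapter(max_retries=retries))

# e.g. fetching one results page through the retrying session
resp = session.get('http://www.harness.org.au/racing/results/?firstDate=', timeout=30)
print(resp.status_code)

With this in place, a per-request timeout plus automatic retries would replace the unconditional sleep(30) in the loop above.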

0 Answers:

No answers