I created a Python script that downloads the FAA archives of case files from here. When run as a .py file, the script works as expected. I built an executable with PyInstaller, and it throws the error below when it tries to download the first CSV. My first thought was that the corporate firewall is blocking the connection, but as I said, it works fine when run as a .py file.
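To narrow it down, this is the kind of minimal check I can run from inside the frozen build to isolate name resolution and proxy settings from the rest of the script (just a sketch; oeaaa.faa.gov is the host from the URL constant in the code below, everything else is standard library):

import socket
from urllib.request import getproxies

# The traceback bottoms out in getaddrinfo, so test DNS resolution of the
# FAA host on its own, then show whatever proxy settings the process sees.
try:
    print(socket.getaddrinfo("oeaaa.faa.gov", 443))
except socket.gaierror as err:
    print("DNS lookup failed:", err)
print(getproxies())
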
Error:
$ dist/casefiles/casefiles.exe
Downloading region AAL
Traceback (most recent call last):
  File "urllib\request.py", line 1318, in do_open
  File "http\client.py", line 1239, in request
  File "http\client.py", line 1285, in _send_request
  File "http\client.py", line 1234, in endheaders
  File "http\client.py", line 1026, in _send_output
  File "http\client.py", line 964, in send
  File "http\client.py", line 1392, in connect
  File "http\client.py", line 936, in connect
  File "socket.py", line 704, in create_connection
  File "socket.py", line 743, in getaddrinfo
socket.gaierror: [Errno 11003] getaddrinfo failed

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "casefiles.py", line 139, in <module>
    main()
  File "casefiles.py", line 37, in main
    download_case_files()
  File "casefiles.py", line 57, in download_case_files
    urlretrieve(URL.format(region), CASE_FILES_PATH.format(region))
  File "urllib\request.py", line 248, in urlretrieve
  File "urllib\request.py", line 223, in urlopen
  File "urllib\request.py", line 526, in open
  File "urllib\request.py", line 544, in _open
  File "urllib\request.py", line 504, in _call_chain
  File "urllib\request.py", line 1361, in https_open
  File "urllib\request.py", line 1320, in do_open
urllib.error.URLError: <urlopen error [Errno 11003] getaddrinfo failed>
[1080] Failed to execute script casefiles
Code:
#!/usr/bin/python
# -*- coding: utf-8 -*-
import csv
import pathlib
from urllib.error import HTTPError
from urllib.request import urlretrieve
from datetime import date, timedelta
from collections import deque
URL = "https://oeaaa.faa.gov/oeaaa/external/ExternalCaseDownloadServlet?emaNelif=OffAirport{}2018List.gzip"
CASE_FILES_PATH = "./CaseFiles/OffAirport{}2018List.csv"
DATE = date.today()
TIME_FRAME = (DATE - date(2018, 1, 1)).days
# TIME_FRAME is used to determine how recent a case file needs to be to be included
OUTPUT_PATH = "./Results/FAA Antenna Towers {}_{:02d}_{:02d}.csv".format((DATE.year % 100), DATE.month, DATE.day)
REGIONS = ["AAL", "ACE", "AEA", "AGL", "ANE", "ANM", "ASO", "ASW", "AWP", "WTE", "WTW"]
FILTERS = {"FILE DATE": 0, "NOTICE OF": "Construction", "DATE BUILT": "", "STRUCTURE TYPE": "Antenna Tower"}
# DATE BUILT must NOT be empty; if it contains a 2 it is not empty, and a 2 is always present because of the year
DESIRED_HEADERS = ["STUDY (ASN)", "PRIOR ASN", "RECEIVED DATE", "LATITUDE", "LONGITUTDE", "STRUCTURE NAME",
                   "STRUCTURE CITY", "STRUCTURE STATE", "PROPOSAL DESCRIPTION", "LOCATION DESCRIPTION",
                   "NOTICE OF", "DURATION", "WORK SCHEDULE BEGINNING DATE", "WORK SCHEDULE ENDING DATE",
                   "DATE BUILT", "FCC NUMBER", "STRUCTURE TYPE", "AGL HEIGHT DET", "AGL HEIGHT PROPOSED",
                   "REPRESENTATIVE ATTN OF ", "REPRESENTATIVE CITY ", "REPRESENTATIVE COUNTRY ",
                   "REPRESENTATIVE EMAIL ", "REPRESENTATIVE NAME ", "REPRESENTATIVE FAX ",
                   "REPRESENTATIVE PRIMARY PHONE ", "REPRESENTATIVE PRIMARY PHONE EXT ", "REPRESENTATIVE STATE ",
                   "REPRESENTATIVE ADDRESS 1 ", "REPRESENTATIVE ADDRESS 2 ", "REPRESENTATIVE POSTAL CODE ",
                   "SPONSOR ATTN OF ", "SPONSOR CITY ", "SPONSOR COUNTRY ", "SPONSOR EMAIL ", "SPONSOR NAME ",
                   "SPONSOR FAX ", "SPONSOR PRIMARY PHONE ", "SPONSOR PRIMARY PHONE EXT ", "SPONSOR STATE ",
                   "SPONSOR ADDRESS 1 ", "SPONSOR ADDRESS 2 ", "SPONSOR POSTAL CODE ", "SIGNATURE CONTROL NUMBER "]

def main():
    """ Main function """
    # Create output directories if they don't exist
    pathlib.Path('./CaseFiles').mkdir(parents=True, exist_ok=True)
    pathlib.Path('./Results').mkdir(parents=True, exist_ok=True)
    download_case_files()
    with open(OUTPUT_PATH, 'w', newline='') as outfile:
        writer = csv.writer(outfile)
        writer.writerow(DESIRED_HEADERS)
    for region in REGIONS:
        with open(CASE_FILES_PATH.format(region), 'r', encoding="utf-8") as infile:
            # The last row of each case file carries an "... of <date>" footer; pull the date out of it
            timestamp = str(deque(csv.reader(infile), 1)[0])
            index = timestamp.find("of")
            file_date = timestamp[index + 3:index + 13]
            FILTERS["FILE DATE"] = get_file_date(file_date)
        regional_case_files = read_file(region)
        write_to_file(region, regional_case_files)
        print()

def download_case_files():
    """ Download case files for each region """
    for region in REGIONS:
        print("Downloading region {}".format(region))
        # TODO: Design a way to handle these Error Codes: 500, 504
        try:
            urlretrieve(URL.format(region), CASE_FILES_PATH.format(region))
        except HTTPError as err:
            if err.code == 503:
                retry_download(region, 2)
            else:
                print(str(err))
    print()

def retry_download(region, attempt):
    """ Retry downloading a case file when the previous attempt failed """
    try:
        print("Attempt number {}".format(attempt))
        urlretrieve(URL.format(region), CASE_FILES_PATH.format(region))
    except HTTPError as err:
        if err.code == 503:
            retry_download(region, attempt + 1)
        else:
            print(str(err))

def read_file(region):
    """ Read case files and add the desired ones to a list """
    print("Reading region {}".format(region))
    regional_case_files = []
    with open(CASE_FILES_PATH.format(region), 'r', encoding="utf-8") as infile:
        reader = csv.reader(infile)
        headers = next(reader)
        indices = find_filter_indices(headers)
        for row in reader:
            # Only rows whose study number contains "OE" are actual case records
            if "OE" in row[0] and verify_case_file(indices, row):
                case_file = truncate_case_file(headers, row)
                regional_case_files.append(case_file)
    return regional_case_files

def write_to_file(region, case_files):
    """ Write list of desired case files to new file """
    print("Writing region {}".format(region))
    if not isinstance(case_files, list):
        return
    with open(OUTPUT_PATH, 'a', newline='', encoding="utf-8") as outfile:
        writer = csv.writer(outfile)
        for case in case_files:
            writer.writerow(case)

def find_filter_indices(headers):
    """ Create a tuple of header indices to use for filtering """
    ed = headers.index("RECEIVED DATE")
    no = headers.index("NOTICE OF")
    db = headers.index("DATE BUILT")
    st = headers.index("STRUCTURE TYPE")
    return (ed, no, db, st)

def truncate_case_file(headers, case_file):
    """ Keep only the columns whose headers appear in DESIRED_HEADERS """
    truncated_case_file = []
    for i, header in enumerate(headers):
        if header in DESIRED_HEADERS:
            truncated_case_file.append(case_file[i])
    return truncated_case_file

def verify_case_file(indices, case_file):
    """ Check if the case file meets the criteria to be relevant """
    file_date = get_file_date(case_file[indices[0]])
    if FILTERS["FILE DATE"] - file_date <= timedelta(TIME_FRAME) and \
            FILTERS["NOTICE OF"] == case_file[indices[1]] and \
            FILTERS["DATE BUILT"] in case_file[indices[2]] and \
            FILTERS["STRUCTURE TYPE"] == case_file[indices[3]]:
        return True
    return False

def get_file_date(timestamp):
    """ Return the timestamp string as a date object """
    if '/' in timestamp:
        # MM/DD/YYYY
        file_date = timestamp.split('/')
        year, month, day = int(file_date[2]), int(file_date[0]), int(file_date[1])
    elif '-' in timestamp:
        # YYYY-MM-DD
        file_date = timestamp.split('-')
        year, month, day = int(file_date[0]), int(file_date[1]), int(file_date[2])
    else:
        # Unrecognized format; fall back to an obviously old date so the row is filtered out
        year, month, day = 1, 1, 1
    return date(year, month, day)

main()
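
Unrelated to the crash, but for the TODO in download_case_files about also handling the 500 and 504 codes: the direction I'm leaning toward is replacing the recursive retry_download with a single bounded-retry helper, roughly like the sketch below (download_with_retries, RETRYABLE, and max_attempts are names made up for this sketch, not part of the script):

import time
from urllib.error import HTTPError
from urllib.request import urlretrieve

RETRYABLE = {500, 503, 504}

def download_with_retries(url, path, max_attempts=3):
    """ Retry transient HTTP errors a bounded number of times with a short backoff """
    for attempt in range(1, max_attempts + 1):
        try:
            urlretrieve(url, path)
            return True
        except HTTPError as err:
            if err.code in RETRYABLE and attempt < max_attempts:
                print("Attempt {} failed with HTTP {}, retrying".format(attempt, err.code))
                time.sleep(2 ** attempt)  # simple exponential backoff
            else:
                print(str(err))
                return False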