大家好,我写了一个自动化脚本,用于从下面的网站中提取内容。
# -*- coding: utf-8 -*-
from bs4 import BeautifulSoup
from datetime import datetime
import dateutil.parser
import urllib2
import requests
import sys
import re, clr
import csv
import pandas as pd
from selenium import webdriver
from ast import literal_eval
import shorten
# Location of the chromedriver binary used to launch Chrome.
chrome_path = r"/usr/bin/chromedriver"
driver = webdriver.Chrome(chrome_path)
driver.get("http://snowload.atcouncil.org/")  # open the site

# Click the radio button, then click into the address text box.
driver.find_element_by_xpath(
    """//*[@id="adminForm"]/fieldset/div/div[2]/div[2]/label""").click()
driver.find_element_by_xpath("""//*[@id="coordinate_address"]""").click()

cities = [' Aguila,Arizona']  # city list
# NOTE(review): the original indentation was lost; the loop body is assumed
# to end at the `div = ...` line. Only the `div` of the last city survives
# the loop, so with several cities the later parsing must move inside it.
for city in cities:
    # Type the city into the address box and submit the form.
    driver.find_element_by_xpath('//*[@id="coordinate_address"]').send_keys(city)
    driver.find_element_by_xpath('//*[@id="adminForm"]/fieldset/div/div[2]/button').click()
    x = driver.current_url
    # Post the city currently being processed, not the whole `cities` list
    # (a list value would be sent as one form field per city).
    Data = {'optionCoordinate': '2', 'coordinate_address': city}
    page = requests.post(x, data=Data)
    soup = BeautifulSoup(page.content, 'html.parser')
    # Container whose <li> entries are parsed further down the script.
    div = soup.find('div', attrs={'class': 'span5'})
data = []
new_list = []
def remove_from_list(l, x):
    """Return a new list with substring ``x`` removed from every string in ``l``.

    ``l`` is not modified; each element must provide ``str.replace``.
    """
    return [li.replace(x, '') for li in l]
def flatten(iterable):
    """Expand items that are string representations of lists.

    Each item is passed to ``literal_eval``. If it evaluates to a list, that
    list is flattened recursively and its elements are spliced into the
    result. Any other item (including one that cannot be evaluated) is
    appended unchanged.

    Returns a new flat list; ``iterable`` is not modified.
    """
    result = []
    for item in iterable:
        try:
            item_eval = literal_eval(item)
        except (ValueError, SyntaxError, TypeError):
            # Not a literal at all. TypeError covers inputs such as
            # "{[]: 1}", where building the literal itself fails.
            item_eval = None
        if isinstance(item_eval, list):
            result.extend(flatten(item_eval))
        else:
            result.append(item)
    return result
def shorten(s, subs):
    """Return ``s`` cut off right after the first occurrence of ``subs``.

    Raises ``ValueError`` (from ``str.index``) when ``subs`` is not in ``s``.

    NOTE: this definition rebinds the name ``shorten`` that the file also
    imports as a module at the top.
    """
    i = s.index(subs)
    return s[:i + len(subs)]
csvList = []
# Only the first six <li> entries of the container are read.
for li in div.find_all('li', limit=6):
    # Drop non-ASCII characters, then split "label: value" on the colon.
    extracted_data = li.get_text().encode("ascii", "ignore")
    data = extracted_data.split(":")
    data = remove_from_list(data, u'\xa0')
    data = remove_from_list(data, u'\r')
    data = remove_from_list(data, u'\n')
    # Compare by value; `is` on an int only works by implementation accident.
    if len(data) == 2:
        print(str(data[1]))
        csvList.append(str(data[1]))
    else:
        # More than one colon: keep the next two pieces as a list string,
        # which flatten() expands into separate entries below.
        print(str(data[1:3]))
        csvList.append(str(data[1:3]))
csvList = flatten(csvList)
csvList = [str(value) for value in csvList]

# Reformat the date from e.g. "September 01, 2017" to "01/09/17".
datetime_object = datetime.strptime(csvList[0], '%B %d, %Y').strftime('%d/%m/%y')
print(datetime_object)
csvList[0] = str(datetime_object)

# Keep digits only. This also strips a decimal point ("2170.4" -> "21704").
csvList[4] = re.sub(r"\D", "", csvList[4])
csvList[6] = re.sub(r"\D", "", csvList[6])
# Cut everything after the word 'Load'.
csvList[5] = shorten(csvList[5], 'Load')
print(csvList)

with open('/home/priyanka/Desktop/output.csv', 'w') as csv_file:
    wr = csv.writer(csv_file, quoting=csv.QUOTE_ALL)
    wr.writerow(csvList)
driver.close()
这段代码可以提取内容。但我需要的是:根据提取到的海拔值(例如“海拔:2170.4 英尺”),在下面三条规则中只取与之对应的一条;在这个例子中应取“海拔 ≤ 3,000 英尺:地面雪荷载为 0 psf”。
海拔高度≤3,000英尺:地面雪荷载为0 psf
海拔> 3,000和≤4,500英尺:地面雪荷载为5 psf
海拔> 4,500和≤5,400英尺:地面雪荷载为10 psf
也就是说,应把提取到的海拔(Elevation)值与每条规则中的海拔范围(英尺)进行比较,并取出对应规则的内容。
最后输出应该看起来像
['01/09/17', 'Aguila,Arizona', '33.9428069', '-113.1740805', '21704', 'ASCE 7* Ground Snow Load', '0']
请帮帮我们。在此先感谢。