抓取 Woeidlookup.com 以获取 WOEID

时间:2017-12-04 21:09:59

标签: python python-3.x

我正在尝试制作一个简单的天气查询工具。为此我选择抓取雅虎天气页面,一方面是因为我没弄明白如何使用他们的API,另一方面是因为抓取对我来说似乎更有趣。我用Python 3编程的时间还不长。所有这些代码都是在树莓派(RPi)上编写的,它使用的Python 3版本是3.4.2。

我遇到的问题是如何获取WOEID编号。我原以为抓取Woeidlookup.com就能轻松拿到这个编号。然而,在尝试了各种从该网站获取数据的方法之后,我还是只能来这里求助。我没有任何抓取这个网站的代码可以展示,因为只要代码不起作用,我就直接把它删掉了。

我相信这里有人知道如何把所需的文字填入该网站的输入框然后提交。提交之后,我还需要抓取显示在输入框下方的表格,才能得到WOEID编号。

下面贴出的是我已经完成的雅虎天气抓取代码。欢迎任何有助于我学习的代码批评意见。在过去的几周里,我在这个网站上找到了大量有用的信息,其中大部分知识都用到了这个项目中。之所以导入pandas,是为了以后如果我决定用表格代替当前的print()输出方式时使用。

import requests
from bs4 import BeautifulSoup
import pandas as pd
import datetime

def _hour_on_24h_clock(hour, meridiem):
    """Return ``hour`` converted to a 24-hour clock.

    ``meridiem`` is whatever text followed the time on the page (for example
    ``"am"`` or ``"pm"``). Pass ``""`` when there was none; ``hour`` is then
    returned unchanged, i.e. it is treated as already being a 24-hour value.
    """
    marker = meridiem.strip().lower()
    if marker.startswith("p") and hour < 12:
        return hour + 12
    if marker.startswith("a") and hour == 12:
        return 0
    return hour


# --- Build the Yahoo weather page URL from the location typed by the user ---
baseurl = "https://www.yahoo.com/news/weather"
# Spaces become hyphens because each answer is used as a URL path segment.
Country = input("Country : ").replace(" ", "-")
State = input("State : ").replace(" ", "-")
Town = input("Town : ").replace(" ", "-")
WOEID = ""  # TODO: still empty - the WOEID lookup has not been worked out yet

url = '%(U)s/%(C)s/%(S)s/%(T)s-%(W)s' % {'U': baseurl, "C": Country, "S": State, "T": Town, "W": WOEID}

# Local wall-clock time; used for the date/time lines and the time-of-day word.
now = datetime.datetime.now()
hour = 0
minute = now.minute
AMPM = ""
ToD = ""

# --- Download the page and locate the sections that are printed below ---
page = requests.get(url)
code = page.status_code
content = page.content
soup = BeautifulSoup(content, 'html.parser')
weather = soup.find(class_="weather")
temp = soup.find(class_="temperature Fz(14px) Tsh($temperature-text-shadow)")
wind = weather.find(id="weather-wind")
sunMoon = weather.find(id='weather-sun-moon')
details = weather.find(id="weather-detail")
detail = details.find(class_="detail")
start = detail.find_all(class_="Fl(start)")
end = detail.find_all(class_="Fl(end)")
desc = detail.find(class_="description Py(10px) Px(4px) Fz(1em)")
dayNight = desc.find_all(class_="day-description My(10px)")
# NOTE(review): the data-reactid values below look position dependent and may
# stop matching whenever the page markup changes - verify against the live page.
windText = wind.find('span', {'data-reactid': '455'}).text
windSpeedDir = wind.find('p', {'data-reactid': '456'}).text
baro = wind.find('span', {'data-reactid': '461'}).text
pressure = wind.find('p', {'data-reactid': '462'}).text
phase = sunMoon.find('div', {'data-reactid': '504'}).text
dayDesc = temp.find(class_="description Va(m) Px(2px) Fz(1.3em)--sm Fz(1.6em)")
currentTemp = temp.find('span', {'data-reactid': '37'}).text
degreeSymbol = temp.find('span', {'data-reactid': '38'}).text
high = temp.find('span', {'data-reactid': '29'}).text
low = temp.find('span', {'data-reactid': '33'}).text
city = weather.find(class_="city Fz(2em)--sm Fz(3.7em)--lg Fz(3.3em) Fw(n) M(0) Trsdu(.3s) desktop_Lh(1) smartphone_Lh(1)")
country = weather.find(class_="Fz(1.2em)--sm Fz(2em)--lg Fz(1.5em) Fw(200) country Trsdu(.3s) Lh(2.5)")
dateTime = weather.find(class_="Lts(1px) Fz(14px) Fs(i) Lh(2.5) Fw(300) Tsh($temperature-text-shadow)")

# find() returns None when nothing matches, so ".text" raises AttributeError;
# only that case should fall back to the alternative id (a bare "except:"
# would also swallow KeyboardInterrupt and unrelated programming errors).
try:
    sunrise = sunMoon.find('span', {'data-reactid': '510'}).text
except AttributeError:
    sunrise = sunMoon.find('span', {'data-reactid': '515'}).text
try:
    sunset = sunMoon.find('span', {'data-reactid': '511'}).text
except AttributeError:
    sunset = sunMoon.find('span', {'data-reactid': '516'}).text

# Split "<h>:<mm> <suffix>" style text into the hour and minute numbers.
sunrise1 = sunrise.split(" ")
sunrise2 = sunrise1[0].split(":")
srHour = int(sunrise2[0])
srMinute = int(sunrise2[1])
sunset1 = sunset.split(" ")
sunset2 = sunset1[0].split(":")
ssHour = int(sunset2[0])
ssMinute = int(sunset2[1])

# now.hour is on a 24-hour clock, so bring the parsed hours onto the same
# clock before comparing. The am/pm marker is taken from the text after the
# first space, when there is one.
srHour24 = _hour_on_24h_clock(srHour, sunrise1[1] if len(sunrise1) > 1 else "")
ssHour24 = _hour_on_24h_clock(ssHour, sunset1[1] if len(sunset1) > 1 else "")

# 12-hour display: 0 -> 12 AM, 12 -> 12 PM, 13 -> 1 PM.
hour = now.hour % 12 or 12
AMPM = "PM" if now.hour >= 12 else "AM"

# Before sunrise and from the sunset hour onwards it is "night", so ToD is
# never left empty.
if srHour24 <= now.hour < 12:
    ToD = "morning"
elif 12 <= now.hour < ssHour24:
    ToD = "afternoon"
else:
    ToD = "night"

# --- Report ---
print('Weather data for the city of', city.get_text(), 'in the country of', country.get_text())
print('Today is', "%02d" % (now.month), '/', "%02d" % (now.day), '/', now.year)
print('The current time is', "%02d" % (hour), ":", "%02d" % (now.minute), ":", "%02d" % (now.second), AMPM)
print('Right now it is a', dayDesc.get_text().lower(), ToD, 'sky.')
print('It is currently', currentTemp, degreeSymbol, 'F.')
print('The high for today is', high, 'F.')
print('The low for today is', low, 'F.')
# Label/value pairs from the detail section; the padding lines up the colons.
print(start[0].get_text(), ' : ', end[0].get_text())
print(start[1].get_text(), '   : ', end[1].get_text())
print(start[2].get_text(), ' : ', end[2].get_text())
print(start[3].get_text(), '   : ', end[3].get_text())
print(windText, "       : ", windSpeedDir)
print(baro, "  : ", pressure)
print("Moon Phase  : ", phase)
print("Sunrise     : ", sunrise)
print("Sunset      : ", sunset)
print(dayNight[0].get_text())
print(dayNight[1].get_text())

1 个答案:

答案 0 :(得分:1)

经过大量的编码和搜索之后,我改用另一个网站来获取WOEID和相关数据,从而解决了我遇到的问题。下面是我为学校项目的基础部分所完成的脚本。我把它拆分成了3个文件,整个代码中都没有写注释,但应该很容易理解。希望这能帮助到同样在做类似事情的人,即使这些代码可能写得不够好。

主脚本

import requests
from bs4 import BeautifulSoup
import pandas as pd
import datetime
import WeatherAppBS4_Woeid as woeid

def _hour_on_24h_clock(hour, meridiem):
    """Return ``hour`` converted to a 24-hour clock.

    ``meridiem`` is whatever text followed the time on the page (for example
    ``"am"`` or ``"pm"``). Pass ``""`` when there was none; ``hour`` is then
    returned unchanged, i.e. it is treated as already being a 24-hour value.
    """
    marker = meridiem.strip().lower()
    if marker.startswith("p") and hour < 12:
        return hour + 12
    if marker.startswith("a") and hour == 12:
        return 0
    return hour


# --- Build the Yahoo weather page URL from the values exposed by the WOEID module ---
baseurl = "https://www.yahoo.com/news/weather"
# Spaces become hyphens because each value is used as a URL path segment.
Country = woeid.Country.replace(" ", "-")
State = woeid.State.replace(" ", "-")
Town = woeid.Town.replace(" ", "-")
WOEID = woeid.WOEID

url = '%(U)s/%(C)s/%(S)s/%(T)s-%(W)s' % {'U': baseurl, "C": Country, "S": State, "T": Town, "W": WOEID}

# Local wall-clock time; used for the date/time lines and the time-of-day word.
now = datetime.datetime.now()
hour = 0
minute = now.minute
AMPM = ""
ToD = ""

# --- Download the page and locate the sections that are printed below ---
page = requests.get(url)
code = page.status_code
content = page.content
soup = BeautifulSoup(content, 'html.parser')
weather = soup.find(class_="weather")
temp = soup.find(class_="temperature Fz(14px) Tsh($temperature-text-shadow)")
wind = weather.find(id="weather-wind")
sunMoon = weather.find(id='weather-sun-moon')
details = weather.find(id="weather-detail")
detail = details.find(class_="detail")
start = detail.find_all(class_="Fl(start)")
end = detail.find_all(class_="Fl(end)")
desc = detail.find(class_="description Py(10px) Px(4px) Fz(1em)")
dayNight = desc.find_all(class_="day-description My(10px)")
# NOTE(review): the data-reactid values below look position dependent and may
# stop matching whenever the page markup changes - verify against the live page.
windText = wind.find('span', {'data-reactid': '455'}).text
windSpeedDir = wind.find('p', {'data-reactid': '456'}).text
baro = wind.find('span', {'data-reactid': '461'}).text
pressure = wind.find('p', {'data-reactid': '462'}).text
phase = sunMoon.find('div', {'data-reactid': '504'}).text
dayDesc = temp.find(class_="description Va(m) Px(2px) Fz(1.3em)--sm Fz(1.6em)")
currentTemp = temp.find('span', {'data-reactid': '37'}).text
degreeSymbol = temp.find('span', {'data-reactid': '38'}).text
high = temp.find('span', {'data-reactid': '29'}).text
low = temp.find('span', {'data-reactid': '33'}).text
city = weather.find(class_="city Fz(2em)--sm Fz(3.7em)--lg Fz(3.3em) Fw(n) M(0) Trsdu(.3s) desktop_Lh(1) smartphone_Lh(1)")
country = weather.find(class_="Fz(1.2em)--sm Fz(2em)--lg Fz(1.5em) Fw(200) country Trsdu(.3s) Lh(2.5)")
dateTime = weather.find(class_="Lts(1px) Fz(14px) Fs(i) Lh(2.5) Fw(300) Tsh($temperature-text-shadow)")

# find() returns None when nothing matches, so ".text" raises AttributeError;
# only that case should fall back to the alternative id (a bare "except:"
# would also swallow KeyboardInterrupt and unrelated programming errors).
try:
    sunrise = sunMoon.find('span', {'data-reactid': '510'}).text
except AttributeError:
    sunrise = sunMoon.find('span', {'data-reactid': '515'}).text
try:
    sunset = sunMoon.find('span', {'data-reactid': '511'}).text
except AttributeError:
    sunset = sunMoon.find('span', {'data-reactid': '516'}).text

# Split "<h>:<mm> <suffix>" style text into the hour and minute numbers.
sunrise1 = sunrise.split(" ")
sunrise2 = sunrise1[0].split(":")
srHour = int(sunrise2[0])
srMinute = int(sunrise2[1])
sunset1 = sunset.split(" ")
sunset2 = sunset1[0].split(":")
ssHour = int(sunset2[0])
ssMinute = int(sunset2[1])

# now.hour is on a 24-hour clock, so bring the parsed hours onto the same
# clock before comparing. Comparing now.hour with a 12-hour sunset value
# would classify every hour after noon as "night". The am/pm marker is taken
# from the text after the first space, when there is one.
srHour24 = _hour_on_24h_clock(srHour, sunrise1[1] if len(sunrise1) > 1 else "")
ssHour24 = _hour_on_24h_clock(ssHour, sunset1[1] if len(sunset1) > 1 else "")

# 12-hour display: 0 -> 12 AM, 12 -> 12 PM, 13 -> 1 PM.
hour = now.hour % 12 or 12
AMPM = "PM" if now.hour >= 12 else "AM"

# Before sunrise and from the sunset hour onwards it is "night", so ToD is
# never left empty.
if srHour24 <= now.hour < 12:
    ToD = "morning"
elif 12 <= now.hour < ssHour24:
    ToD = "afternoon"
else:
    ToD = "night"

# --- Report ---
print('Weather data for the city of', city.get_text(), 'in the country of', country.get_text())
print('Today is', "%02d" % (now.month), '/', "%02d" % (now.day), '/', now.year)
print('The current time is', "%02d" % (hour), ":", "%02d" % (now.minute), ":", "%02d" % (now.second), AMPM)
print('Right now it is a', dayDesc.get_text().lower(), ToD, 'sky.')
print('It is currently', currentTemp, degreeSymbol, 'F.')
print('The high for today is', high, 'F.')
print('The low for today is', low, 'F.')
# Label/value pairs from the detail section; the padding lines up the colons.
print(start[0].get_text(), ' : ', end[0].get_text())
print(start[1].get_text(), '   : ', end[1].get_text())
print(start[2].get_text(), ' : ', end[2].get_text())
print(start[3].get_text(), '   : ', end[3].get_text())
print(windText, "       : ", windSpeedDir)
print(baro, "  : ", pressure)
print("Moon Phase  : ", phase)
print("Sunrise     : ", sunrise)
print("Sunset      : ", sunset)
print(dayNight[0].get_text())
print(dayNight[1].get_text())

WOEID脚本

import requests
import bs4 as BeautifulSoup
import pandas as pd
import WeatherAppMECH as mech

# Download the lookup page whose URL was assembled by the WeatherAppMECH module.
page = requests.get(mech.url)
code = page.status_code
content = page.content
soup = BeautifulSoup.BeautifulSoup(content, 'html.parser')

# Narrow the document down to the lookup-result container.
search = soup.find(id="content")
results = search.find('div', id='lookup_result')

# Walk the children of the results table, then the children of each of those,
# collecting every inner node's .string in document order.
table = results.find('table', id='woeid_results_table')
rows = [td.string for tr in table for td in tr]

# The first collected value is dropped; the next four are read as
# town, state, country and WOEID, in that order.
resultList = rows[1:]
Town, State, Country, WOEID = resultList[0], resultList[1], resultList[2], resultList[3]

MechanicalSoup脚本

import requests
import mechanicalsoup
import bs4 as BeautifulSoup

# Imported here so this script stays self-contained; quote() percent-encodes
# text for safe use inside a URL path.
from urllib.parse import quote

browserurl = "http://woeid.rosselliot.co.nz"

# Commas are treated as separators and turned into spaces. The whole string is
# then percent-encoded, so spaces become %20 and characters such as "/", "?",
# "#" or "%" can no longer change the structure of the lookup URL. For input
# made of letters, digits, spaces and commas the result is the same as a plain
# replace-with-%20.
placeText = input('Town, state, country, address, zipcode or landmark : ')
userInput = quote(placeText.replace(',', ' '), safe='')
browser = mechanicalsoup.StatefulBrowser()

# NOTE(review): the form is selected and filled in but never submitted, so
# these steps do not influence the URL built below. They still open the site
# and fail if the lookup form cannot be found.
browser.open(browserurl)
browser.select_form('form[action="http://woeid.rosselliot.co.nz/lookup"]')
browser["place"] = userInput

browser.close()

# Lookup URL read by the WOEID script: <baseurl>/<encoded place>.
baseurl = "http://woeid.rosselliot.co.nz/lookup"
url = '%(U)s/%(I)s' % {'U': baseurl, "I": userInput}