我使用以下程序创建从网站获得的城市列表。现在我想从我创建的列表中找到city(参数)的名称。我该怎么做?
换句话说,如何从列表中找到对象?我试过:listOfCities.find(city),但因为列表对象没有find属性,我收到了错误。
def weatherNow(city):
    """Download the world weather page and print the city names found in it.

    The page is scanned for every ``class="weatherred"`` marker; the text
    between each marker and the following ``</a>`` is collected into a list,
    which is printed once the scan is finished.

    Args:
        city: Name of the city the caller is interested in.  The scan itself
            does not consult it; ``city in listOfCities`` or
            ``listOfCities.index(city)`` can be applied to the collected
            list to locate it.

    Returns:
        None.  The collected list is only printed.
    """
    # ``urlopen`` lives in different modules on Python 3 and Python 2.
    try:
        from urllib.request import urlopen
    except ImportError:  # Python 2
        from urllib import urlopen

    marker = 'class="weatherred"'

    connection = urlopen("http://weather.canoe.ca/Weather/World.html")
    try:
        weather = connection.read()
    finally:
        # Release the connection even when the read fails.
        connection.close()

    if not isinstance(weather, str):
        # Python 3 returns bytes, which cannot be searched with str markers.
        # NOTE(review): assumes the page is UTF-8 -- confirm against the
        # Content-Type header.  Undecodable bytes are replaced, not raised.
        weather = weather.decode("utf-8", "replace")

    # No separate "was anything found?" guard is needed: when the marker is
    # absent the loop body never runs and the list simply stays empty.
    listOfCities = []
    cityLoc = weather.find(marker)
    while cityLoc != -1:
        cityEnd = weather.find("</a>", cityLoc)
        # +19 skips the 18-character marker plus one more character
        # (presumably the ">" closing the tag); -1 drops the character just
        # before "</a>" (presumably trailing whitespace -- verify on the page).
        listOfCities.append(weather[cityLoc + 19:cityEnd - 1])
        cityLoc = weather.find(marker, cityLoc + 1)

    # Parenthesised form prints identically on Python 2 and Python 3.
    print(listOfCities)
答案 0 :(得分:1)
检查listOfCities中是否有city:
if city in listOfCities:
# city is in the list
要在列表中查找其索引:
i = listOfCities.index(city)  # index of the first element equal to city; raises ValueError when there is none
如果city不在listOfCities中,则会引发ValueError(而不是IndexError)。
您可以使用HTMLParser来解析html而不是正则表达式。
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import cgi
# HTMLParser moved between Python 2 and Python 3; try the Python 3 location
# first and fall back to the Python 2 module name.
try:
    from html.parser import HTMLParser
except ImportError:  # Python 2
    from HTMLParser import HTMLParser

# Same pattern for urlopen: urllib.request on Python 3, urllib2 on Python 2.
try:
    from urllib.request import urlopen
except ImportError:  # Python 2
    from urllib2 import urlopen
class CitiesParser(HTMLParser):
    """Extract city list from html.

    The whole document is parsed inside the constructor; afterwards the
    collected names are available in ``cities``, in document order.
    """

    def __init__(self, html):
        """Parse *html* immediately and fill ``self.cities``.

        Args:
            html: The page markup as text.
        """
        # Explicit base-class call (rather than super()) as in the original,
        # which targets both Python 2 and Python 3.
        HTMLParser.__init__(self)
        # Stripped text of every matching anchor seen so far.
        self.cities = []
        # None until the first tag is seen; afterwards True only while the
        # most recent tag event was an opening <a class="weatherred">.
        self.incity = None
        self.feed(html)

    def handle_starttag(self, tag, attrs):
        """Start capturing only for an ``<a>`` whose class is ``weatherred``.

        *attrs* is the list of ``(name, value)`` pairs supplied by
        HTMLParser, so the class attribute must equal ``weatherred`` exactly.
        Any other opening tag switches capturing off.
        """
        self.incity = tag == 'a' and ('class', 'weatherred') in attrs

    def handle_endtag(self, tag):
        """Stop capturing on any closing tag, whatever its name."""
        self.incity = False

    def handle_data(self, data):
        """Record text, stripped of surrounding whitespace, while capturing."""
        if self.incity:
            self.cities.append(data.strip())
# download and parse city list
# NOTE: the ``cgi`` module used below for header parsing was removed from the
# standard library in Python 3.13 (PEP 594); this script needs an older
# interpreter or a replacement for ``cgi.parse_header``.
response = urlopen("http://weather.canoe.ca/Weather/World.html")
try:
    # Split "text/html; charset=..." into the media type and its parameters.
    _, params = cgi.parse_header(response.headers.get('Content-Type', ''))
    # Fall back to UTF-8 when the server declares no charset instead of
    # failing with KeyError.
    html = response.read().decode(params.get('charset', 'utf-8'))
finally:
    # Release the connection whether or not reading/decoding succeeded.
    response.close()

# find city
cities = CitiesParser(html).cities
# Try each spelling in turn and stop at the first one present in the list.
for city in ['Ar Riyāḍ', 'Riyadh']:
    if city in cities:
        print("%s is found" % (city,))
        print("the index is %d" % (cities.index(city),))
        break
else:
    # Reached only when no spelling matched; ``city`` is then the last
    # spelling tried, so only that one is reported.
    print("%r is not found" % (city,))