我收到此错误
Traceback (most recent call last):
  File "C:\Users\Dad\Desktop\wunderground.py", line 63, in <module>
    soup = BeautifulSoup(page.read().decode('utf-8', 'ignore'))
  File "C:\Python27\lib\site-packages\bs4\__init__.py", line 172, in __init__
    self._feed()
  File "C:\Python27\lib\site-packages\bs4\__init__.py", line 185, in _feed
    self.builder.feed(self.markup)
  File "C:\Python27\lib\site-packages\bs4\builder\_htmlparser.py", line 146, in feed
    parser.feed(markup)
  File "C:\Python27\Lib\HTMLParser.py", line 117, in feed
    self.goahead(0)
  File "C:\Python27\Lib\HTMLParser.py", line 161, in goahead
    k = self.parse_starttag(i)
  File "C:\Python27\Lib\HTMLParser.py", line 327, in parse_starttag
    self.handle_starttag(tag, attrs)
  File "C:\Python27\lib\site-packages\bs4\builder\_htmlparser.py", line 48, in handle_starttag
    self.soup.handle_starttag(name, None, None, dict(attrs))
  File "C:\Python27\lib\site-packages\bs4\__init__.py", line 298, in handle_starttag
    self.currentTag, self.previous_element)
  File "C:\Python27\lib\site-packages\bs4\element.py", line 749, in __init__
    self.name, attrs)
  File "C:\Python27\lib\site-packages\bs4\builder\__init__.py", line 160, in _replace_cdata_list_attribute_values
    values = whitespace_re.split(value)
TypeError: expected string or buffer
这个错误出现在我尝试运行以下代码时:
import urllib2
from bs4 import BeautifulSoup
# Scrape daily temperature and dew point values from Weather Underground
# history pages and write, for each processed day, a CSV row holding a
# sliding window of the seven most recent (temperature, dew point) pairs.

# Daily-history page for airport code KADS; filled with year, month, day.
URL_TEMPLATE = "http://www.wunderground.com/history/airport/KADS/{0}/{1}/{2}/DailyHistory.html"
OUTPUT_PATH = 'wunder-data.txt'

# A page is only processed when it holds exactly this many "wx-value"
# elements; any other count ends the scan of the current month.
EXPECTED_VALUE_COUNT = 14
# Positions of the two values of interest within the "wx-value" matches.
TEMP_INDEX = 0
DEW_POINT_INDEX = 8

# TODO: pressure columns (Clv1Pressure..Clv7Pressure) were sketched in an
# earlier revision but never wired up; no "wx-value" index was chosen.


def is_leap_year(year):
    """Return True when *year* is a leap year under the Gregorian rules."""
    return year % 4 == 0 and (year % 100 != 0 or year % 400 == 0)


def days_in_month(year, month):
    """Return the number of days in *month* (1-12) of *year*."""
    if month == 2:
        return 29 if is_leap_year(year) else 28
    # April, June, September and November have 30 days.
    if month in (4, 6, 9, 11):
        return 30
    return 31


def format_row(day_temps, dew_points):
    """Return one output line for the given windows.

    The values are interleaved as temp1,dew1,temp2,dew2,... and every
    field, including the last, is followed by a comma before the newline.
    Each value goes through ``str`` so that the integer seed values and the
    strings scraped from the page can be mixed freely.
    """
    fields = []
    for temp, dew in zip(day_temps, dew_points):
        fields.append(str(temp))
        fields.append(str(dew))
    return ','.join(fields) + ',\n'


def main():
    """Fetch every daily page in the configured range and write the rows."""
    # Seed values for the seven-day windows, oldest first.
    day_temps = [32, 32, 32, 38, 42, 48, 45]
    dew_points = [32, 32, 28, 28, 31, 35, 31]

    # "with" guarantees the file is flushed and closed even if a request
    # raises part-way through the run.
    with open(OUTPUT_PATH, 'w') as f:
        f.write(format_row(day_temps, dew_points))
        # NOTE(review): range(2009) starts at year 0; the original comment
        # says "later 1985 to 2015", so the intended range is presumably
        # different - confirm before a long run.
        for y in range(2009):
            for m in range(1, 13):
                for d in range(1, days_in_month(y, m) + 1):
                    url = URL_TEMPLATE.format(y, m, d)
                    page = urllib2.urlopen(url)
                    try:
                        markup = page.read().decode('utf-8', 'ignore')
                    finally:
                        page.close()
                    # NOTE(review): the TypeError reported for this call is
                    # raised inside bs4's parser builder; presumably a newer
                    # beautifulsoup4 or another installed parser avoids it -
                    # confirm.
                    soup = BeautifulSoup(markup)
                    wx = soup.findAll(attrs={"class": "wx-value"})

                    # NOTE(review): "break" abandons the remaining days of
                    # this month, as the original did; "continue" may have
                    # been intended - confirm.
                    if len(wx) != EXPECTED_VALUE_COUNT:
                        break
                    temp = wx[TEMP_INDEX].string
                    dew = wx[DEW_POINT_INDEX].string
                    # .string is None when the element has no single text
                    # child; treat that like an unusable page.
                    if temp is None or dew is None:
                        break

                    # Slide both windows forward by one day.
                    day_temps = day_temps[1:] + [temp]
                    dew_points = dew_points[1:] + [dew]
                    f.write(format_row(day_temps, dew_points))


if __name__ == "__main__":
    main()