我尝试使用beautifulsoup解析web page并使用以下代码:
def parse():
gHeader = {'User-Agent': 'Mozilla/5.0'}
gNewsLinkUrl = "http://www.google.com/finance?q=NYSE%3AF&ei=LvflU_itN8zbkgW0i4GABQ"
lPrevLinkReq = urllib2.Request(gNewsLinkUrl,headers=gHeader)
lPrevPage = urllib2.urlopen(lPrevLinkReq)
lPrevPageSoup = BeautifulSoup(lPrevPage )
当我执行上述函数parse()
时,出现以下错误:
Traceback (most recent call last):
File "google_finance_news.py", line 42, in <module>
FetchNewsDataFromWeb()
File "google_finance_news.py", line 33, in FetchNewsDataFromWeb
lPrevPage = BeautifulSoup(lPrevPage)
File "C:\C42\Finance\bs4\__init__.py", line 172, in __init__
self._feed()
File "C:\C42\Finance\bs4\__init__.py", line 185, in _feed
self.builder.feed(self.markup)
File "C:\C42\Finance\bs4\builder\_lxml.py", line 195, in feed
self.parser.close()
File "parser.pxi", line 1283, in lxml.etree._FeedParser.close (src\lxml\lxml.etree.c:98846)
File "parser.pxi", line 1313, in lxml.etree._FeedParser.close (src\lxml\lxml.etree.c:98695)
File "parsertarget.pxi", line 142, in lxml.etree._TargetParserContext._handleParseResult (src\lxml\lxml.etree.c:112853)
File "parsertarget.pxi", line 130, in lxml.etree._TargetParserContext._handleParseResult (src\lxml\lxml.etree.c:112677)
File "lxml.etree.pyx", line 327, in lxml.etree._ExceptionContext._raise_if_stored (src\lxml\lxml.etree.c:10196)
File "saxparser.pxi", line 499, in lxml.etree._handleSaxData (src\lxml\lxml.etree.c:107747)
UnicodeDecodeError: 'utf8' codec can't decode byte 0xc2 in position 719: invalid continuation byte
请帮我解决问题。