我想抓取一些网页内容。脚本在 Spyder 的 Python 控制台(不是 IPython 控制台)中运行正常,但在 Windows 命令行(cmd)中运行时会抛出错误。
from PyQt4.QtGui import *
from PyQt4.QtCore import *
from PyQt4.QtWebKit import *
from bs4 import BeautifulSoup
class Render(QWebPage):
    """Load a URL in a QtWebKit page and block until loading has finished.

    Constructing an instance starts a Qt event loop and only returns once
    the ``loadFinished`` signal has fired. After construction, ``self.frame``
    holds the page's main frame.
    """

    def __init__(self, url):
        """Create the Qt application, load ``url`` and wait for completion.

        Args:
            url: Address of the page to load; it is wrapped in a ``QUrl``.
        """
        # Imported locally because ``sys`` is not among the file's visible
        # imports; without it ``sys.argv`` raises NameError.
        import sys

        # The application object is created before the page is initialised.
        self.app = QApplication(sys.argv)
        QWebPage.__init__(self)
        self.loadFinished.connect(self._loadFinished)
        self.mainFrame().load(QUrl(url))
        # Blocks here until _loadFinished() calls quit().
        self.app.exec_()

    def _loadFinished(self, result):
        """Slot for ``loadFinished``: keep the main frame and stop the loop.

        Args:
            result: Value delivered by the signal; it is not inspected here.
        """
        self.frame = self.mainFrame()
        self.app.quit()
# Render the page with QtWebKit, then parse the resulting HTML.
url = "http://www.shfe.com.cn/en/MarketData/dataview.html?paramid=dailystock"
r = Render(url)

# toHtml() can return a PyQt4 ``QString`` rather than a native string (the
# traceback shows "'QString' object has no attribute 'decode'" raised inside
# bs4). Convert to the interpreter's text type before handing it to
# BeautifulSoup.
# NOTE(review): presumably the Spyder console selects the v2 QString API,
# which is why the script works there -- confirm.
try:
    _text_type = unicode  # Python 2
except NameError:
    _text_type = str  # Python 3
result = _text_type(r.frame.toHtml())

soup = BeautifulSoup(result, 'lxml')
这是错误消息:
soup = BeautifulSoup(result, 'lxml')
File "C:\Anaconda2\lib\site-packages\bs4\__init__.py", line 225, in __init__
markup, from_encoding, exclude_encodings=exclude_encodings)):
File "C:\Anaconda2\lib\site-packages\bs4\builder\_lxml.py", line 118, in prepare_markup
for encoding in detector.encodings:
File "C:\Anaconda2\lib\site-packages\bs4\dammit.py", line 257, in encodings
self.markup, self.is_html)
File "C:\Anaconda2\lib\site-packages\bs4\dammit.py", line 319, in find_declared_encoding
declared_encoding = declared_encoding_match.groups()[0].decode(
AttributeError: 'QString' object has no attribute 'decode'
Python 2.7.13 |Anaconda custom (32-bit)| (default, Dec 19 2016, 13:36:02) [MSC v.1500 32 bit (Intel)] on win32