PyQt5用于网页抓取,无法正常工作

时间:2016-12-13 08:53:45

标签: python html pyqt5

这是我遇到的错误的截图。以下是我的代码。我正在使用 PyQt5 和 Python 3.5 来抓取一个网站。请告诉我我的代码有什么问题:

import sys
import urllib.request

import bs4 as bs
from PyQt5.QtCore import QEventLoop, QUrl
from PyQt5.QtWebEngineWidgets import QWebEngineView
from PyQt5.QtWidgets import QApplication

class Render(QWebEngineView):
    """Render a page in a web view and capture the resulting HTML.

    Constructing an instance blocks until the page has finished loading and
    its serialized markup has been delivered; the markup is then available
    as the string attribute ``html`` (an attribute, not a method).
    """

    # URL schemes that are fetched with ``load()`` instead of being treated
    # as literal markup.
    _URL_PREFIXES = ("http://", "https://", "file://")

    def __init__(self, html):
        """Load *html* and wait until the rendered markup is captured.

        Args:
            html: Either a URL starting with ``http://``, ``https://`` or
                ``file://`` (the page is fetched and rendered), or a literal
                HTML document (rendered as-is).
        """
        # Widgets need a QApplication to exist first, and Qt allows only one
        # per process, so reuse an existing instance when there is one.
        existing_app = QApplication.instance()
        self.app = QApplication(sys.argv) if existing_app is None else existing_app
        super().__init__()

        # Stays None until _callable() receives the page source.
        self.html = None
        self.loadFinished.connect(self._loadFinished)

        candidate = html.strip()
        if candidate.lower().startswith(self._URL_PREFIXES):
            # setHtml() would display the URL text itself as the document;
            # load() actually navigates to the address.
            self.load(QUrl(candidate))
        else:
            self.setHtml(html)

        # Pump the event loop by hand until the asynchronous toHtml()
        # callback has stored the markup.
        while self.html is None:
            self.app.processEvents(
                QEventLoop.ExcludeUserInputEvents |
                QEventLoop.ExcludeSocketNotifiers |
                QEventLoop.WaitForMoreEvents)
        self.app.quit()

    def _callable(self, data):
        """Store the page source handed over by ``toHtml``."""
        self.html = data

    def _loadFinished(self, result):
        """Request the page source once the view reports the load finished.

        Args:
            result: Success flag emitted by ``loadFinished``; the source is
                requested regardless so the wait loop always terminates.
        """
        self.page().toHtml(self._callable)


# NOTE: despite its name, ``html`` holds the address of the page to scrape.
html = 'http://www.couponraja.com/stores'

render = Render(html)
# ``Render.html`` is a string attribute filled in by the toHtml callback,
# not a method; calling it raises "TypeError: 'str' object is not callable".
source = render.html


soup = bs.BeautifulSoup(source, 'html.parser')

# Anchors inside the store list; print the target of each one.
links = soup.select('.list-of-store li a')

for link in links:
    print(link.get('href'))

我正在尝试访问由脚本动态生成的 HTML 元素。

0 个答案:

没有答案