从 PyQt4 迁移到 PyQt5(配合 Beautiful Soup):内核崩溃并重新启动

时间:2019-03-18 00:19:54

标签: python-3.x beautifulsoup pyqt4 pyqt5

我刚全新安装了 Anaconda(Python 3.7.1 64位 | Qt 5.9.6 | PyQt5 5.9.2 | Windows 10 | Spyder 3.3.2),想把 YouTube 上 Sentdex 的教程“动态 Javascript 抓取 - 使用 Beautiful Soup 4 进行网页抓取 p.4”中的代码从 PyQt4 移植到 PyQt5,但一直没有成功。你能帮忙吗?非常感谢!

import sys
from PyQt4.QtGui import QApplication 
from PyQt4.QtCore import QUrl
from PyQt4.QtWebKit import QWebPage  
import bs4 as bs

class Client(QWebPage):
    """QtWebKit page that loads a URL and blocks until loading has finished.

    Constructing an instance starts the Qt event loop and only returns once
    the ``loadFinished`` signal has fired, so the rendered document can be
    read from ``mainFrame().toHtml()`` right after construction.
    """

    def __init__(self, url):
        """Load *url* and run the Qt event loop until the page has loaded.

        Args:
            url: Address of the page to load, as a string.
        """
        # Reuse an already existing application object (e.g. when the script
        # is re-run inside the same interpreter) instead of creating a second
        # one; only one QApplication may exist per process.
        self.app = QApplication.instance() or QApplication(sys.argv)
        # Initialise the base class that is actually imported (PyQt4's
        # QWebPage); QWebEnginePage is not available in this snippet.
        QWebPage.__init__(self)
        self.loadFinished.connect(self.on_page_load)
        self.mainFrame().load(QUrl(url))
        # exec_() is the spelling that works on every PyQt/Python combination.
        self.app.exec_()

    def on_page_load(self):
        """Stop the event loop so that ``__init__`` can return."""
        self.app.quit()

# Render the JavaScript-driven demo page with Qt, then hand the resulting
# markup to Beautiful Soup.
url = 'https://pythonprogramming.net/parsememcparseface/'
client_response = Client(url)
main_frame = client_response.mainFrame()
source = main_frame.toHtml()

# Locate the paragraph whose text is filled in by the page's JavaScript.
soup = bs.BeautifulSoup(source, features='lxml')
js_test = soup.find(name='p', attrs={'class': 'jstest'})
print(js_test.text)
# Expected output: "Look at you shinin!"

1 个答案:

答案 0 :(得分:0)

“PyQt4 to PyQt5 -> mainFrame() deprecated, need fix to load web pages”这个问题给出了一个解决方案,但我无法让它正常运行。有人可以确认下面的代码在我的环境下是否可以工作吗?谢谢!

import sys
from PyQt5.QtWebEngineWidgets import QWebEnginePage
from PyQt5.QtWidgets import QApplication
from PyQt5.QtCore import QUrl
import bs4 as bs

class Page(QWebEnginePage):
    """QtWebEngine page that loads a URL and captures the rendered HTML.

    Constructing an instance runs the Qt event loop until the page has been
    loaded and its HTML has been delivered; afterwards the markup is
    available as the ``html`` attribute (an empty string until then).
    """

    def __init__(self, url):
        """Load *url* and block until its HTML has been stored in ``html``.

        Args:
            url: Address of the page to load, as a string.
        """
        # Reuse an already existing application object (e.g. when the script
        # is re-run inside the same interpreter) instead of creating a second
        # one; only one QApplication may exist per process.
        self.app = QApplication.instance() or QApplication(sys.argv)
        QWebEnginePage.__init__(self)
        self.html = ''
        self.loadFinished.connect(self._on_load_finished)
        self.load(QUrl(url))
        self.app.exec_()

    def _on_load_finished(self):
        """Request the page's HTML once loading has finished."""
        # toHtml() is asynchronous: it returns nothing and passes the markup
        # to the callback later, so its return value must not be stored.
        self.toHtml(self.Callable)
        print('Load finished')

    def Callable(self, html_str):
        """Store the delivered HTML and stop the event loop.

        Args:
            html_str: The page's HTML as passed in by ``toHtml``.
        """
        self.html = html_str
        self.app.quit()

def main():
    """Fetch the demo page, parse it and print the JavaScript-generated text.

    Prints the text of the ``<p class="jstest">`` element. If that element
    is not present in the retrieved HTML, a diagnostic is written to stderr
    instead of failing with an ``AttributeError`` on ``None``.
    """
    page = Page('https://pythonprogramming.net/parsememcparseface/')
    soup = bs.BeautifulSoup(page.html, 'html.parser')
    js_test = soup.find('p', class_='jstest')
    if js_test is None:
        # find() returns None when nothing matches, e.g. when page.html is
        # still empty because the page could not be loaded.
        print('No <p class="jstest"> element found in the page HTML.',
              file=sys.stderr)
        return
    print(js_test.text)
    # Expected output: "Look at you shinin!"


if __name__ == '__main__':
    main()