我刚重新安装了 Anaconda(Python 3.7.1 64 位 | Qt 5.9.6 | PyQt5 5.9.2 | Windows 10 | Spyder 3.3.2),但一直无法把 YouTube 上 Sentdex 的教程“动态 JavaScript 抓取 - 使用 Beautiful Soup 4 进行网页抓取 p.4”中的代码移植到 PyQt5。你能帮忙吗?非常感谢!
import sys
from PyQt4.QtGui import QApplication
from PyQt4.QtCore import QUrl
from PyQt4.QtWebKit import QWebPage
import bs4 as bs
class Client(QWebPage):
    """Load a URL in a QtWebKit page and block until loading has finished.

    Constructing a ``Client`` starts the Qt event loop and only returns once
    the ``loadFinished`` signal has fired, so the caller can read the page
    content from ``mainFrame()`` immediately afterwards.
    """

    def __init__(self, url):
        """Create the application, start loading ``url`` and wait for it.

        Args:
            url: Address of the page to load, as a string.
        """
        # The application object is created before the page itself,
        # matching the ordering of the original snippet.
        self.app = QApplication(sys.argv)
        # Initialise the actual base class (QWebPage); the previous call
        # targeted QWebEnginePage, which this snippet never imports.
        QWebPage.__init__(self)
        self.loadFinished.connect(self.on_page_load)
        self.mainFrame().load(QUrl(url))
        # Blocks here until on_page_load() quits the event loop.
        self.app.exec_()

    def on_page_load(self):
        """Stop the event loop started in ``__init__`` once the page loaded."""
        self.app.quit()
# Render the demo page (including its JavaScript) and parse the result.
url = 'https://pythonprogramming.net/parsememcparseface/'
client_response = Client(url)
source = client_response.mainFrame().toHtml()
soup = bs.BeautifulSoup(source, 'lxml')

# Look up the <p class="jstest"> element that the page's script fills in.
js_test = soup.find('p', attrs={'class': 'jstest'})
print(js_test.text)
# output should be "Look at you shinin!"
答案 0 :(得分:0)
《PyQt4 to PyQt5 -> mainFrame() deprecated, need fix to load web pages》一文给出了解决方案,但我无法让它正常运行。有人可以确认下面的代码在我的编码环境下是否可以工作吗?谢谢!
import sys
from PyQt5.QtWebEngineWidgets import QWebEnginePage
from PyQt5.QtWidgets import QApplication
from PyQt5.QtCore import QUrl
import bs4 as bs
class Page(QWebEnginePage):
    """Load a URL with Qt WebEngine and capture the rendered HTML.

    Constructing a ``Page`` runs the Qt event loop until the page has loaded
    and its HTML has been delivered to ``Callable``; afterwards the markup
    is available in the ``html`` attribute.
    """

    def __init__(self, url):
        """Start loading ``url`` and block until its HTML has been captured.

        Args:
            url: Address of the page to load, as a string.
        """
        # Qt allows only one application object per process, so reuse an
        # existing one (e.g. when the script is re-run in the same
        # interpreter session) and create one only if none exists yet.
        self.app = QApplication.instance() or QApplication(sys.argv)
        super().__init__()
        self.html = ''
        self.loadFinished.connect(self._on_load_finished)
        self.load(QUrl(url))
        # Blocks here until Callable() quits the event loop.
        self.app.exec_()

    def _on_load_finished(self):
        """Request the page's HTML once loading has finished."""
        # toHtml() is asynchronous: the markup is handed to the callback
        # later, so its return value must not be stored in self.html.
        self.toHtml(self.Callable)
        print('Load finished')

    def Callable(self, html_str):
        """Store the HTML delivered by ``toHtml`` and stop the event loop.

        Args:
            html_str: The page content as an HTML string.
        """
        self.html = html_str
        self.app.quit()
def main():
    """Fetch the demo page and print the text of its ``jstest`` paragraph."""
    demo_url = 'https://pythonprogramming.net/parsememcparseface/'
    rendered_page = Page(demo_url)
    document = bs.BeautifulSoup(rendered_page.html, 'html.parser')
    paragraph = document.find('p', attrs={'class': 'jstest'})
    # output should be "Look at you shinin!"
    print(paragraph.text)
# Run the scraper only when this file is executed as a script.
if __name__ == '__main__':
    main()