使用python3进行动态内容网络抓取

时间:2017-04-03 05:12:21

标签: python python-3.x web-scraping pyqt4

运行下面的代码时，会出现以下错误。你能帮我解决这个问题吗？

Traceback (most recent call last):
  File "dynamic.py", line 20, in <module>
    client_response = Client(url)
TypeError: QWebPage(QObject parent=None): argument 1 has unexpected type 'str'
  

代码:

import sys
from PyQt4.QtGui import QApplication
from PyQt4.QtCore import QUrl
from PyQt4.QtWebKit import QWebPage
import bs4 as bs


# QWebPage subclass that is meant to load `url` and run the Qt event loop
# until the page's loadFinished signal fires.
class Client(QWebPage):
    # NOTE: this method is named `_int_` (single underscores, missing "i"), so
    # Python does not treat it as the constructor `__init__`. `Client(url)`
    # therefore hands `url` directly to QWebPage's own constructor, which is
    # consistent with the TypeError shown in the traceback above.
    def _int_(self, url):
        self.app = QApplication(sys.argv)
        # NOTE: `_init_` is presumably meant to be `__init__` as well.
        QWebPage._init_(self)
        # Quit the event loop (see on_page_load) once loading has finished.
        self.loadFinished.connect(self.on_page_load)
        self.mainFrame().load(QUrl(url))
        self.app.exec_()
    # Slot connected to loadFinished above; quits the application in self.app.
    def on_page_load(self):
        self.app.quit()

url = "https://pythonprogramming.net/parsememcparseface/"
# This is the statement reported in the traceback above.
client_response = Client(url)
# Take the main frame's HTML, parse it with BeautifulSoup (lxml parser), and
# print the text of the first <p class="jstest"> element.
source = client_response.mainFrame().toHtml()
soup = bs.BeautifulSoup(source, 'lxml')
js_test = soup.find('p', class_='jstest')
print(js_test.text)
  

修改后的代码:

import sys
from PyQt4.QtGui import QApplication
from PyQt4.QtCore import QUrl
from PyQt4.QtWebKit import QWebPage
import bs4 as bs


class Client(QWebPage):
    """Load a URL in a QtWebKit page and block until loading has finished.

    Constructing an instance starts the load and runs the Qt event loop;
    the constructor returns only after the ``loadFinished`` signal has
    fired. The page content is then available via ``mainFrame()``.

    Args:
        url: Address of the page to load, as a string.
    """

    def __init__(self, url):
        # Qt permits only one QApplication per process, so reuse an existing
        # one instead of unconditionally creating another. It must exist
        # before the QWebPage base class is initialised.
        self.app = QApplication.instance() or QApplication(sys.argv)
        super().__init__()
        self.loadFinished.connect(self.on_page_load)
        self.mainFrame().load(QUrl(url))
        # Blocks here until on_page_load() asks the application to quit.
        self.app.exec_()

    def on_page_load(self):
        """Slot for ``loadFinished``: stop the event loop run by ``__init__``."""
        self.app.quit()

# Load the target page through the QtWebKit-based Client defined above in
# this script; the call returns once the page has finished loading.
url = "https://pythonprogramming.net/parsememcparseface/"
client_response = Client(url)

# Pull the HTML out of the page's main frame and parse it with lxml.
main_frame = client_response.mainFrame()
source = main_frame.toHtml()
soup = bs.BeautifulSoup(source, 'lxml')

# Print the text of the first <p class="jstest"> element.
js_test = soup.find('p', class_='jstest')
print(js_test.text)

0 个答案:

没有答案