如何使用 PySide / PyQt 从网站下载文件

时间:2016-03-05 19:16:33

标签: python qt pyqt pyside

我有一段代码,可以接收网页的所有网络资源。这段代码是我从这个网站上获取的,所以我不清楚它的具体工作原理,但我知道它能接收到网页的所有网络资源,而这正是我所需要的。
这是我的代码:

import sys, time
from PySide.QtCore import QUrl, SIGNAL
from PySide.QtGui import QApplication
from PySide.QtWebKit import QWebPage, QWebView, QWebSettings
from PySide.QtNetwork import QNetworkAccessManager, QNetworkRequest

#reload(sys)
#sys.setdefaultencoding('utf-8')
# Name of the URL log file; the only use of its handle in this file
# (a per-reply write in NetworkAccessManager.finishd) is commented out.
fn_log = 'url_dd.txt'
# Opened once at import time in binary append/read mode and left open.
fp_log = open(fn_log, mode='ab+')
class WebPage(QWebPage):
    """QWebPage subclass that echoes JavaScript console messages to stderr."""

    def __init__(self, logger=None, parent=None):
        """Create the page.

        Args:
            logger: Accepted but not used anywhere in this class.
            parent: Optional parent object forwarded to QWebPage.
        """
        super(WebPage, self).__init__(parent)

    def javaScriptConsoleMessage(self, message, lineNumber, sourceID):
        """Write a JavaScript console message to stderr.

        Args:
            message: Text of the console message.
            lineNumber: Line number the message refers to (formatted as %d).
            sourceID: Identifier of the script that produced the message.
        """
        # Fixed the misspelled "Javascritp" in the emitted message.
        sys.stderr.write('JavaScript error at line number %d\n' % (lineNumber))
        sys.stderr.write('%s\n' % (message, ))
        sys.stderr.write('Source ID: %s\n' % (sourceID, ))

class Crawler(QApplication):
    """Qt application that loads a single URL in a QWebView and then quits.

    The page is given a NetworkAccessManager instance so that every finished
    network reply is reported by that manager while the page loads.
    """

    def __init__(self, url):
        """Build the view/page pair and start loading ``url``.

        Args:
            url: Address of the page to load, as a string.
        """
        super(Crawler, self).__init__(sys.argv)
        self.url = url
        self.web_view = QWebView()
        self.web_page = WebPage()
        self.web_view.setPage(self.web_page)
        self.web_frame = self.web_page.mainFrame()
        # Install the custom manager so all requests of the page go through it.
        self.network = NetworkAccessManager()
        self.web_page.setNetworkAccessManager(self.network)
        self.settings = self.web_page.settings().globalSettings()
        self.settings.setAttribute(QWebSettings.PluginsEnabled, False)
        QWebSettings.clearMemoryCaches()
        self.web_view.resize(1024, 9000)
        self.connect(self.web_page, SIGNAL('loadFinished(bool)'), self.loadFinished)
        print('Before loading')
        self.web_view.load(QUrl(self.url))
        print('After loading')

    def loadFinished(self, ok):
        """Slot for the page's ``loadFinished(bool)`` signal; quits the app.

        Args:
            ok: Success flag delivered by the signal. A false value is now
                reported on stderr instead of being silently ignored.
        """
        print('Start loadFinished()')
        if not ok:
            # Previously a failed load was indistinguishable from a success.
            sys.stderr.write('Failed to load %s\n' % (self.url, ))
        print('Start writing')
        # Dumping the page HTML to 'content_dd.txt' is disabled:
        #with open('content_dd.txt', 'ab+') as fp:
            #fp.write(self.web_frame.toHtml().toUtf8())
        print('End writing')
        print('End loadFinished()')
        try:
            self.quit()
        except Exception as e:
            print('FATAL ERROR: %s' % (str(e)))

class NetworkAccessManager(QNetworkAccessManager):
    """QNetworkAccessManager that prints details of every finished reply."""

    def __init__(self):
        """Initialise the manager and hook its finished signal to ``finishd``."""
        super(NetworkAccessManager, self).__init__()
        finished_signal = SIGNAL('finished (QNetworkReply *)')
        self.connect(self, finished_signal, self.finishd)

    def createRequest(self, operation, request, data):
        """Mark the network as accessible, then defer to the base class.

        Args:
            operation: Operation value passed through to the base class.
            request: Request object passed through to the base class.
            data: Outgoing data passed through to the base class.

        Returns:
            Whatever ``QNetworkAccessManager.createRequest`` returns.
        """
        self.setNetworkAccessible(self.Accessible)
        created = QNetworkAccessManager.createRequest(
            self, operation, request, data)
        return created

    def finishd(self, reply):
        """Print the reply, its request and a timestamped URL line.

        Args:
            reply: The finished reply delivered by the signal.
        """
        print('In NetworkAccessManager finishd')
        address = str(reply.url().toString())
        stamp = time.ctime()
        entry = '{0}: {1}\n'.format(stamp, address)
        # Writing ``entry`` to the module-level log file is left disabled.
        print(reply)
        print(reply.request())
        print(entry)
        print(address)

if __name__ == '__main__':
    # Script entry point: crawl one fixed page and exit with Qt's return code.
    url = 'http://need4bit.com'
    crawler = Crawler(url)
    exit_code = crawler.exec_()
    sys.exit(exit_code)

我应该如何修改这段代码,才能将所有资源保存到一个目录中?

0 个答案:

没有答案