我在 Ubuntu 12.04(amd64)上使用 PyQt 4.9.1(Python 2.6 和 2.7 都试过)制作无头浏览器,但程序运行时崩溃:Program received signal SIGSEGV, Segmentation fault。下面是该程序的简化版本(仍然有点长):
# -*- coding: utf-8 -*-
# Start a pyvirtualdisplay Display (visible=False) before any Qt object is created.
from pyvirtualdisplay import Display
display = Display(visible=False, size=(1024, 768), color_depth=24)
display.start()
from PyQt4.QtGui import QApplication
#from PySide.QtGui import QApplication
# The QApplication is created before the Qt reactor is installed.
# NOTE(review): presumably qt4reactor needs an existing application object - confirm.
app = QApplication([])
import qt4reactor
# Install the Qt-based Twisted reactor at import time; twisted.internet.reactor
# itself is only imported later, inside start_server().
qt4reactor.install()
from twisted.web import server
from twisted.web.xmlrpc import XMLRPC
from twisted.internet import defer
from PyQt4.QtWebKit import QWebSettings, QWebView, QWebPage
from PyQt4.QtNetwork import QNetworkAccessManager, QNetworkRequest
from PyQt4.QtCore import QUrl, QByteArray, QTimer
class CustomQNetworkAccessManager(QNetworkAccessManager):
    """Network access manager that ignores SSL errors and prints each one."""

    def __init__(self, *args, **kwargs):
        """Forward all arguments to the base class and hook ``sslErrors``."""
        super(CustomQNetworkAccessManager, self).__init__(*args, **kwargs)
        self.sslErrors.connect(self._ssl_errors)

    def _ssl_errors(self, reply, errors):
        """Slot for ``sslErrors``: let the reply continue, then report every error."""
        reply.ignoreSslErrors()
        for ssl_error in errors:
            code = ssl_error.error()
            text = ssl_error.errorString()
            print('Ignored SSL Error: {0} - {1}'.format(code, text))
class CustomQWebPage(QWebPage):
    """QWebPage that reports a fixed Firefox 5 user agent for every URL."""

    # No constructor is defined: the inherited QWebPage constructor is used,
    # which is what a pass-through ``__init__`` would call anyway.

    def userAgentForUrl(self, url):
        """Return the same user-agent string regardless of ``url``."""
        return 'Mozilla/5.0 (Windows NT 6.1; rv:5.0) Gecko/20100101 Firefox/5.0'
class WebkitWrapper(QWebView):
    """QWebView that loads a URL and hands the resulting HTML to a Deferred."""

    def __init__(self, *args, **kwargs):
        """Create the view with a custom page and network manager.

        All arguments are forwarded to QWebView.
        """
        super(WebkitWrapper, self).__init__(*args, **kwargs)
        self.network_manager = CustomQNetworkAccessManager()
        # The page is deliberately kept in a local name only, exactly as before.
        page = CustomQWebPage()
        page.setNetworkAccessManager(self.network_manager)
        self.setPage(page)

        web_settings = self.settings()
        for attribute, enabled in (
            (QWebSettings.AutoLoadImages, True),
            (QWebSettings.JavaEnabled, False),
            (QWebSettings.JavascriptEnabled, False),
            (QWebSettings.JavascriptCanOpenWindows, False),
            (QWebSettings.PluginsEnabled, False),
        ):
            web_settings.setAttribute(attribute, enabled)

        self.loadFinished.connect(self._load_finished)

    def perform(self, request_data, timeout=15):
        """Start loading ``request_data['url']`` and return a Deferred.

        ``request_data`` must offer ``.get``; a missing 'url' key falls back
        to an empty string.  ``timeout`` is accepted but not used here.
        The returned Deferred is fired by ``_load_finished``.
        """
        self._deferred_request = defer.Deferred()
        target = request_data.get('url', '')
        network_request = QNetworkRequest()
        network_request.setUrl(QUrl(target))
        self.load(network_request)
        print('getting: {0}'.format(target))
        return self._deferred_request

    def _load_finished(self, ok):
        """Slot for ``loadFinished``: fire the pending Deferred with the page HTML."""
        print('load finished: {0}'.format(ok))
        main_frame = self.page().mainFrame()
        html = main_frame.toHtml()
        self._deferred_request.callback(html)
class HeadlessBrowser(object):
    """Pairs one WebkitWrapper with the id it is tracked under."""

    def __init__(self, instance_id):
        """Create the wrapped web view and remember ``instance_id``."""
        self.webkit_wrapper = WebkitWrapper()
        self.instance_id = instance_id

    def _return_intance_id(self, result):
        """Deferred callback: tag ``result`` with this browser's id."""
        return (self.instance_id, result)

    def _request_failed(self, failure):
        """Errback: print the error and its traceback, return the message."""
        failure.trap(Exception)
        message = failure.getErrorMessage()
        print(message)
        print(failure.getTraceback())
        return message

    def shutdown(self):
        """Close the wrapped web view."""
        self.webkit_wrapper.close()

    def get_request(self, request_data):
        """Start a page load and return a Deferred yielding ``(instance_id, result)``.

        Failures are turned into their error message by ``_request_failed``
        before the id is attached.
        """
        pending = self.webkit_wrapper.perform(request_data)
        pending.addErrback(self._request_failed)
        pending.addBoth(self._return_intance_id)
        return pending
class TestXMLRPCServer(XMLRPC):
    """XML-RPC resource that creates one HeadlessBrowser per ``open`` call."""

    def __init__(self):
        """Set up the resource with ``allowNone`` and an empty instance registry."""
        XMLRPC.__init__(self, allowNone=True)
        self.browser_instances = {}
        self.instance_counter = 0

    def _result_returned(self, result):
        """Callback: shut down the browser that produced ``result``.

        ``result`` is the ``(instance_id, browser_result)`` pair; only
        ``browser_result`` is passed on.
        """
        instance_id, browser_result = result
        print('killing instance: {0}'.format(instance_id))
        # Keep the popped instance in a local so it stays referenced until return.
        finished_browser = self.browser_instances.pop(instance_id)
        finished_browser.shutdown()
        return browser_result

    def xmlrpc_open(self, request_data):
        """Handle the ``open`` RPC: load the requested page in a new browser.

        Returns the Deferred obtained from the browser, with the cleanup
        callback attached.
        """
        print('requested: {0}'.format(request_data))
        self.instance_counter += 1
        new_id = self.instance_counter
        browser = HeadlessBrowser(new_id)
        self.browser_instances[new_id] = browser
        pending = browser.get_request(request_data)
        pending.addCallback(self._result_returned)
        return pending
def start_server(port=8297):
    """Serve a TestXMLRPCServer on ``port`` and run the reactor."""
    # The reactor is imported inside the function rather than at module level.
    from twisted.internet import reactor
    site = server.Site(TestXMLRPCServer())
    reactor.listenTCP(port, site)
    reactor.run()


if __name__ == '__main__':
    start_server()
目前我能确定的是,问题与保存这些 HeadlessBrowser 实例的方式有关:我用 dict 来保存它们,在真正的实现中会通过自定义的 session_id 重用它们,这里只是用 instance_counter 来模拟,以展示其工作方式。尽管我把实例保存在 dict 中,它们看起来还是被垃圾回收了;不过我并不确定问题发生的真正原因。无论如何,如果我去掉 dict,只在 xmlrpc 类中把单个实例保存为类属性,问题就不会出现。
这是它的示例客户端:
# -*- coding: utf-8 -*-
import xmlrpclib
def test_server(port=8297):
    """Call the server's ``open`` RPC for a sample URL and print the reply."""
    endpoint = 'http://localhost:{0}/'.format(port)
    proxy = xmlrpclib.Server(endpoint)
    page_html = proxy.open({'url': 'http://www.microsoft.com'})
    print(page_html)


if __name__ == '__main__':
    test_server()
有什么建议吗?
更新:添加了回溯:
(gdb) run
Starting program ...
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1".
[New Thread 0x7fffe8f60700 (LWP 13393)]
Xlib: extension "RANDR" missing on display ":1851".
[New Thread 0x7fffe3fff700 (LWP 13394)]
[New Thread 0x7fffe37fe700 (LWP 13395)]
requested: {'url': 'http://www.microsoft.com'}
[New Thread 0x7fffd9a2f700 (LWP 13422)]
[New Thread 0x7fffd9116700 (LWP 13423)]
[New Thread 0x7fffcfdd2700 (LWP 13425)]
getting: http://www.microsoft.com
[New Thread 0x7fffcf5d1700 (LWP 13426)]
[New Thread 0x7fffc5f28700 (LWP 13427)]
[Thread 0x7fffe37fe700 (LWP 13395) exited]
load finished: True
killing instance: 1
Program received signal SIGSEGV, Segmentation fault.
QMetaObject::activate (sender=0x0, m=<optimized out>, local_signal_index=8, argv=0x7fffffffc960) at kernel/qobject.cpp:3456
3456 kernel/qobject.cpp: No such file or directory.
(gdb) bt
#0 QMetaObject::activate (sender=0x0, m=<optimized out>, local_signal_index=8, argv=0x7fffffffc960) at kernel/qobject.cpp:3456
#1 0x00007fffdad358b2 in QWebFrame::loadFinished (this=<optimized out>, _t1=true) at ./moc_qwebframe.cpp:239
#2 0x00007fffdad74e08 in WebCore::FrameLoaderClientQt::dispatchDidFinishLoad (this=0x1428290) at WebCoreSupport/FrameLoaderClientQt.cpp:527
#3 0x00007fffdb0cfcbb in WebCore::FrameLoader::recursiveCheckLoadComplete (this=0x7fffd9141478) at loader/FrameLoader.cpp:2641
#4 0x00007fffdb100754 in WebCore::SubresourceLoader::didFinishLoading (this=0x7fffc5f3d300, finishTime=0) at loader/SubresourceLoader.cpp:202
#5 0x00007fffdb2f033b in WebCore::QNetworkReplyHandler::finish (this=0x14adcb0) at platform/network/qt/QNetworkReplyHandler.cpp:454
#6 0x00007fffdb2f01ea in flush (this=0x14adce8) at platform/network/qt/QNetworkReplyHandler.cpp:195
#7 WebCore::QNetworkReplyHandlerCallQueue::flush (this=0x14adce8) at platform/network/qt/QNetworkReplyHandler.cpp:187
#8 0x00007fffdb2f0255 in WebCore::QNetworkReplyHandlerCallQueue::push (this=0x14adce8, method=
(void (WebCore::QNetworkReplyHandler::*)(WebCore::QNetworkReplyHandler * const)) 0x7fffdb2f0260 <WebCore::QNetworkReplyHandler::finish()>)
at platform/network/qt/QNetworkReplyHandler.cpp:164
#9 0x00007fffdb2f0c8c in WebCore::QNetworkReplyWrapper::didReceiveFinished (this=0x14af650) at platform/network/qt/QNetworkReplyHandler.cpp:349
#10 0x00007ffff482f281 in QMetaObject::activate (sender=0x14ae120, m=<optimized out>, local_signal_index=<optimized out>, argv=0x0) at kernel/qobject.cpp:3547
#11 0x00007fffe0ea5fe6 in QNetworkReplyImplPrivate::finished (this=0x14ae210) at access/qnetworkreplyimpl.cpp:795
#12 0x00007fffe0f1c655 in QNetworkAccessHttpBackend::qt_static_metacall (_o=0x14ae5c0, _c=<optimized out>, _id=<optimized out>, _a=<optimized out>)
at .moc/release-shared/moc_qnetworkaccesshttpbackend_p.cpp:90
#13 0x00007ffff4834446 in QObject::event (this=0x14ae5c0, e=<optimized out>) at kernel/qobject.cpp:1195
#14 0x00007ffff4d3d894 in notify_helper (e=0x7fffc8019be0, receiver=0x14ae5c0, this=0x9d0e30) at kernel/qapplication.cpp:4559
#15 QApplicationPrivate::notify_helper (this=0x9d0e30, receiver=0x14ae5c0, e=0x7fffc8019be0) at kernel/qapplication.cpp:4531
#16 0x00007ffff4d42713 in QApplication::notify (this=0x966ab0, receiver=0x14ae5c0, e=0x7fffc8019be0) at kernel/qapplication.cpp:4420
#17 0x00007ffff5d4c016 in ?? () from .../lib/python2.6/site-packages/PyQt4/QtGui.so
#18 0x00007ffff481ae9c in QCoreApplication::notifyInternal (this=0x966ab0, receiver=0x14ae5c0, event=0x7fffc8019be0) at kernel/qcoreapplication.cpp:876
#19 0x00007ffff481ec6a in sendEvent (event=0x7fffc8019be0, receiver=0x14ae5c0) at ../../include/QtCore/../../src/corelib/kernel/qcoreapplication.h:231
#20 QCoreApplicationPrivate::sendPostedEvents (receiver=0x0, event_type=0, data=0x9d1000) at kernel/qcoreapplication.cpp:1500
#21 0x00007ffff4849f93 in sendPostedEvents () at ../../include/QtCore/../../src/corelib/kernel/qcoreapplication.h:236
#22 postEventSourceDispatch (s=<optimized out>) at kernel/qeventdispatcher_glib.cpp:279
#23 0x00007ffff3a8ec9a in g_main_context_dispatch () from /lib/x86_64-linux-gnu/libglib-2.0.so.0
---Type <return> to continue, or q <return> to quit---
#24 0x00007ffff3a8f060 in ?? () from /lib/x86_64-linux-gnu/libglib-2.0.so.0
#25 0x00007ffff3a8f124 in g_main_context_iteration () from /lib/x86_64-linux-gnu/libglib-2.0.so.0
#26 0x00007ffff484a3bf in QEventDispatcherGlib::processEvents (this=0xb7dfb0, flags=...) at kernel/qeventdispatcher_glib.cpp:424
#27 0x00007ffff4de5d5e in QGuiEventDispatcherGlib::processEvents (this=<optimized out>, flags=...) at kernel/qguieventdispatcher_glib.cpp:204
#28 0x00007ffff4819c82 in QEventLoop::processEvents (this=<optimized out>, flags=...) at kernel/qeventloop.cpp:149
#29 0x00007ffff4819ed7 in QEventLoop::exec (this=0xea6cd0, flags=...) at kernel/qeventloop.cpp:204
#30 0x00007ffff0d627e2 in ?? () from .../lib/python2.6/site-packages/PyQt4/QtCore.so
#31 0x000000000049a15d in PyEval_EvalFrameEx ()
#32 0x000000000049be0f in PyEval_EvalCodeEx ()
#33 0x000000000049a57a in PyEval_EvalFrameEx ()
#34 0x000000000049be0f in PyEval_EvalCodeEx ()
#35 0x000000000049a57a in PyEval_EvalFrameEx ()
#36 0x000000000049be0f in PyEval_EvalCodeEx ()
#37 0x000000000049bef2 in PyEval_EvalCode ()
#38 0x00000000004be6e0 in PyRun_FileExFlags ()
#39 0x00000000004bf3d7 in PyRun_SimpleFileExFlags ()
#40 0x0000000000418850 in Py_Main ()
#41 0x00007ffff68e576d in __libc_start_main () from /lib/x86_64-linux-gnu/libc.so.6
#42 0x0000000000417ab1 in _start ()
(gdb)
答案 0 :(得分:9)
抱歉回复晚了,我终于有时间为自己的问题发布解决方案。段错误的根本原因是:在对浏览器实例的最后一个引用被删除之前,没有先删除 Qt 对象。下面是修复后的代码:
# -*- coding: utf-8 -*-
# Start a pyvirtualdisplay Display (visible=False) before any Qt object is created.
from pyvirtualdisplay import Display
display = Display(visible=False, size=(1024, 768), color_depth=24)
display.start()
from PyQt4.QtGui import QApplication
# The QApplication is created before the Qt reactor is installed.
# NOTE(review): presumably qt4reactor needs an existing application object - confirm.
app = QApplication([])
import qt4reactor
# Install the Qt-based Twisted reactor at import time; twisted.internet.reactor
# itself is only imported later, inside start_server().
qt4reactor.install()
from twisted.web import server
from twisted.web.xmlrpc import XMLRPC
from twisted.internet import defer
from PyQt4.QtWebKit import QWebSettings, QWebView, QWebPage
from PyQt4.QtNetwork import QNetworkAccessManager, QNetworkRequest
from PyQt4.QtCore import QUrl, Qt
class CustomQNetworkAccessManager(QNetworkAccessManager):
    """Network access manager that ignores SSL errors and disposes of finished replies."""

    def __init__(self, *args, **kwargs):
        """Forward all arguments to the base class and hook both signals."""
        super(CustomQNetworkAccessManager, self).__init__(*args, **kwargs)
        self.sslErrors.connect(self._ssl_errors)
        self.finished.connect(self._finished)

    def _ssl_errors(self, reply, errors):
        """Slot for ``sslErrors``: let the reply continue, then report every error."""
        reply.ignoreSslErrors()
        for ssl_error in errors:
            code = ssl_error.error()
            text = ssl_error.errorString()
            print('Ignored SSL Error: {0} - {1}'.format(code, text))

    def _finished(self, reply):
        """Slot for ``finished``: schedule the completed reply for deletion."""
        reply.deleteLater()
class CustomQWebPage(QWebPage):
    """QWebPage that reports a fixed Firefox 5 user agent for every URL."""

    # No constructor is defined: the inherited QWebPage constructor is used,
    # which is what a pass-through ``__init__`` would call anyway.

    def userAgentForUrl(self, url):
        """Return the same user-agent string regardless of ``url``."""
        return 'Mozilla/5.0 (Windows NT 6.1; rv:5.0) Gecko/20100101 Firefox/5.0'
class WebkitWrapper(object):
    """Owns a QWebView plus its page and network manager for one page load.

    The Qt objects are held as attributes so that ``shutdown`` can schedule
    each of them for deletion explicitly.
    """

    def __init__(self, *args, **kwargs):
        """Build the view, page and network manager and wire them together."""
        super(WebkitWrapper, self).__init__(*args, **kwargs)
        # No request is pending until perform() is called; _load_finished
        # checks this so that a stray loadFinished signal is ignored.
        self._deferred_request = None
        self.web_view = QWebView()
        self.network_manager = CustomQNetworkAccessManager()
        self.web_page = CustomQWebPage()
        self.web_page.setNetworkAccessManager(self.network_manager)
        self.web_view.setPage(self.web_page)
        self.web_view.setAttribute(Qt.WA_DeleteOnClose, True)
        settings = self.web_view.settings()
        settings.setAttribute(QWebSettings.AutoLoadImages, False)
        settings.setAttribute(QWebSettings.JavaEnabled, False)
        settings.setAttribute(QWebSettings.JavascriptEnabled, False)
        settings.setAttribute(QWebSettings.JavascriptCanOpenWindows, False)
        settings.setAttribute(QWebSettings.PluginsEnabled, False)
        self.web_view.loadFinished.connect(self._load_finished)

    def perform(self, request_data, timeout=15):
        """Start loading ``request_data['url']`` and return a Deferred.

        ``request_data`` must offer ``.get``; a missing 'url' key falls back
        to an empty string.  ``timeout`` is accepted for interface
        compatibility but is not used by this method.  The returned Deferred
        is fired with the page HTML (UTF-8 encoded) by ``_load_finished``.
        """
        self._deferred_request = defer.Deferred()
        url = request_data.get('url', '')
        request = QNetworkRequest()
        request.setUrl(QUrl(url))
        self.web_view.load(request)
        print('getting: {0}'.format(url))
        return self._deferred_request

    def shutdown(self):
        """Close the view and schedule every owned Qt object for deletion."""
        print('webview shutdown')
        self.web_view.close()
        self.network_manager.deleteLater()
        self.web_page.deleteLater()
        self.web_view.deleteLater()
        print('deletelater scheduled')

    def _load_finished(self, ok):
        """Slot for ``loadFinished``: deliver the page HTML to the pending Deferred.

        Does nothing when no request is pending or the pending Deferred has
        already fired, so a repeated signal cannot raise.
        """
        print('load finished: {0}'.format(ok))
        pending = self._deferred_request
        if pending is None or pending.called:
            return
        frame = self.web_view.page().mainFrame()
        result = unicode(frame.toHtml()).encode('utf-8')
        pending.callback(result)
class HeadlessBrowser(object):
    """Pairs one WebkitWrapper with the id it is tracked under."""

    def __init__(self, instance_id):
        """Create the wrapper and remember ``instance_id``."""
        self.webkit_wrapper = WebkitWrapper()
        self.instance_id = instance_id

    def _return_intance_id(self, result):
        """Deferred callback: tag ``result`` with this browser's id."""
        return (self.instance_id, result)

    def _request_failed(self, failure):
        """Errback: print the error and a detailed traceback, return the message."""
        failure.trap(Exception)
        message = failure.getErrorMessage()
        print(message)
        failure.printDetailedTraceback()
        return message

    def shutdown(self):
        """Delegate shutdown to the wrapper."""
        self.webkit_wrapper.shutdown()

    def _run_perform(self, _r, request_data):
        """Callback-style entry point: ignore ``_r`` and start ``perform``."""
        return self.webkit_wrapper.perform(request_data)

    def get_request(self, request_data):
        """Start a page load and return a Deferred yielding ``(instance_id, result)``.

        Failures are turned into their error message by ``_request_failed``
        before the id is attached.
        """
        pending = self.webkit_wrapper.perform(request_data)
        pending.addErrback(self._request_failed)
        pending.addBoth(self._return_intance_id)
        return pending
class TestXMLRPCServer(XMLRPC):
    """XML-RPC resource that creates one HeadlessBrowser per ``open`` call.

    Live browsers are kept in ``browser_instances`` keyed by a string id
    and are shut down once their request has completed.
    """

    def __init__(self):
        """Set up the resource with ``allowNone`` and an empty instance registry."""
        XMLRPC.__init__(self, allowNone=True)
        self.browser_instances = dict()
        self.instance_counter = 0

    def _request_completed(self, result):
        """Callback: shut down the browser that produced ``result``.

        ``result`` is the ``(instance_id, dest_result)`` pair; only
        ``dest_result`` is passed on.  An id that is no longer registered is
        skipped instead of raising.
        """
        instance_id, dest_result = result
        print('instances:  {0}'.format(self.browser_instances))
        print('killing instance: {0}'.format(instance_id))
        browser_instance = self.browser_instances.pop(instance_id, None)
        if browser_instance is not None:
            browser_instance.shutdown()
        print('instances:  {0}'.format(self.browser_instances))
        return dest_result

    def xmlrpc_open(self, request_data):
        """Handle the ``open`` RPC: load the requested page in a new browser.

        Returns the Deferred obtained from the browser, with the cleanup
        callback attached.
        """
        print('requested: {0}'.format(request_data))
        self.instance_counter += 1
        instance_id = str(self.instance_counter)
        browser = HeadlessBrowser(instance_id)
        # Register before starting the request so the cleanup callback can
        # always find the instance, even if the Deferred has already fired.
        self.browser_instances[instance_id] = browser
        try:
            def_request = browser.get_request(request_data)
        except Exception:
            # The request never started: do not leave the browser registered.
            self.browser_instances.pop(instance_id, None)
            browser.shutdown()
            raise
        def_request.addCallback(self._request_completed)
        return def_request
def start_server(port=8297):
    """Serve a TestXMLRPCServer on ``port`` and run the reactor."""
    # The reactor is imported inside the function rather than at module level.
    from twisted.internet import reactor
    site = server.Site(TestXMLRPCServer())
    reactor.listenTCP(port, site)
    reactor.run()


if __name__ == '__main__':
    start_server()
其实只需要下面这一行,一切就能正常工作:
self.web_page.deleteLater()
但为了保险起见,最好还是保留其余的清理代码。希望这能为其他人节省一些调试时间。