因此,使用https://plnkr.co/edit/7X73A8GidqIzF91fmlkK中显示的代码我尝试从此链接检索欧元值https://impythonist.wordpress.com/2015/01/06/ultimate-guide-for-scraping-javascript-rendered-web-pages/
但这是我得到的结果:[你' 1 BTC \ xa0',你' \ n \ t?\ xa00,00 \ n']
任何人都可以帮助我吗?
import sys
from PyQt4.QtGui import *
from PyQt4.QtCore import *
from PyQt4.QtWebKit import *
from lxml import html
#Take this class for granted.Just use result of rendering.
class Render(QWebPage):
def __init__(self, url):
self.app = QApplication(sys.argv)
QWebPage.__init__(self)
self.loadFinished.connect(self._loadFinished)
self.mainFrame().load(QUrl(url))
self.app.exec_()
def _loadFinished(self, result):
self.frame = self.mainFrame()
self.app.quit()
url = 'https://btcdirect.eu/nl-nl'
r = Render(url)
result = r.frame.toHtml()
#This step is important.Converting QString to Ascii for lxml to process
archive_links = html.fromstring(str(result.toAscii()))
#QString should be converted to string before processed by lxml
formatted_result = str(result.toAscii())
#Next build lxml tree from formatted_result
tree = html.fromstring(formatted_result)
#Now using correct Xpath we are fetching URL of archives
archive_links = tree.xpath('//*[@id="bitcoinkoers"]/strong[1]/text()')
print archive_links