我想从一个网站(website)获取弹出窗口中的数据。
如第一幅图所示,我需要单击一个链接。
此后,将出现一个弹出窗口,如第二个图所示。
此弹出窗口的内容正是我想要的。
我参照一个示例,尝试使用PyQt5
来获取这些数据。
但是,程序会一直运行下去,始终不会结束。
如何解决这个问题?
非常感谢您。
import sys
from PyQt5.QtWidgets import QApplication
from PyQt5.QtCore import QUrl
from PyQt5.QtWebEngineWidgets import QWebEngineView
from bs4 import BeautifulSoup
class Render(QWebEngineView):
    """Web view that loads ``url`` and tries to capture the page HTML.

    Constructing an instance starts the Qt event loop and blocks until
    ``callable`` calls ``app.quit()``. The captured markup, if any, is
    stored in ``html``.

    NOTE(review): the HTML is only requested on the second ``loadFinished``
    signal. The script run on the first pass merely looks up an element and
    does not appear to start a new page load, so the second pass is
    presumably never reached and ``app.exec_()`` never returns -- confirm.
    """

    def __init__(self, url):
        """Create the application and view, load ``url`` and run the event loop."""
        # Stays None until callable() receives the page markup.
        self.html = None
        # True until the first loadFinished signal has been handled.
        self.first_pass = True
        self.app = QApplication(sys.argv)
        QWebEngineView.__init__(self)
        self.loadFinished.connect(self._load_finished)
        self.load(QUrl(url))
        # Blocks here until app.quit() is called from callable().
        self.app.exec_()

    def _load_finished(self, result):
        """Dispatch ``loadFinished``: first signal runs the script, later ones dump HTML.

        ``result`` is the value emitted by the signal; it is not inspected.
        """
        if self.first_pass:
            self._first_finished()
            self.first_pass = False
        else:
            self._second_finished()

    def _first_finished(self):
        """Run the element lookup script in the page (its result is discarded)."""
        self.page().runJavaScript("document.getElementById('auto-header-citypop-citylist');")

    def _second_finished(self):
        """Request the page HTML; it is delivered asynchronously to ``callable``."""
        self.page().toHtml(self.callable)

    def callable(self, data):
        """Store the delivered HTML in ``html`` and stop the event loop."""
        self.html = data
        self.app.quit()
# Render the page, then save whatever markup was captured to data2.html.
url = r'https://www.autohome.com.cn'
web = Render(url)
page_html = web.html
# The utf-8-sig codec puts a byte-order mark at the start of the file.
with open('data2.html', mode='w', encoding='utf-8-sig') as out_file:
    out_file.write(page_html)
答案 0 :(得分:1)
页面上有这个JavaScript:
// Excerpt from the page's own script; the enclosing function is not shown.
// NOTE(review): `rf` is presumably the referrer string -- confirm against the page source.
// Only runs the checks below when `rf` is empty or does not contain ".autohome.com.cn".
if (rf === "" || rf.toLocaleLowerCase().indexOf(".autohome.com.cn") === -1) {
    // Narrow (or missing) screen information is the second condition.
    if (screen == undefined || screen.width < 810) {
        // Any of these browser flags sends the visitor to the m.autohome.com.cn site.
        if (browser.versions.mobile == true || browser.versions.iPhone == true || browser.versions.ucweb == true || browser.versions.android == true || browser.versions.Symbian == true) {
            window.location.href = "//m.autohome.com.cn/?from=pc";
            return
        }
    }
}
通过打印self.url()可以看到,上面这段脚本把您重定向到了https://m.autohome.com.cn/?from=pc。为了解决这个问题,我将Referer标头设置如下:
import sys
from PyQt5.QtWidgets import QApplication
from PyQt5.QtCore import QUrl, QByteArray
from PyQt5.QtWebEngineWidgets import QWebEngineView
from PyQt5.QtWebEngineCore import QWebEngineHttpRequest
from bs4 import BeautifulSoup
class Render(QWebEngineView):
    """Load a page, click the city switcher and capture the resulting HTML.

    Constructing an instance runs the Qt event loop and blocks until the
    page markup has been delivered; it is then available as ``html``.

    Args:
        url: Address of the page to load.
        referer: Value sent in the ``Referer`` request header. The default
            is the address this class previously hard-coded.
    """

    def __init__(self, url, referer='https://www.autohome.com.cn/beijing/'):
        # Only one QApplication may exist per process, so reuse a running
        # one; this lets several Render objects be created in one script.
        app = QApplication.instance() or QApplication(sys.argv)
        super().__init__()
        self.app = app
        # Stays None until callable() receives the page markup.
        self.html = None
        self.loadFinished.connect(self._load_finished)
        self.request = QWebEngineHttpRequest(QUrl(url))
        self.request.setHeader(
            QByteArray(b'Referer'),
            QByteArray(referer.encode('utf-8')),
        )
        self.load(self.request)
        # Blocks here until callable() stops the event loop.
        self.app.exec_()

    def _load_finished(self, result):
        """Click the city switcher once the page has finished loading.

        ``result`` is the flag emitted by ``loadFinished``; it is not
        inspected. The HTML snapshot is requested from the script's
        completion callback, so it is only taken after the click has run.
        """
        self.page().runJavaScript(
            "document.getElementById('auto-header-switcharea').click();",
            self._click_finished,
        )

    def _click_finished(self, _script_result):
        """Request the page HTML; it is delivered asynchronously to ``callable``."""
        self.page().toHtml(self.callable)

    def callable(self, data):
        """Store the delivered HTML in ``html`` and stop the event loop."""
        self.html = data
        self.app.quit()
# Render the Beijing page and print every city link found in the popup markup.
url = 'https://www.autohome.com.cn/beijing/'
web = Render(url)
soup = BeautifulSoup(web.html, 'html.parser')
city_links = soup.find_all('a', attrs={'name': 'auto-header-citypop-city'})
for city in city_links:
    print(city)
输出:
<a data-info="[110100, 646, '北京', 'beijing']" data-key="110100" href="javascript:void(0);" name="auto-header-citypop-city" target="_self">北京</a>
<a data-info="[440100, 62, '广州', 'guangzhou']" data-key="440100" href="javascript:void(0);" name="auto-header-citypop-city" target="_self">广州</a>
<a data-info="[440300, 670, '深圳', 'shenzhen']" data-key="440300" href="javascript:void(0);" name="auto-header-citypop-city" target="_self">深圳</a>
<a data-info="[320100, 335, '南京', 'nanjing']" data-key="320100" href="javascript:void(0);" name="auto-header-citypop-city" target="_self">南京</a>
<a data-info="[310100, 649, '上海', 'shanghai']" data-key="310100" href="javascript:void(0);" name="auto-header-citypop-city" target="_self">上海</a>
....
click事件之后不会触发新的页面加载,因此不需要在_load_finished中区分第一次和第二次加载完成。