我是Python的新手,试图从this上的第二个网页抓取一个javascript渲染网页中的一些数据。当我尝试将此代码应用于for循环时,它仅返回列表中包含50个项目的2个结果,并给出"Process finished with exit code -1073740940 (0xC0000374)"
错误。有人可以解释原因吗?
我的样本在这里:
class Page(QWebEnginePage):
def __init__(self, url):
self.app = QApplication(sys.argv)
QWebEnginePage.__init__(self)
self.html = ''
self.loadFinished.connect(self._on_load_finished)
self.load(QUrl(url))
self.app.exec_()
def _on_load_finished(self):
self.html = self.toHtml(self.Callable)
def Callable(self, html_str):
self.html = html_str
self.app.quit()
def main():
global linklist
for iurl in linklist:
page = Page(iurl)
soup = bs.BeautifulSoup(page.html, 'html.parser')
data = soup.find('div', class_='tablo_dual_board')
data = data.text
data = data.splitlines()
print(data)
我也尝试了这个,它仅为第一个列表项提供结果。除了这些之外还有其他方法可以为列表项应用函数吗?
for iurl in linklist:
iurl=main()
我的整个代码在这里:
import sys
from PyQt5 import QtCore, QtWidgets, QtWebEngineWidgets
import requests
from bs4 import BeautifulSoup
import bs4 as bs
class WebPage(QtWebEngineWidgets.QWebEnginePage):
def __init__(self):
super(WebPage, self).__init__()
self.loadFinished.connect(self.handleLoadFinished)
def start(self, urls):
self._urls = iter(urls)
self.fetchNext
@property
def fetchNext(self):
try:
url = next(self._urls)
except StopIteration:
return False
else:
self.load(QtCore.QUrl(url))
return True
def processCurrentPage(self, html):
url = self.url().toString()
# do stuff with html...
soup = bs.BeautifulSoup(html, 'html.parser')
veri = soup.find('div', class_='tablo_dual_board')
veri = veri.text
veri = veri.splitlines()
print(veri)
if not self.fetchNext:
QtWidgets.qApp.quit()
def handleLoadFinished(self):
self.toHtml(self.processCurrentPage)
def javaScriptConsoleMessage(self, *args):
# disable javascript error output
pass
if __name__ == '__main__':
# generate some test urls
onexurl = "https://1xbahis1.com/en/live/Football/"
r = requests.get(onexurl)
soup = BeautifulSoup(r.content, "html.parser")
income = soup.find_all("ul", {"id":"games_content"})
links = soup.find_all("a", {"class": "c-events__name"})
urls = []
for matchlink in links:
urls.append("https://1xbahis1.com/en/"+(matchlink.get("href")))
# only try 3 urls for testing
urls = urls[:3]
app = QtWidgets.QApplication(sys.argv)
webpage = WebPage()
webpage.start(urls)
sys.exit(app.exec_())