网页抓取 - 返回空白

时间:2017-08-15 13:52:48

标签: python beautifulsoup blank-line

有人可以向我解释为什么运行这段代码时会得到空白输出吗?我只是想用 BeautifulSoup 打印某个 HTML 标签的内容。代码如下。

谢谢

import urllib3
from bs4 import BeautifulSoup

# Silence the warning urllib3 emits for unverified HTTPS requests.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# Connection pool used by stats() below.
http = urllib3.PoolManager()


def stats():
    """Fetch the MLS results page and print the text of its results table.

    The page is downloaded with a plain HTTP GET and parsed with the lxml
    parser. If the expected ``<div>`` is not present in the response, a short
    notice is printed instead of raising ``AttributeError``.
    """
    url = 'https://www.flashscore.com.au/football/usa/mls/results/'
    response = http.request('GET', url)
    soup = BeautifulSoup(response.data, 'lxml')
    right_table = soup.find('div', {'class': 'fs-table tournament-page'})
    if right_table is None:
        # find() returns None when nothing matches; guard before using .text.
        print('No results table found on this page.')
        return
    # NOTE(review): only the raw HTTP response is parsed here; presumably the
    # table is filled in client-side by JavaScript, which would explain blank
    # output -- confirm against the served HTML.
    print(right_table.text)


stats()

1 个答案:

答案 0 :(得分:0)

您可以使用 PyQt5 获取并处理多个 URL,正如您在评论中所要求的那样:

from PyQt5.QtGui import *
from PyQt5.QtCore import *
from PyQt5.QtWebKit import *
from PyQt5.QtWebKitWidgets import QWebPage
from PyQt5.QtWidgets import QApplication
import bs4 as bs
import sys


class Render(QWebPage):
    """Web page object that loads a sequence of URLs one after another.

    Each URL is loaded into the page's main frame. When a load finishes, the
    frame's HTML is parsed, the text of the results table is printed, and the
    next URL is requested. Once no URLs remain, the application is asked to
    quit.
    """

    def __init__(self):
        super(Render, self).__init__()
        # Empty queue so fetchNext() is safe even if start() was never called.
        self._urls = iter(())
        self.mainFrame().loadFinished.connect(self.handleLoadFinished)

    def start(self, urls):
        """Queue *urls* (any iterable of URL strings) and load the first one."""
        self._urls = iter(urls)
        self.fetchNext()

    def fetchNext(self):
        """Start loading the next queued URL.

        Returns:
            True if a load was started, False if the queue is exhausted.
        """
        try:
            url = next(self._urls)
        except StopIteration:
            return False
        self.mainFrame().load(QUrl(url))
        return True

    def processCurrentPage(self):
        """Print the current frame's URL and the text of its results table."""
        frame = self.mainFrame()
        print(frame.url().toString())
        soup = bs.BeautifulSoup(frame.toHtml(), 'lxml')
        right_table = soup.find('div', {'class': 'fs-table tournament-page'})
        if right_table is None:
            # find() returns None when nothing matches. Without this guard the
            # AttributeError would propagate out of handleLoadFinished before
            # fetchNext() runs, so the remaining URLs would never be loaded.
            print('No results table found on this page.')
            return
        print(right_table.text)

    def handleLoadFinished(self):
        """Slot for loadFinished: process the page, then advance or quit."""
        self.processCurrentPage()
        if not self.fetchNext():
            # Quit via the application class instead of a module-level `app`
            # global, so the class does not depend on how the script names it.
            QApplication.quit()


if __name__ == '__main__':
    # Pages to render, in order.
    urls = [
        "https://www.flashscore.com.au/football/usa/mls/results/",
        "https://www.flashscore.com.au/football/usa/mls/fixtures/",
    ]

    # Bound at module level under the name `app`.
    app = QApplication(sys.argv)

    page = Render()
    page.start(urls)

    # Run the Qt event loop and propagate its return code to the shell.
    exit_code = app.exec_()
    sys.exit(exit_code)