我尝试抓取这个页面已经有一段时间了,但无论用什么方法,都无法获取表格中的数据。目前最接近的结果也只是拿到了表格的标题和表头。
我尝试过 PhantomJS、Selenium 等多种方法,但仍然没有进展。
网站:http://marketwatch.dfm.ae/?isRedirected=true
import sys
from bs4 import BeautifulSoup
from PyQt4.QtGui import *
from PyQt4.QtCore import *
from PyQt4.QtWebKit import *
class Render(QWebPage):
    """Load a URL in a QtWebKit page and keep a handle on its main frame.

    Constructing an instance blocks: it creates the QApplication, starts the
    load and runs the Qt event loop until the page emits ``loadFinished``
    (plus an optional grace period).  No view widget is created.

    Attributes set once construction returns:
        app     -- the QApplication that drove the event loop.
        frame   -- the page's main frame; call ``toHtml()`` on it for markup.
        load_ok -- True if ``loadFinished`` reported success, else False.
    """

    def __init__(self, url, wait_ms=0):
        """Load ``url`` and return after the event loop has been stopped.

        url     -- address to load, passed to ``QUrl``.
        wait_ms -- extra milliseconds to keep the event loop running after
                   ``loadFinished`` fires, so scripts on the page get a
                   chance to modify the DOM.  The default of 0 quits
                   immediately, as before.
        """
        # A QApplication has to exist before the page object is initialised.
        self.app = QApplication(sys.argv)
        QWebPage.__init__(self)
        self.frame = None
        self.load_ok = False
        self._wait_ms = wait_ms
        self.loadFinished.connect(self._loadFinished)
        self.mainFrame().load(QUrl(url))
        # Blocks here until _loadFinished() stops the event loop.
        self.app.exec_()

    def _loadFinished(self, result):
        """Slot for ``loadFinished``: record the outcome and stop the loop.

        result -- the success flag delivered by the signal.
        """
        # Keep the flag so callers can detect a failed load instead of
        # silently parsing whatever the frame happens to contain.
        self.load_ok = bool(result)
        self.frame = self.mainFrame()
        if self._wait_ms > 0:
            # Leave the event loop running a little longer before quitting.
            QTimer.singleShot(self._wait_ms, self.app.quit)
        else:
            self.app.quit()
# Render the page with QtWebKit, then parse and print the resulting markup.
url = 'http://marketwatch.dfm.ae/?isRedirected=true'
r = Render(url)
# The frame's HTML is converted to unicode before parsing.  The parser is
# named explicitly so the result does not depend on which optional parsers
# happen to be installed.
soup = BeautifulSoup(unicode(r.frame.toHtml()), "html.parser")
print(soup)
我还尝试了下面这种方法:
import platform
from bs4 import BeautifulSoup
from selenium import webdriver
# Path to the PhantomJS executable, relative to the working directory.
PHANTOMJS_PATH = './phantomjs'
browser = webdriver.PhantomJS(PHANTOMJS_PATH)
try:
    browser.get('http://marketwatch.dfm.ae/')
    # Parse the markup the browser holds right after get() returns.
    soup = BeautifulSoup(browser.page_source, "html.parser")
finally:
    # Always shut the PhantomJS process down, even if the fetch or the
    # parse raises; otherwise it is left running.
    browser.quit()
# Collect every <tr> element whose id attribute is "mw".
tabdata = soup.find_all('tr', {'id': 'mw'})
print(tabdata)
如能提供任何帮助,不胜感激。谢谢!