我使用 scrapy + selenium + PhantomJS 从网页抓取数据,页面上的数据是通过 JavaScript 加载的。使用 Chrome 时一切正常,但把浏览器换成 PhantomJS 之后就无法正常工作(拿不到由 JavaScript 加载的数据)。代码如下(Windows 7 平台):
class MyCustomDownloaderMiddleware(object):
    """Scrapy downloader middleware that renders one page through Selenium.

    Requests whose URL equals ``TARGET_URL`` are loaded in a Selenium-driven
    browser so that the page source handed back to the spider is taken from
    the browser rather than from a plain HTTP download. Every other request
    is left untouched.
    """

    # The single URL that is rendered through the browser.
    TARGET_URL = (
        "http://wenshu.court.gov.cn/list/list/?sorttype=1"
        "&conditions=searchWord+QWJS+++"
        "%E5%85%A8%E6%96%87%E6%A3%80%E7%B4%A2:"
        "%E6%88%90%E9%83%BD%E9%93%B6%E8%A1%8C%E8%82%A1%E4%BB%BD"
        "%E6%9C%89%E9%99%90%E5%85%AC%E5%8F%B8"
    )
    # CSS class whose presence is used as the "page is ready" signal.
    ITEM_CLASS = "dataItem"
    # Maximum number of seconds to wait for ITEM_CLASS to appear.
    WAIT_TIMEOUT = 20

    def __init__(self):
        """Start the browser session shared by all requests."""
        # Alternative backend: webdriver.Chrome()
        self.driver = webdriver.PhantomJS()

    def process_request(self, request, spider):
        """Return a browser-rendered response for ``TARGET_URL``.

        Args:
            request: The outgoing request; only ``request.url`` is read.
            spider: The spider issuing the request (unused).

        Returns:
            An ``HtmlResponse`` built from the browser's current URL and page
            source when ``request.url`` equals ``TARGET_URL``; otherwise
            ``None``, which per Scrapy's middleware contract lets the request
            continue through the normal download path.
        """
        if request.url != self.TARGET_URL:
            return None

        self.driver.get(request.url)
        try:
            WebDriverWait(self.driver, self.WAIT_TIMEOUT).until(
                EC.presence_of_element_located(
                    (By.CLASS_NAME, self.ITEM_CLASS)
                )
            )
        except Exception:
            # Best effort: the original behaviour is to hand back whatever
            # the browser currently holds even when the wait fails, so keep
            # doing that -- but record why instead of hiding the error.
            import logging

            logging.getLogger(__name__).warning(
                "Waiting for element with class %r failed for %s; "
                "returning the current page source",
                self.ITEM_CLASS,
                request.url,
                exc_info=True,
            )

        return HtmlResponse(
            self.driver.current_url,
            body=self.driver.page_source,
            encoding='utf-8',
            request=request,
        )

    def __del__(self):
        """Shut the browser down when the middleware is garbage-collected.

        NOTE: Python does not guarantee that ``__del__`` runs at interpreter
        exit, so this is a best-effort cleanup only.
        """
        # __init__ may have failed before self.driver was assigned.
        driver = getattr(self, "driver", None)
        if driver is not None:
            driver.quit()