我知道如何用 Selenium 到达想要抓取的结果页面,但不知道如何实际抓取结果页面的内容。我也尝试过 mechanize,但没有取得任何进展。以下是我目前的代码:
import re
import urllib2
import csv
import os
from selenium import webdriver
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup, SoupStrainer
import datetime
import time
import smtplib
import atexit
import signal
import json
import os
import gspread
import sys
import gc
# Absolute directory containing this script (symlinks resolved by realpath).
script_path = os.path.dirname(os.path.realpath(__file__))

# Module-level PhantomJS browser used by main(); the service arguments tell
# PhantomJS to ignore SSL errors and accept any SSL protocol.
driver = webdriver.PhantomJS(
    executable_path="/usr/bin/phantomjs",
    service_args=['--ignore-ssl-errors=true', '--ssl-protocol=any']
)
#launches headless browser, completes proper search in Casenet
def main():
    """Run the Casenet name search and print the text of the results table.

    Opens the Casenet name-search page with the module-level ``driver``,
    selects 'All Participating Courts', searches for the last name
    'Wakefield & Associates', and prints the text content of the element
    with class ``outerTable``.

    Returns:
        False if the page source contains 'Service Unavailable' (the driver
        is quit in that case); otherwise None.
    """
    driver.get('https://www.courts.mo.gov/casenet/cases/nameSearch.do')
    if 'Service Unavailable' in driver.page_source:
        log('Casenet website seems to be down. Receiving "service unavailable"')
        driver.quit()
        gc.collect()
        return False

    # Fill in and submit the search form.
    court = Select(driver.find_element_by_id('courtId'))
    court.select_by_visible_text('All Participating Courts')
    case_enter = driver.find_element_by_id('inputVO.lastName')
    case_enter.send_keys('Wakefield & Associates')
    driver.find_element_by_id('findButton').click()
    # NOTE(review): a fixed one-second pause is fragile if the results load
    # slowly; consider an explicit wait on the results table instead.
    time.sleep(1)

    number_of_pages = 204
    # NOTE(review): nothing in this loop navigates to the next results page,
    # so every iteration reads the same page. Pagination still needs to be
    # implemented before this covers all pages.
    for _ in range(number_of_pages):
        output_trs = []
        party = driver.find_element_by_class_name('outerTable')
        output_trs.append(party)
        # Print each element's text content. Printing the list itself only
        # shows the WebElement object representation, not the scraped data.
        # Under Python 2, non-ASCII text may need an explicit .encode(...)
        # when stdout is redirected.
        for content in output_trs:
            print(content.text)
# Run the search only when this file is executed as a script, so importing
# the module does not trigger a scrape.
if __name__ == '__main__':
    main()
最终目标是将当事人、案件编号和立案日期作为字符串存储到 .csv 文件中。目前打印 output_trs 时,得到的是:
selenium.webdriver.remote.webelement.WebElement (session="c4e7b9e0-7a3b-11e8-83f2-b9030062270d", element=":wdc:1530125781332")
感谢任何帮助。
答案 0 :(得分:2)
您打印的是 WebElement 对象本身,而不是它的文本内容。
一种打印文本内容的方式(请注意编码):
# output_trs holds WebElement objects; .text gives each element's text
# content instead of the object representation.
for content in output_trs:
    print(content.text)