我需要从页面中删除信息(编辑:删除NSFW链接)页面。在进入页面之前,我应该点击一个按钮,以便能够获取页面本身。我正在使用Python 2.7.10
和selenium,PhantomJS 1.9.8
。
继承我的代码:
#!/bin/env/python
# -*- coding: cp1250 -*-
import urllib
import urllib2
import time
from bs4 import BeautifulSoup
import sys, os
from selenium.webdriver.support.wait import WebDriverWait
from selenium import webdriver
reload(sys)
sys.setdefaultencoding("cp1250")
base_url = "https://www.24dolores.pl/"
waiting_time = 20
def get_browser():
return webdriver.PhantomJS("phantomjs.exe")
def download_page_src(url):
try:
browser = get_browser()
wait = WebDriverWait(browser, 30)
browser.get(url)
time.sleep(5)
close = browser.find_element_by_class_name('.enter_pl')
close.click()
html = browser.page_source
browser.close()
return html
except urllib2.HTTPError, error:
return error
except urllib2.URLError, error:
return error
except Exception, error:
return error
page = download_page_src(base_url)
print page
它给出的错误:
C:\Documents and Settings\student>cd C:\Documents and Settings\student\Pulpit
C:\Documents and Settings\student\Pulpit>python test.py
Message: {"errorMessage":"Unable to find element with class name '.enter_pl'","r
equest":{"headers":{"Accept":"application/json","Accept-Encoding":"identity","Co
nnection":"close","Content-Length":"98","Content-Type":"application/json;charset
=UTF-8","Host":"127.0.0.1:1708","User-Agent":"Python-urllib/2.7"},"httpVersion":
"1.1","method":"POST","post":"{\"using\": \"class name\", \"sessionId\": \"1d0a4
d60-add2-11e5-84a6-b5f372943a74\", \"value\": \".enter_pl\"}","url":"/element","
urlParsed":{"anchor":"","query":"","file":"element","directory":"/","path":"/ele
ment","relative":"/element","port":"","host":"","password":"","user":"","userInf
o":"","authority":"","protocol":"","source":"/element","queryKey":{},"chunks":["
element"]},"urlOriginal":"/session/1d0a4d60-add2-11e5-84a6-b5f372943a74/element"
}}
Screenshot: available via screen
答案 0 :(得分:1)
只需删除班级名称的点。 如果你想继续使用css选择器,你可以:
driver.find_element_by_css_selector(".enter_pl")