大家好,我使用以下代码获取网页的 HTML 代码。我需要从中提取所有无序列表(<ul>)标签,有什么更快、更简单的方法吗?
#test.py
# Fetch a page and print its HTTP status code followed by the response body.
import requests

# Without a timeout, requests waits indefinitely on an unresponsive server.
req = requests.get('http://www.example.com', timeout=10)
# Parenthesized single-argument print runs on both Python 2 and Python 3.
print('Response Code: ' + str(req.status_code))
print('\nResponse:\n' + req.text)
答案 0 :(得分:3)
import bs4

try:
    from urllib.request import urlopen  # Python 3
except ImportError:
    from urllib import urlopen  # Python 2

# `your_url` is a placeholder: set it to the address of the page to parse.
# Naming the parser explicitly avoids bs4's "no parser was explicitly
# specified" warning and keeps the parse tree the same across machines.
pages = bs4.BeautifulSoup(urlopen(your_url).read(), 'html.parser')
lists = pages.find_all('ul')  # your list of unordered list elements
答案 1 :(得分:0)
from bs4 import BeautifulSoup

try:
    from urllib.request import urlopen  # Python 3
except ImportError:
    from urllib2 import urlopen  # Python 2

# Download the page; the open response object is handed straight to
# BeautifulSoup, which reads the markup from it.
html_page = urlopen("http://example.com")
# An explicit parser avoids bs4's "no parser was explicitly specified"
# warning and keeps results independent of which parsers are installed.
soup = BeautifulSoup(html_page, "html.parser")
# select() takes a CSS selector; "ul" matches every <ul> element.
# NOTE: despite its name, `li` holds the <ul> elements, not <li> items.
li = soup.select("ul")
print(li)