以下是我的代码:
transition.startAnim
我要定位以下href:
0
我想在上面提取href,但是我得到
# -*- coding: ascii -*-
# import libraries
from bs4 import BeautifulSoup
import urllib2
import re
def gethyperLinks(url):
    """Download *url* and collect the ``href`` of every product-description div.

    Only the markup returned by the server is parsed; nothing is executed,
    so elements that a page adds later via scripts are not seen here.

    Args:
        url: Address of the page to download.

    Returns:
        A list with one entry per ``<div class="ess-product-desc">`` found
        in the downloaded markup. An entry is ``None`` when the matching
        div has no ``href`` attribute.
    """
    html_page = urllib2.urlopen(url)
    try:
        # BeautifulSoup consumes the response while building the tree, so
        # the connection can be released as soon as parsing is done.
        soup = BeautifulSoup(html_page, "html.parser")
    finally:
        html_page.close()
    return [
        link.get('href')
        for link in soup.findAll('div', attrs={'class': 'ess-product-desc'})
    ]
# Run the scraper against the catalogue search page and show what it found.
search_url = "http://biggestbook.com/ui/catalog.html#/search?cr=1&rs=12&st=BM&category=1"
print(gethyperLinks(search_url))
作为最终答案。我做错了什么?
答案 0 :(得分:1)
答案 1 :(得分:0)
也许您应该使用 "html5lib" 而不是 "html.parser",例如:
from bs4 import BeautifulSoup
# Sample markup: a single product-description div that carries an href.
html = """
<div
class="ess-product-desc" ng-hide="currentView == 'detail' `&& deviceType=='mobile'"
ui-sref="detail({itemId: 'BWK6400', uom: 'CT', cm_sp:'', merchPreference:''})"
href="#/itemDetail?`itemId=BWK6400&uom=CT" aria-hidden="false">
<span>Center-Pull Hand Towels, 2-Ply, Perforated, 7 7/8 x 10, White, 600/RL, 6 RL/CT</span>
</div>
"""

# Parse with the html5lib parser instead of the built-in html.parser.
soup = BeautifulSoup(html, "html5lib")

# Every div whose class is "ess-product-desc".
links = soup.find_all('div', {'class': 'ess-product-desc'})

# Read the href of the first match; in an interactive session the value of
# this expression is displayed.
first_link = links[0]
first_link.get("href")
您将得到:
'#/itemDetail?`itemId=BWK6400&uom=CT'