我一直在尝试从页面中检索href链接,并将其作为变量来构造下一个要请求的链接。但我卡在了一个地方:页面上有多个href链接,它们的文件扩展名各不相同(如zip、md5等),而我只需要扩展名为zip的那个文件。下面是我尝试实现的代码。
import httplib2
from BeautifulSoup import BeautifulSoup, SoupStrainer
# Fetch the index page, then follow every '/abc' link found on it and list
# the '/abc' links of each follow-up page.
http = httplib2.Http()
status, response = http.request('http://example.com')

# SoupStrainer('a') restricts parsing to <a> tags.
for link in BeautifulSoup(response, parseOnlyThese=SoupStrainer('a')):
    if link.has_key('href'):
        if '/abc' in link['href']:
            # The 12th '/'-separated component of the href is used as the
            # name; an href with fewer components raises IndexError here.
            basename = link['href'].split("/")[11]
            print(basename)
            # Interpolate the name into the URL with real %-formatting.
            # The previous call passed basename as request()'s second
            # positional argument, which httplib2 uses as the HTTP method,
            # and left the literal text '%basename' in the URL.
            status, sub_response = http.request(
                'http://example.com/%s' % basename)
            # NOTE(review): this nesting was reconstructed from a paste that
            # lost its indentation -- confirm the second pass belongs inside
            # the match branch.
            for sub_link in BeautifulSoup(sub_response,
                                          parseOnlyThese=SoupStrainer('a')):
                if sub_link.has_key('href'):
                    if '/abc' in sub_link['href']:
                        sub_basename = sub_link['href'].split("/")[11]
                        print(sub_basename)
答案 0 :(得分:0)
试一试:
import os
# YOUR CODE here
# Walk the <a> tags of the already-fetched `response` and keep only the
# '/abc' links whose file name ends in .zip.
for link in BeautifulSoup(response, parseOnlyThese=SoupStrainer('a')):
    if link.has_key('href'):
        if '/abc' in link['href']:
            basename = link['href'].split("/")[11]
            # check file extension
            file_extension = os.path.splitext(basename)[1]
            print("%s %s" % (basename, file_extension))
            # splitext() keeps the leading dot ('.zip', not 'zip'), and the
            # goal is to process zip files only, so skip everything else.
            if file_extension.lower() != '.zip':
                continue
            # YOUR LAST CODE