我正在动态解析一组RSS提要。这是我的代码,适用于大多数网站。
class ParseFeeds:
@staticmethod
def parse(source):
logger = logging.getLogger(__name__)
logger.info("Starting {} at url: {}".format(source.name, source.link))
root = ET.fromstring(requests.get(source.link).text)
items = root.findall(".//item")
logger.info(len(items))
for item in items:
title = ''
if item.find('title') is not None:
title = item.find('title').text
title = ' '.join(title.split())
title = re.sub("'s", "'s", title)
link = ''
if item.find('link') is not None:
link = item.find('link').text
description = ''
if item.find('description') is not None:
description = item.find('description').text
description = ' '.join(description.split())
description = re.sub("'s", "'s", description)
published = timezone.now()
if item.find('pubDate') is not None:
logger.info(item.find('pubDate').text)
published = maya.parse(item.find('pubDate').text).datetime()
url = ''
if item.find('enclosure') is not None:
url = item.find('enclosure').attrib['url']
if item.find('image') is not None:
logger.info(item.find('image').text)
url = item.find('image').text
if not Feed.objects.filter(title=title).exists():
logger.info(
"Adding feed with title:{} link:{} summary:{} published:{} url:{}".format(title, link, description,
published, url))
feed = Feed(title=title, link=link, summary=description, published=published, url=url,
source=source)
feed.save()
logger.info("Adding {} from {}".format(feed.title, feed.source.name))
logger.info("Finished {}".format(source.name))
但是它无法使用此源提取网址。
https://www.football.london/?service=rss
item.find("media:thumbnail")
不起作用。
如何在此源中提取url的值。
任何帮助表示赞赏。