我正在使用feed解析器和beautifulsoup。 youtube嵌入代码没有明确的密钥。相反,它通过'content'键在html内部,如此
'content': [{'value': '<h4><strong>Video:</strong> cangel Ft De La – ose (Official Video)<span id="more-125869"></span></h4>\n<p> </p>\n<div class="lyte-wrapper"></div>\n<p><span id="more-2331"></span><iframe src="https://www.youtube.com/embed/HFiDh_TcvNE" width="560" height="315" frameborder="0" allowfullscreen="allowfullscreen"></iframe></p>',}],
所以我创建了一个函数来获取像这样的src代码和图像代码
def pan_task():
url = 'http://example.net/feed/'
name = 'elrealsonidodelakalle'
live_leaks = [i for i in feedparser.parse(url).entries][:3]
the_count = len(live_leaks)
ky = feedparser.parse(url).keys()
oky = [i.keys() for i in feedparser.parse(url).entries][1] # shows what I can pull
def embed_image(html_doc):
soup = BeautifulSoup(html_doc, "html5lib")
embed = soup.iframe.get('src')
remove = 'https://www.youtube.com/embed/'
remaining_pic_code = embed.replace(remove, '')
the_img = 'http://i1.ytimg.com/vi/' + remaining_pic_code + '/hqdefault.jpg'
results = {'src': the_img, 'embed': embed}
return results
results = [{
'name': name,
'text': i.title,
'url': i.id,
'comments': i.title,
'src': embed_image(i.content[0]['value'])['src'],
'embed': embed_image(i.content[0]['value'])['embed'],
'author': None,
'video': True,
'status': 'published'
} for i in live_leaks]
for entry in results:
post = Post() #
post.title = entry['text'] #
title = post.title #
if not Post.objects.filter(title=title):
post.title = entry['text']
post.name = entry['name']
post.url = entry['url']
post.body = entry['comments']
post.image_url = entry['src']
post.video_path = entry['embed']
post.author = entry['author']
post.video = entry['video']
post.status = entry['status']
post.save()
post.tags.add("video")
return print(results)
但它只在我这样做时才有效
live_leaks = [i for i in feedparser.parse(url).entries][:3]
如果我删除三个“[:3]”我收到此错误
Task blog.tasks.pan_task[79707dd3-70ae-40e9-a97a-e5c36dee4004] raised unexpected: AttributeError("'NoneType' object has no attribute 'get'",)
Traceback (most recent call last):
File "/Users/ray/Desktop/myheroku/practice/lib/python3.5/site-packages/celery/app/trace.py", line 240, in trace_task
R = retval = fun(*args, **kwargs)
File "/Users/ray/Desktop/myheroku/practice/lib/python3.5/site-packages/celery/app/trace.py", line 438, in __protected_call__
return self.run(*args, **kwargs)
File "/Users/ray/Desktop/myheroku/practice/src/blog/tasks.py", line 126, in pan_task
} for i in live_leaks]
File "/Users/ray/Desktop/myheroku/practice/src/blog/tasks.py", line 126, in <listcomp>
} for i in live_leaks]
File "/Users/ray/Desktop/myheroku/practice/src/blog/tasks.py", line 109, in embed_image
embed = soup.iframe.get('src')
AttributeError: 'NoneType' object has no attribute 'get'
[2016-10-15 17:24:43,560: ERROR/MainProcess] Task blog.tasks.pan_task[339ccc72-c87a-4323-948b-4db7afb4f619] raised unexpected: AttributeError("'NoneType' object has no attribute 'get'",)
Traceback (most recent call last):
File "/Users/ray/Desktop/myheroku/practice/lib/python3.5/site-packages/celery/app/trace.py", line 240, in trace_task
R = retval = fun(*args, **kwargs)
File "/Users/ray/Desktop/myheroku/practice/lib/python3.5/site-packages/celery/app/trace.py", line 438, in __protected_call__
return self.run(*args, **kwargs)
File "/Users/ray/Desktop/myheroku/practice/src/blog/tasks.py", line 126, in pan_task
} for i in live_leaks]
File "/Users/ray/Desktop/myheroku/practice/src/blog/tasks.py", line 126, in <listcomp>
} for i in live_leaks]
File "/Users/ray/Desktop/myheroku/practice/src/blog/tasks.py", line 109, in embed_image
embed = soup.iframe.get('src')
AttributeError: 'NoneType' object has no attribute 'get'. If I go to the feed page I can count eight items. any help with this would be great. I am new to programming all code is my own so if it seems sloppy or unprofessional, that's why