I know that a KeyError is raised when you look up a dict() object (with the form a = adict[key]) and the key is not in the dictionary. But my key is there, and the traceback tells me the 'embed' key is the problem.
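For example (just a throwaway dict to illustrate what I mean):

    adict = {'url': 'http://www.example.net'}
    adict['url']    # fine, the key exists
    adict['embed']  # KeyError: 'embed'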
Here is my code:
def scrape_and_store_vlad():
    url_two = 'http://www.example.net'
    html = requests.get(url_two, headers=headers)
    soup = BeautifulSoup(html.text, 'html5lib')
    titles = soup.find_all('div', {'class': 'entry-pos-1'})

    def make_soup(url):
        the_comments_page = requests.get(url, headers=headers)
        soupdata = BeautifulSoup(the_comments_page.text, 'html5lib')
        comment = soupdata.find('div', {'class': 'article-body'})
        para = comment.find_all('p')
        kids = [child.text for child in para]
        blu = str(kids).strip('[]')
        return blu

    name = 'vlad'
    entries = [{'href': url_two + div.a.get('href'),
                'src': url_two + div.a.img.get('data-original'),
                'text': div.find('p', 'entry-title').text,
                'comments': make_soup(url_two + div.a.get('href')).replace("\\", ""),
                'name': name,
                'url': url_two + div.a.get('href')
                } for div in titles][:6]

    # scraping from vlad part two
    titles_two = soup.find_all('div', {'class': 'entry-pos-2'})
    entries_two = [{'href': url_two + div.a.get('href'),
                    'src': url_two + div.a.img.get('data-original'),
                    'text': div.find('p', 'entry-title').text,
                    'comments': make_soup(url_two + div.a.get('href')).replace("\\", ""),
                    'name': name,
                    'url': url_two + div.a.get('href'),
                    'embed': url_two + div.a.get('href'),
                    } for div in titles_two][:6]

    merged_vlad_entries = entries + entries_two
    return merged_vlad_entries
def panties():
    from lxml import html
    pan_url = 'http://www.example.net'
    shtml = requests.get(pan_url, headers=headers)
    soup = BeautifulSoup(shtml.text, 'html5lib')
    video_row = soup.find_all('div', {'class': 'video'})
    name = 'pan videos'

    def youtube_link(url):
        youtube_page = requests.get(url, headers=headers)
        soupdata = BeautifulSoup(youtube_page.text, 'html5lib')
        video_row = soupdata.find_all('script', {'type': 'text/javascript'})
        entries = [{'text': str(div),
                    } for div in video_row]
        tubby = str(entries[4])
        urls = re.findall('http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', tubby)
        return urls

    def embed(url):
        new_embed = url.replace("watch?v=", "embed/")
        return new_embed

    entries = [{'href': div.a.get('href'),
                'src': youtube_link(div.a.get('href'))[1],
                'text': div.h4.text,
                'comments': div.h4.text,
                'name': name,
                'url': div.a.get('href'),
                'embed': embed(youtube_link(div.a.get('href'))[0]),
                } for div in video_row][:3]
    return entries
def save_the_scrapes():
    from_world_star = scrape_and_store_world()
    from_vlad_tv = scrape_and_store_vlad()
    from_pan = panties()
    mergence = from_world_star + from_vlad_tv + from_pan
    random.shuffle(mergence)

    for entry in mergence:
        post = Post()
        post.title = entry['text']
        title = post.title
        if not Post.objects.filter(title=title):
            post.title = entry['text']
            post.name = entry['name']
            post.url = entry['url']
            post.body = entry['comments']
            post.image_url = entry['src']
            post.video_path = entry['embed']
            post.status = 'draft'
            post.save()
    return mergence
Everything worked fine before I added the 'embed' key. If anyone can spot my mistake, please let me know where I went wrong. Thanks.
Answer 0 (score: 1)
The key clearly does not exist, otherwise you would not be getting a KeyError. You are not setting that key in the scrape_and_store_vlad method:
def scrape_and_store_vlad():
    ...
    entries = [{'href': url_two + div.a.get('href'),
                'src': url_two + div.a.img.get('data-original'),
                'text': div.find('p', 'entry-title').text,
                'comments': make_soup(url_two + div.a.get('href')).replace("\\", ""),
                'name': name,
                'url': url_two + div.a.get('href')
                } for div in titles][:6]
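The second comprehension (entries_two) does include 'embed', but the first one does not, so any entry coming from the first list blows up in save_the_scrapes when you read entry['embed']. One way to fix it (a sketch, assuming the embed value should simply be the article URL, the same as you already use in entries_two) is to add the key to the first comprehension as well:

    entries = [{'href': url_two + div.a.get('href'),
                'src': url_two + div.a.img.get('data-original'),
                'text': div.find('p', 'entry-title').text,
                'comments': make_soup(url_two + div.a.get('href')).replace("\\", ""),
                'name': name,
                'url': url_two + div.a.get('href'),
                'embed': url_two + div.a.get('href'),  # previously missing
                } for div in titles][:6]

Alternatively, read the key defensively in save_the_scrapes so that entries without it fall back to an empty string instead of raising:

    post.video_path = entry.get('embed', '')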