我有一个用于抓取 Tumblr 博客图片 URL 的 Python 脚本，希望把这些图片下载到桌面上的本地文件夹中。请问该如何实现？
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
def make_soup(url):
    """Download the page at *url* and parse it into a BeautifulSoup object.

    Args:
        url: Address of the page to fetch.

    Returns:
        A ``BeautifulSoup`` tree built from the response body.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    # requests has no default timeout; without one a stalled server would
    # block the whole scrape forever.
    raw_page = requests.get(url, timeout=30).text
    # Name the parser explicitly so results do not depend on which optional
    # parsers happen to be installed (and to silence bs4's warning).
    return BeautifulSoup(raw_page, 'html.parser')
def get_images(soup):
    """Collect the Tumblr-hosted image URLs found on a parsed page.

    Args:
        soup: Parsed page exposing ``find_all('img')``; each returned tag
            must support ``.get('src')``.

    Returns:
        A list of ``src`` values containing ``'media.tumblr.com'``, in
        document order. Tags without a ``src`` attribute are skipped.
    """
    images = []
    for image in soup.find_all('img'):
        # .get() instead of indexing: an <img> without src must be skipped,
        # not abort the scrape with a KeyError.
        src = image.get('src')
        if src and 'media.tumblr.com' in src:
            images.append(src)
    return images
def scrape_blog(url):
    """Walk a blog page by page and gather every Tumblr image URL.

    Starting at *url*, each page is fetched with ``make_soup``, its images
    are collected with ``get_images``, and the ``<a id="nextpage">`` link is
    followed until no such link is found.

    Args:
        url: Address of the blog's first page.

    Returns:
        A list of image URLs from all visited pages, in visiting order.
    """
    images = []
    visited = set()
    page_url = url
    # The visited set guards against a next-page link that points back to a
    # page already scraped, which would otherwise loop forever.
    while page_url not in visited:
        visited.add(page_url)
        soup = make_soup(page_url)
        # Collect before following the link so the first page is included.
        images.extend(get_images(soup))
        # NOTE(review): relies on the blog theme marking its pagination link
        # with id="nextpage" - confirm for the target blog.
        next_page = soup.find('a', id='nextpage')
        if next_page is None or not next_page.get('href'):
            break
        # urljoin resolves both absolute and relative hrefs against the
        # current page, unlike plain string concatenation.
        page_url = urljoin(page_url, next_page['href'])
    return images
# Address of the blog's first page; the crawl starts here.
url = 'http://x.tumblr.com'
# Result of scrape_blog(url), computed at module level when the file runs.
images = scrape_blog(url)