Python - 使用 requests 抓取图像

时间:2018-07-04 09:56:07

标签: python request python-requests

我无法在该位置保存/下载图像。尽管代码似乎正确,但我无法弄清楚问题。

我正在使用请求库来抓取图像。

import os
import urllib
import urllib.request
from bs4 import BeautifulSoup
import requests
import re

from lxml.html import fromstring

# Fetch the page and parse all <img> tags out of it.
r = requests.get("https://www.scoopwhoop.com/subreddit-nature/#.lce3tjfci")
data = r.text
soup = BeautifulSoup(data, "lxml")

# The page title doubles as the name of the download folder.
title = fromstring(r.content).findtext('.//title')

#print(title)


# BUG FIX: the original used r'...\ScrappedImages\ ' + title — a raw string
# cannot end in a backslash, so a stray space ended up in the folder name.
# os.path.join inserts the separator correctly.
newPath = os.path.join(r'C:\Users\Vicky\Desktop\ScrappedImages', title)

# BUG FIX: create the target folder once, before the loop.  In the original,
# the file-writing `with open(...)` block was nested inside
# `if not os.path.exists(newPath)`, so only the first image was ever saved.
os.makedirs(newPath, exist_ok=True)

for link in soup.find_all('img'):
    image = link.get('src')
    # `src` may be missing (None); only fetch absolute http(s) URLs.
    if image and 'http' in image:
        print(image)
        imageName = os.path.split(image)[1]
        print(imageName)

        r2 = requests.get(image)

        # BUG FIX: write into newPath; the original opened a bare filename,
        # which saved every image into the current working directory.
        with open(os.path.join(newPath, imageName), "wb") as f:
            f.write(r2.content)

2 个答案:

答案 0 :(得分:0)

尝试将 r = requests.get("https://www.scoopwhoop.com/subreddit-nature/#.lce3tjfci") 语句包装在 try: 中，以确保要抓取的网站返回 200 响应，因为可能是网站超时或无法满足您的请求。

答案 1 :(得分:0)

import os
from bs4 import BeautifulSoup
import urllib.request
import requests
# BUG FIX: the original snippet was Python 2 (`import urlparse`,
# `urllib.urlretrieve`) and fails on Python 3, which the question uses.
from urllib.parse import urlparse

from lxml.html import fromstring

# Fetch the page and parse all <img> tags out of it.
r = requests.get("https://www.scoopwhoop.com/subreddit-nature/#.lce3tjfci")
data = r.text
soup = BeautifulSoup(data, "lxml")

for link in soup.find_all('img'):
    image = link.get('src')
    # `src` may be missing (None); a non-empty netloc means the URL is
    # absolute, so relative/inline sources are skipped.
    if image and bool(urlparse(image).netloc):
        print(image)
        # Everything after the last "/" is the local file name.
        imageName = image[image.rfind("/") + 1:]
        print(imageName)

        urllib.request.urlretrieve(image, imageName)