使用 Flickr 上某张图片的链接时,requests 请求只返回截至以下注释为止的 HTML:
`<!-- rendered with love by pprd1-node580-lh1.manhattan.bf1.yahoo.com -->`
(见下面的html图片)。
我想访问位于其下方 3 个 div 元素之内的 img 元素中的链接,因此非常感谢任何建议。
from bs4 import BeautifulSoup
import logging
import os
import requests
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
import shutil
import sys
import time
# Configure the root logger: emit DEBUG-and-above records, each formatted as
# "<timestamp> - <level name> - <message>".
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.DEBUG,
)
def flickr_images():
    """Search Flickr for a term and fetch the page of each found image.

    Reads two command-line arguments from ``sys.argv[1:]``: the search term
    and the number of images requested. A Firefox WebDriver session loads
    the Flickr search results page, the ``href`` of every element with class
    ``overlay`` is collected (capped at the requested number), and each of
    those links is then fetched with ``requests`` and parsed with
    BeautifulSoup.

    Returns:
        None. If the command line does not contain exactly two arguments, or
        the second one is not an integer, a usage message is printed and the
        function returns early without opening a browser.

    Raises:
        requests.HTTPError: if fetching one of the image pages returns an
            HTTP error status.
    """
    try:
        search_term, number_images = sys.argv[1:]
        num_req_images = int(number_images)
    except ValueError:
        # Raised both by a wrong argument count (unpacking) and by a
        # non-integer image count (int()).
        print("Something went wrong. Command line input must be of "
              "format: 'filename searchterm numberimages'")
        return

    # navigate to search results page
    driver = webdriver.Firefox()
    try:
        # poll DOM for max 10 secs if element not immediately available
        driver.implicitly_wait(10)
        driver.get("https://www.flickr.com/search/?text=" + search_term)
        driver.maximize_window()
        # Give the page time to load more results.
        # 0sec wait = 25images, 1sec = 48, 3+sec = 98
        time.sleep(3)
        # find_elements(By..., ...) works on both Selenium 3 and 4; the
        # find_elements_by_* helpers were removed in Selenium 4.
        image_link_elems = driver.find_elements(By.CLASS_NAME, "overlay")

        # In case requested is > found; clamp at 0 so a negative request
        # cannot produce a negative count or an unintended slice.
        num_images_tosave = max(0, min(num_req_images, len(image_link_elems)))
        image_elems_tosave = image_link_elems[:num_images_tosave]
        print("{} images found.".format(num_images_tosave))
        logging.info("Length photos: {}".format(len(image_link_elems)))

        # Read the hrefs while the browser session is still alive; after
        # this only plain strings are needed.
        image_links = [
            link.get_attribute("href") for link in image_elems_tosave
        ]
    finally:
        # Always release the browser process, even if the search failed.
        driver.quit()

    # extract image src's from found elements
    src_links = []
    for image_link in image_links:
        res = requests.get(image_link)
        # Must be *called*; the bare attribute reference never raised.
        res.raise_for_status()
        soup = BeautifulSoup(res.text, "html.parser")
        # NOTE(review): src_elem is not consumed and src_links is never
        # filled in the visible code -- the extraction step looks unfinished.
        src_elem = soup.select(".zoom-small")
HTML图片: