#!/usr/bin/env python
import urllib
import mechanize
from bs4 import BeautifulSoup
from urlparse import urlparse
def _image_url_from_href(href):
    """Extract the ``imgurl`` value from a Google ``imgres`` result link."""
    first_param = urlparse(str(href)).query.split("&")[0]
    # Strip only the leading "imgurl=" key; a blanket replace() would also
    # remove any later occurrence of that text inside the value itself.
    _, found, value = first_param.partition("imgurl=")
    if found:
        return value
    return first_param


def getPic(search):
    """Return the image URL of the first Google Images result for ``search``.

    The results page is fetched with ``mechanize``, every ``<a>`` tag is
    scanned for an ``href`` containing ``"imgres?imgurl"``, and the
    ``imgurl`` query parameter of the first such link is returned.

    Args:
        search: Search phrase. Spaces are replaced with ``%20`` before the
            phrase is inserted into the request URL; no other characters
            are escaped.

    Returns:
        The raw (not percent-decoded) ``imgurl`` value of the first matching
        link; an empty list when the page contains no matching link; or
        ``None`` when fetching or parsing fails, in which case ``"error"``
        is printed.

    Note:
        An empty list does not necessarily mean the search has no results:
        the page served to this client may simply not contain any
        ``imgres?imgurl`` links (reportedly Google only emits them for
        JavaScript-enabled browsers -- verify against the returned HTML).
    """
    search = search.replace(" ", "%20")
    try:
        browser = mechanize.Browser()
        browser.set_handle_robots(False)
        browser.addheaders = [('User-Agent', 'Mozilla')]
        htmltext = browser.open(
            "https://www.google.com/search?site=&tbm=isch&source=hp"
            "&biw=1855&bih=990&q=" + search + "&oq=" + search
        )
        soup = BeautifulSoup(htmltext)
        for anchor in soup.findAll("a"):
            # Anchors without an href are skipped explicitly instead of
            # relying on a swallowed KeyError.
            href = anchor.get('href')
            if href and "imgres?imgurl" in href:
                # Only the first matching link is ever used.
                return _image_url_from_href(href)
        return []
    except Exception:
        # Best-effort behaviour is kept (report and return None), but
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        # Parenthesised single-argument print works on Python 2 and 3.
        print("error")
# Demo run: look up one phrase and print whatever getPic returns.
print(getPic("occupy wall street"))
我得到的输出不是图像的链接,而是 "[]"。有人能帮我找出代码中的问题吗?
答案 0 :(得分:0)
Google 仅向启用了 JavaScript 的浏览器发送包含 "imgres?imgurl" 的链接,而 mechanize.Browser() 的行为就像一个没有 JavaScript 的浏览器。
在浏览器中关闭JavaScript并查看Google发送的HTML。