请帮助修复脚本。
import urllib
import re
import os
import pprint
import requests
import bs4
def make_catalog():
try:
os.mkdir('GRAB')
except FileExistsError:
print('FileExistsError')
except PermissionError :
print('PermissionError ')
except Exception:
print(Exception)
def change_catalog():
try:
os.chdir('GRAB')
except PermissionError :
print('PermissionError ')
except Exception:
print(Exception)
def download_image(path, name):
#urllib.URLopener().retrieve(prefix + path, name)
img = urllib.request.urlopen(prefix + path).read()
try:
f = open(name, "wb")
if f:
print('open!!!')
if f.write(img):
print('write!!!')
except OSError:
print('OSError')
except Exception:
print(Exception)
finally:
f.close()
beginIndex = 5794
endIndex = 5800
prefix = "http://www.inpic.ru"
rep_chars = ['\\', '/', ':', '*', '?', '"', '<', '>', '|', '-' , ' ']
make_catalog()
change_catalog()
for i in range(beginIndex, endIndex):
req = requests.get(prefix + '/image/' + str(i))
if req.status_code == requests.codes.ok:
#print(i, '\t', req.status_code, '\t', req, end='\n')
soup = bs4.BeautifulSoup(req.content)
#print(soup.prettify())
name = soup.find("td", {"class": "post_title"}).contents[1].contents
#author = soup.find("div", {"class": "date_author"}).contents[1].contents
print('NAME: ', name[0])
#print(author[0])
#name[0] = re.sub('[\\\\/:*?"<>|-]', '_', name[0])
for char in rep_chars:
name[0] = name[0].replace(char, '_')
print('newNAME: ', name[0])
mainImagePath = soup.find("img", {"class": "image last"})["src"]
mainImageExt = mainImagePath.split('.')[-1]
manyImages = soup.findAll("img", {"class": "image"})
print('MAINUMAGE: ', mainImagePath)
print('MAINIMAGE EXT: ',mainImageExt)
print('MANYIMAGE: \n')
pprint.pprint(manyImages)
if len(manyImages) > 1:
print('CATALOG MAKE')
try:
os.mkdir(name[0])
#except FileExistsError:
#print('FileExistsError')
except PermissionError :
print('PermissionError')
except Exception:
print(Exception)
os.chdir(name[0])
#download_image(mainImagePath, str(name[0]) + '_0.' + mainImageExt)
i = 0
for name in manyImages:
#print(name['src'], end='------------\n')
download_image(name['src'], str(name['src']))
i = i + 1
os.chdir('../')
else:
print('IMAGE MAKE')
download_image(mainImagePath, str(name[0]) + '.' + mainImageExt)
print('mainImagePath', mainImagePath)
print('name', str(name[0]) + '.' + mainImageExt)
print('==================================')
从页面组录制图像时出现的问题 http://www.inpic.ru/image/5797/
显示以下错误消息:
Traceback (most recent call last):
File "C:\VINT\OPENSERVER\OpenServer\domains\localhost\python\parse_html\1\q.py", line 98, in <module>
download_image(name['src'], str(name['src']))
File "C:\VINT\OPENSERVER\OpenServer\domains\localhost\python\parse_html\1\q.py", line 46, in download_image
f.close()
UnboundLocalError: local variable 'f' referenced before assignment
答案 0 :(得分:6)
您正在尝试关闭无法打开的文件。从未分配过f
,因为open()
调用引发了异常。
不是关闭finally
处理程序中的文件对象,而是将其用作上下文管理器:
def download_image(path, name):
#urllib.URLopener().retrieve(prefix + path, name)
img = urllib.request.urlopen(prefix + path).read()
try:
with open(name, "wb") as f:
print('open!!!')
f.write(img)
print('write!!!')
except OSError:
print('OSError')
except Exception:
print(Exception)
此处with
语句将确保f
在成功打开时关闭,无论发生什么。