Question

请帮助修复脚本。

import urllib
import re
import os
import pprint

import requests
import bs4


def make_catalog():
    """Create the ``GRAB`` directory inside the current working directory.

    Failures are reported on stdout instead of being raised, so the caller
    keeps running whether or not the directory could be created.
    """
    try:
        os.mkdir('GRAB')
    except FileExistsError:
        print('FileExistsError')
    except PermissionError:
        print('PermissionError ')
    except Exception as exc:
        # Report the error that actually occurred; printing the bare
        # ``Exception`` class would only show "<class 'Exception'>".
        print('{}: {}'.format(type(exc).__name__, exc))


def change_catalog():
    """Make the ``GRAB`` directory the current working directory.

    Failures are reported on stdout instead of being raised, so after a
    failed call the process stays in the directory it was already in.
    """
    try:
        os.chdir('GRAB')
    except PermissionError:
        print('PermissionError ')
    except Exception as exc:
        # Report the error that actually occurred (for example a missing
        # directory); printing the bare ``Exception`` class would only show
        # "<class 'Exception'>".
        print('{}: {}'.format(type(exc).__name__, exc))


def download_image(path, name):
    """Fetch ``prefix + path`` and write the response bytes to file ``name``.

    ``prefix`` is the module-level site root. ``name`` is handed to
    ``open()`` unchanged, so a relative name is resolved against the current
    working directory.

    NOTE(review): this is the version the traceback in this post comes from.
    If ``open()`` raises, ``f`` is never bound; the ``except`` handler prints
    its message and then ``f.close()`` in ``finally`` raises
    ``UnboundLocalError``.
    """
    #urllib.URLopener().retrieve(prefix + path, name)
    # The download happens outside the try block, so network errors propagate
    # to the caller.
    # NOTE(review): only ``import urllib`` appears in the import list;
    # ``urllib.request`` presumably resolves because another import loads
    # the submodule - confirm.
    img = urllib.request.urlopen(prefix + path).read()
    try:
        f = open(name, "wb")
        if f:
            print('open!!!')
        # write() returns the number of bytes written, so the message is
        # printed only when the payload is non-empty.
        if f.write(img):
            print('write!!!')
    except OSError:
        print('OSError')
    except Exception:
        # Prints the Exception class itself, not the error that was caught.
        print(Exception)    
    finally:
        # Runs even when open() failed, i.e. when ``f`` was never assigned.
        f.close()


# Image pages with ids in range(beginIndex, endIndex) are scraped.
beginIndex = 5794
endIndex = 5800
# Site root; download_image() also reads it and prepends it to image paths.
prefix = "http://www.inpic.ru"
# Characters replaced with '_' when a page title is turned into a file name.
rep_chars = ['\\', '/', ':', '*', '?', '"', '<', '>', '|', '-', ' ']
# Single-pass equivalent of calling str.replace(char, '_') for every char.
_title_table = str.maketrans({char: '_' for char in rep_chars})

make_catalog()
change_catalog()

for i in range(beginIndex, endIndex):
    # A timeout keeps one unresponsive page from hanging the whole run.
    req = requests.get(prefix + '/image/' + str(i), timeout=30)
    if req.status_code != requests.codes.ok:
        continue

    # Name the parser explicitly so the parse does not depend on which
    # optional parsers happen to be installed.
    soup = bs4.BeautifulSoup(req.content, 'html.parser')
    title_cell = soup.find("td", {"class": "post_title"})
    main_image = soup.find("img", {"class": "image last"})
    if title_cell is None or main_image is None:
        # The page lacks the expected title cell or main image.
        print('SKIP: ', i)
        print('==================================')
        continue

    raw_title = str(title_cell.contents[1].contents[0])
    print('NAME: ', raw_title)
    title = raw_title.translate(_title_table)
    print('newNAME: ', title)

    mainImagePath = main_image["src"]
    mainImageExt = mainImagePath.split('.')[-1]
    manyImages = soup.find_all("img", {"class": "image"})

    print('MAINUMAGE: ', mainImagePath)
    print('MAINIMAGE EXT: ', mainImageExt)
    print('MANYIMAGE: \n')
    pprint.pprint(manyImages)

    if len(manyImages) > 1:
        print('CATALOG MAKE')
        try:
            # exist_ok lets a re-run reuse the directory from an earlier run.
            os.makedirs(title, exist_ok=True)
        except OSError as exc:
            print('{}: {}'.format(type(exc).__name__, exc))
            print('==================================')
            continue

        # Distinct loop names keep the page id ``i`` and the page title
        # intact. Each file gets a sanitized "<title>_<n>.<ext>" name inside
        # the gallery directory: the raw ``src`` is a URL path and is not
        # usable as a local file name. Joining the directory into the target
        # path avoids changing the working directory back and forth.
        for index, image in enumerate(manyImages):
            src = image['src']
            ext = src.split('.')[-1]
            file_name = '{}_{}.{}'.format(title, index, ext)
            download_image(src, os.path.join(title, file_name))
    else:
        print('IMAGE MAKE')
        file_name = title + '.' + mainImageExt
        download_image(mainImagePath, file_name)
        print('mainImagePath', mainImagePath)
        print('name', file_name)

    print('==================================')

从包含一组图片的页面（例如 http://www.inpic.ru/image/5797/）保存图片时出现问题。

显示以下错误消息：

Traceback (most recent call last):
   File "C:\VINT\OPENSERVER\OpenServer\domains\localhost\python\parse_html\1\q.py", line 98, in <module>
     download_image(name['src'], str(name['src']))
   File "C:\VINT\OPENSERVER\OpenServer\domains\localhost\python\parse_html\1\q.py", line 46, in download_image
     f.close()
UnboundLocalError: local variable 'f' referenced before assignment

Answer 1

您正在尝试关闭一个未能成功打开的文件。由于 open() 调用引发了异常，变量 f 从未被赋值。

不要在 finally 块中关闭文件对象，而应将其用作上下文管理器：

def download_image(path, name):
    """Fetch ``prefix + path`` and write the response bytes to file ``name``.

    ``prefix`` is the module-level site root. The download itself is not
    guarded, so network errors propagate to the caller. Failures while
    opening or writing the file are reported on stdout and swallowed.
    """
    # ``import urllib`` alone does not guarantee that the ``request``
    # submodule is loaded, so import it explicitly where it is used.
    import urllib.request

    img = urllib.request.urlopen(prefix + path).read()
    try:
        # The context manager closes the file only if open() succeeded, so
        # there is no unbound ``f`` to clean up when it fails.
        with open(name, "wb") as f:
            print('open!!!')
            f.write(img)
            print('write!!!')
    except OSError as exc:
        # Include the detail: the message names the path that failed.
        print('OSError', exc)
    except Exception as exc:
        # Report the error that actually occurred rather than the bare
        # ``Exception`` class.
        print('{}: {}'.format(type(exc).__name__, exc))

这里的 with 语句可确保：只要 f 成功打开，无论之后发生什么，它都会被关闭。

写入后如何关闭文件？

1 个答案: