此代码应该询问2个可选参数和1个必需参数,'format'和'type'参数是可选的,'url'是必需参数,它将是您要从中下载图像的网站地址(jpg或/和.png类型图像,取决于你在'type'参数中给出的值,并将其保存为单个图像或单个JSON文件(取决于你在'format'参数中给出的值),我得到错误你可以告诉我哪里出错了,这是我的代码..
import argparse
import base64
import json
import os
from bs4 import BeautifulSoup
import requests
def scrape(url,format_,type_):
try:
page = requests.get(url)
except requests.RequestException as rex:
print(str(rex))
else:
soup = BeautifulSoup(url,'html.parser')
images = _fetch_images(soup,url)
images = _filter_images(images,type_)
_save(images,format_)
def _fetch_images(soup,base_url):
images = []
for img in soup.find_all('img'):
src = img.get('src')
img_url = ('{base_url}/{src}'.format(base_url,src))
name = img_url.split('\\')[-1]
images.append(dict(name=name,url=img_url))
return images
def _filter_images(images,type_):
if type == 'all':
return images
ext_map = {'png':['.png'],'jpg':['.jpg','.jpeg']}
return [img for img in images if
_match_extension(img['name'],ext_map(type_))]
def _match_extension(filename,extension_list):
name,extension = os.path.splittext(filename.lower())
return extension in extension_list
def _save(images,format_):
if images:
if format_=='img':
_save_images(images)
else:
_save_json(images)
print('Done!')
else:
print('there are no images!')
def _save_images(images):
for img in images:
img_data = requests.get(img['url']).content
with open(img['name'],'wb') as f:
f.write(img_data)
def _save_json(images):
data = {}
for img in images:
img_data = requests.get(img['url']).content
b64_img_data = base64.b64encode(img_data)
str_img_data = b64_img_data.decode('utf-8')
data[img['name']]=str_img_data
with open('images.json','w') as ijson:
ijson.write(json.dump(data))
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description='Scrape a webpage.')
parser.add_argument(
'-t',
'--type',
choices=['all', 'png', 'jpg'],
default='all',
help='The image type we want to scrape.')
parser.add_argument(
'-f',
'--format',
choices=['img', 'json'],
default='img',
help='The format images are saved to.')
parser.add_argument(
'url',
help='The URL we want to scrape for images.')
args = parser.parse_args()
args.url = str(input('enter the url\n'))
args.format = str(input('enter img or json \n'))
args.type = str(input('enter all or png or jpg'))
scrape(args.url,args.format,args.type)
我想在PyCharm中运行它,我得到的错误是,
usage: scrape.py [-h] [-t {all,png,jpg}] [-f {img,json}] url
scrape.py: error: the following arguments are required: url
答案 0 :(得分:0)
为了在Pycharm中将参数传递给您的函数,请导航到Run -> Edit Configurations
并在那里设置参数。
或者,尝试使用命令行指定的参数调用程序。