我正在创建一个应用程序，使用 Flask 获取用户输入，以便从网站抓取图像。但是，应用进程并没有结束，而是持续在终端中输出如下消息。我实际上想在抓取完图像后终止 Flask。
127.0.0.1 - - [14/Nov/2018 12:00:44] "GET /static/script.js HTTP/1.1" 304 -
import os
import sys
import urllib.request
import requests
from urllib.parse import urljoin
from bs4 import BeautifulSoup
from flask import Flask, render_template, request, redirect
# Flask application object; the @ic.route decorators below register views on it.
ic = Flask(__name__)
# Module-level image counter: reset and incremented by get_images(), read by main().
count = 0
@ic.route("/")
def main():
    """Render the landing page with a summary of the last crawl.

    Reads the module-level ``count`` and passes a message of the form
    "<count> Image(s) Downloaded !" to ``index.html`` as ``result``.
    """
    # Singular wording only when exactly one image was counted.
    suffix = " Image Downloaded !" if count == 1 else " Images Downloaded !"
    return render_template("index.html", result=str(count) + suffix)
@ic.route("/get_images", methods=['POST'])
def get_images():
    """Download every ``<img>`` found on the page submitted in the form.

    Reads the page URL from the ``inputURL`` form field, fetches the page
    and hands each image URL to ``download_image``. The module-level
    ``count`` is reset, then incremented once per image handed off.

    Returns:
        A redirect to the landing page on success, or ``index.html``
        rendered with the error text when the page cannot be fetched.
    """
    global count
    _url = request.form['inputURL']
    count = 0
    try:
        response = requests.get(_url)
        # requests.get() does not raise for 4xx/5xx responses by itself;
        # without this call an HTTPError handler can never trigger.
        response.raise_for_status()
    except requests.exceptions.RequestException as error:
        # RequestException is the base class of HTTPError and also covers
        # malformed URLs, DNS failures and refused connections.
        return render_template("index.html", result=str(error))

    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    soup = BeautifulSoup(response.text, 'html.parser')
    for img in soup.find_all('img'):
        src = img.get('src')
        if not src:
            # <img> without a usable src attribute: nothing to download.
            continue
        count += 1
        # urljoin returns absolute URLs unchanged and resolves relative
        # ones against the page URL, so no prefix check is needed.
        download_image(urljoin(_url, src), count)
    return redirect("http://localhost:5000")
def download_image(url, num):
    """Save the image at ``url`` as ``images/<num>.png`` (best effort).

    Failures are reported on stdout instead of being raised.

    Args:
        url: Address of the image to fetch.
        num: Number used as the file name stem.
    """
    directory = "images/"
    image_name = str(num) + ".png"
    image_path = os.path.join(directory, image_name)
    try:
        # urlretrieve does not create missing directories, so make sure
        # the target folder exists before writing into it.
        os.makedirs(directory, exist_ok=True)
        urllib.request.urlretrieve(url, image_path)
    except ValueError:
        print("Invalid URL !")
    except Exception:
        # Exception (not a bare except) so Ctrl-C / SystemExit still
        # propagate instead of being swallowed here.
        print("Unknown Exception" + str(sys.exc_info()[0]))
if __name__ == "__main__":
    # Start the Flask server with its default settings, but only when this
    # file is executed directly rather than imported.
    ic.run()
index.html
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <!-- Reloads the landing page every 100 seconds; each reload sends new
         requests to the server. -->
    <meta http-equiv="refresh" content="100;url=http://localhost:5000/"/>
    <title>Image Crawler</title>
    <link href="../static/style.css" rel="stylesheet">
</head>
<body class="body">
<div class="container">
    <div class="header">
        <h3 class="text-muted">Image Crawler</h3>
    </div>
    <div class="jumbotron">
        <!-- Posts the page URL to /get_images as the "inputURL" field. -->
        <form name="myForm" class="form" onsubmit="return checkURL()" method="post" action="/get_images">
            <h1>Enter URL</h1>
            <!-- type="url" lets the browser validate the address;
                 "name" is not a valid input type. -->
            <input type="url" name="inputURL" class="input-text" id="inputURL" placeholder="URL"
                   required autofocus>
            <br>
            <button class="btn" id="btnSubmit" type="submit">Download Photos!</button>
        </form>
    </div>
    <div class="jumbotron">
        <!-- Status or error message supplied by the view as "result". -->
        <h3>{{ result }}</h3>
    </div>
</div>
</body>
</html>
答案 0 :(得分:0)
我测试并重新整理了您的代码，以下是可以运行的解决方案。原代码中有不少错误，我已将其修复。我没有测试您的表单，但如果您提供格式正确的 URL 并正确处理，它应该可以工作。如果出现问题请留言。
import os
import sys
import urllib.request
import requests
from urllib.parse import urljoin
from bs4 import BeautifulSoup
from flask import Flask, render_template, request, redirect
# Flask application object; the @ic.route decorators below register views on it.
ic = Flask(__name__)
# Module-level image counter: reset and incremented by get_images(), read by main().
count = 0
@ic.route("/", methods=['GET'])
def main():
    """Show ``index.html`` with how many images the last crawl counted.

    The message is built from the module-level ``count`` and handed to
    the template as ``result``.
    """
    if count == 1:
        wording = " Image Downloaded !"
    else:
        wording = " Images Downloaded !"
    message = str(count) + wording
    return render_template("index.html", result=message)
@ic.route("/get_images", methods=['POST', 'GET'])
def get_images():
    """Download every ``<img>`` found on the target page.

    The page URL is taken from the ``inputURL`` request field (form body
    or query string); when it is absent or empty the built-in default
    URL is used. Each image URL is handed to ``download_image`` and the
    module-level ``count`` is reset, then incremented once per image.

    Returns:
        A redirect to the landing page on success, or ``index.html``
        rendered with the error text when the page cannot be fetched.
    """
    global count
    # request.values merges query-string and form data, so the URL can
    # come from the HTML form or be omitted to use the default.
    _url = request.values.get('inputURL') or 'https://www.bljesak.info'
    count = 0
    try:
        response = requests.get(_url)
        # requests.get() does not raise for 4xx/5xx responses by itself;
        # without this call an HTTPError handler can never trigger.
        response.raise_for_status()
    except requests.exceptions.RequestException as error:
        # RequestException is the base class of HTTPError and also covers
        # malformed URLs, DNS failures and refused connections.
        return render_template("index.html", result=str(error))

    soup = BeautifulSoup(response.text, 'html.parser')
    for img in soup.find_all('img'):
        src = img.get('src')
        print(src)
        if not src:
            # <img> without a usable src attribute: nothing to download.
            continue
        count += 1
        # A 4-character slice can never equal 'https', so the old prefix
        # branch was dead; urljoin already returns absolute URLs unchanged
        # and resolves relative ones against the page URL.
        download_image(urljoin(_url, src), count)
    return redirect("http://localhost:5000")
def download_image(url, num):
    """Save the image at ``url`` as ``images/<num>.png`` (best effort).

    Failures are reported on stdout instead of being raised. The call
    blocks until the file has been fetched, so pages with many images
    take a while to finish.

    Args:
        url: Address of the image to fetch.
        num: Number used as the file name stem.
    """
    directory = "images/"
    image_name = str(num) + '.png'
    image_path = os.path.join(directory, image_name)
    # Progress output: one line per image about to be fetched.
    print(image_name, image_path)
    try:
        # urlretrieve does not create missing directories, so make sure
        # the target folder exists before writing into it.
        os.makedirs(directory, exist_ok=True)
        urllib.request.urlretrieve(url, image_path)
    except ValueError:
        print("Invalid URL !")
    except Exception:
        # Exception (not a bare except) so Ctrl-C / SystemExit still
        # propagate instead of being swallowed here.
        print("Unknown Exception" + str(sys.exc_info()[0]))
if __name__ == "__main__":
    # Launch the Flask server with default settings when run as a script;
    # importing this module does not start it.
    ic.run()
此外，如果您要输入网址，应将 input 的类型设为 `url`，而不需要使用 `name` 类型。