在我的 Django 项目中，我使用 Python 脚本抓取数据。我先把所有字段临时存放在 pandas 数据框中，然后逐行遍历，把信息保存到 Django 模型里。我唯一能成功的做法，是把 PNG 截图以 models.BinaryField 的形式保存在模型中。我用 HTML 模板在报告里显示模型的字段，但图像显示成了“<memory at 0x…>”，而不是图像本身。怎样才能显示图像？
crawl.py
# Import Libraries
import requests
from selenium import webdriver
from lxml import html
import pandas as pd
import numpy as np
from datetime import datetime
import pytz
from selenium.webdriver.chrome.options import Options
def crawl_website(product, xpath_dict):
    """Scrape the seller listing page of ``product`` into a DataFrame.

    Args:
        product: Object exposing ``title``, ``map_price``, ``asin`` and
            ``sku`` attributes.
        xpath_dict: Mapping with the XPath expressions stored under the keys
            ``'original_price'``, ``'seller_name'`` and ``'seller_link'``.

    Returns:
        A ``pandas.DataFrame`` with one row per seller and the columns
        ``Product_Name``, ``ASIN``, ``SKU``, ``Time_Stamp``, ``Seller_Name``,
        ``Seller_URL``, ``MAP_Price``, ``Current_Price``, ``Discount`` and
        ``MAP_Violation``.  A ``Screenshot`` column (PNG bytes of the page)
        is added only when at least one row violates the MAP price.
    """
    # Set up parameters
    base_url = 'https://www.website.com/product/{sku}/sellers'
    # Must be named `headers`: that is the name handed to requests.get() below.
    headers = {
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 '
                      '(KHTML, like Gecko) Chrome/42.0.2311.90 Safari/537.36'
    }
    product_name = product.title
    map_price = product.map_price
    asin = product.asin
    sku = product.sku

    # Retrieve Webpage
    full_url = base_url.format(sku=sku)
    time_stamp = pytz.utc.localize(datetime.utcnow())
    page = requests.get(full_url, headers=headers)
    doc = html.fromstring(page.content)

    # Extract Price Field
    original_price = doc.xpath(xpath_dict['original_price'])

    # Discount relative to the MAP price, floored at 0 and rendered as "<n>%"
    discount = [
        str(100 * max(0.0, round(1 - float(price) / float(map_price), 2))) + '%'
        for price in original_price
    ]

    # MAP Violation Field: True where the listed price undercuts the MAP price
    map_violation = [float(price) < float(map_price) for price in original_price]
    has_violation = any(map_violation)

    # Extract Seller Names and Links
    seller_name = doc.xpath(xpath_dict['seller_name'])
    seller_link = doc.xpath(xpath_dict['seller_link'])

    # If a violation is found, take a screenshot of the current URL
    screenshot = None
    if has_violation:
        chrome_options = Options()
        chrome_options.add_argument("--headless")
        chrome_options.add_argument("--window-size=1920,1080")
        driver = webdriver.Chrome('chromedriver', chrome_options=chrome_options)
        try:
            driver.get(full_url)
            screenshot = driver.get_screenshot_as_png()
        finally:
            # Always shut the browser down, even if loading the page failed,
            # so headless Chrome processes are not leaked.
            driver.quit()

    # Create DataFrame
    total_rows = len(seller_name)
    columns = {
        'Product_Name': np.repeat(product_name, total_rows),
        'ASIN': np.repeat(asin, total_rows),
        'SKU': np.repeat(sku, total_rows),
        'Time_Stamp': np.repeat(time_stamp, total_rows),
        'Seller_Name': seller_name,
        'Seller_URL': seller_link,
        'MAP_Price': np.repeat(map_price, total_rows),
        'Current_Price': original_price,
        'Discount': discount,
        'MAP_Violation': map_violation,
    }
    if has_violation:
        # A plain list keeps the original bytes object in every row.
        # np.repeat() would build a fixed-width NumPy bytes array instead:
        # one full copy of the PNG per row, with trailing NUL bytes stripped.
        columns['Screenshot'] = [screenshot] * total_rows
    return pd.DataFrame(columns)
views.py
import pandas as pd
from datetime import datetime
from django.shortcuts import render, redirect
from products.models import Product
from sellers.models import Seller
from sellers.models import Seller_Price
from .crawl_website import crawl_website
def crawl_prices(request):
    """Crawl every visible product and store one Seller_Price row per offer.

    Superusers process all products; other users only those whose
    ``client_id`` is the requesting user.  Products without a SKU are
    skipped.  Redirects to ``/admin`` when done.
    """
    products = Product.objects.order_by('-date_added')
    if not request.user.is_superuser:
        products = products.filter(client_id=request.user)

    for product in products:
        if not product.sku:
            continue

        # Crawl Website
        # NOTE(review): `xpath_dict` is not defined in the visible snippet;
        # presumably it is a module-level setting -- confirm.
        offers = crawl_website(product, xpath_dict)

        for _, offer in offers.iterrows():
            # Create Seller object if it doesn't exist
            seller_obj, _created = Seller.objects.get_or_create(
                name=offer['Seller_Name'],
            )

            # Flag earlier records for this seller/product as no longer latest
            Seller_Price.objects.filter(
                seller_id=seller_obj,
                product_id=product,
            ).update(latest_update=False)

            record_fields = {
                'seller_id': seller_obj,
                'product_id': product,
                'date_reported': offer['Time_Stamp'],
                'url': offer['Seller_URL'],
                'seller_price': offer['Current_Price'],
                'discount': offer['Discount'],
                'violation': offer['MAP_Violation'],
            }
            # Record screenshot only if there is a violation
            if offer['MAP_Violation']:
                record_fields['violation_snapshot'] = offer['Screenshot']

            seller_price_obj = Seller_Price.objects.create(**record_fields)

    return redirect('/admin')
from django.views.generic import View
from django.utils import timezone
from .models import *
# This passes database objects to html template for reports
class Pdf(View):
    """Pass the requesting user's latest seller prices to the report template."""

    def get(self, request):
        """Build the report context for the user's current Seller_Price rows."""
        latest_prices = (
            Seller_Price.objects
            .order_by('-date_reported')
            .filter(product_id__client_id=request.user, latest_update=True)
        )
        params = {
            'today': timezone.now(),
            'seller_price': latest_prices,
            'request': request,
        }
        # NOTE(review): the visible snippet stops here without returning a
        # response built from `params`; the rendering call appears to have
        # been cut off -- confirm against the full view.
report.html
{# Report template: prints one entry per item of the `seller_price` context variable. #}
<!doctype html>
<html>
<head>
{% load staticfiles %}
<meta charset="utf-8">
<title>Sales Report</title>
</head>
<body>
{% for sp in seller_price %}
Seller Name: {{ sp.seller_id }}
{# NOTE(review): violation_snapshot is reportedly a BinaryField; printing it directly shows the object's text representation, not an image. #}
Image: {{ sp.violation_snapshot }}
{% endfor %}
</body>
</html>
答案 0 :(得分:2)
我强烈建议不要使用二进制字段来保存实际文件,但是如果您有有效的用途,这是我能想到的最好的方法。
假设您知道django模板过滤器是什么,
@register.filter(name='binary_to_image')
def encode_binary_data_to_image(binary):
    """Return ``binary`` as Base64 text suitable for a ``data:`` URI.

    Assumes ``register = template.Library()`` is defined earlier in this
    templatetags module.

    Args:
        binary: Raw image data as ``bytes``, ``bytearray`` or ``memoryview``;
            may be ``None`` or empty.

    Returns:
        The Base64 encoding as a ``str``, or ``''`` when there is no data.
    """
    import base64

    if not binary:
        return ''
    # bytes() normalises memoryview/bytearray input; decoding to ASCII keeps
    # the template from rendering a b'...' literal.
    return base64.b64encode(bytes(binary)).decode('ascii')
并在模板中
<img src = "data:image/png;base64,{{objects.binary_field|binary_to_image}}">
现在您已经在浏览器中渲染了图像。
但我必须再次强调：除非您有真正充分的理由，否则请不要用数据库来存储图像。
看到您修改后的问题，补充一点：在您执行下面这行代码的地方
screenshot = driver.get_screenshot_as_png()
您可以添加或重构代码,以将屏幕截图保存到媒体文件夹中,例如
# save_screenshot() writes the PNG to the given path and returns a success
# flag (True/False), not the image data, so do not store it as the screenshot.
saved = driver.save_screenshot('/path/to/image/dir/file.name')
因此它将像django.db.models.ImageField一样将其保存到您的图像目录中,因此您可以像这样从模型中读取它
<img src="{{object.image_field.url}}"></img>
答案 1 :(得分:0)
这可能有效，但我没有办法测试。（思路参考了另一篇帖子。）
您将需要为二进制编码数据生成Base64编码
import base64
# b64encode() requires bytes (a str raises TypeError on Python 3) and returns
# bytes; decode to text so a template does not render it as b'...'.
encoded = base64.b64encode(b"BinaryField as ByteArray").decode("ascii")
然后确保将sp.violation_snapshot返回为base64编码。
然后您可以像这样使用它。
<img src="data:image/png;base64,{{ sp.violation_snapshot }}">