我的项目中有一个类,通过访问 URL `data/refresh` 来调用并执行。
urls.py:
from django.conf.urls import url, include
from . import views
from rest_framework import routers
# REST framework router; nothing is registered on it in this snippet.
router = routers.DefaultRouter()

# Patterns are tried top to bottom, so the specific routes come before the
# catch-all router include.
urlpatterns = [
    # Triggers a data refresh.
    url(r'^refresh/$', views.refresh),
    # Landing page.
    url(r'^$', views.index, name='index'),
    # Routes generated by the REST framework router.
    url(r'^', include(router.urls)),
    # Login/logout views for the browsable API.
    url(
        r'^api-auth/',
        include('rest_framework.urls', namespace='rest_framework'),
    ),
]
以及我的 views.py:
def refresh(request):
    """Run a full data refresh and report the outcome as JSON.

    Args:
        request: the incoming Django request (not inspected).

    Returns:
        JsonResponse: ``{"code": 1, "data": <result of Gatorate.run()>}`` when
        the client starts, or ``{"code": -1, "error": <message>}`` with HTTP
        status 500 when ``Gatorate.start()`` fails.
    """
    # NOTE(review): Gatorate and JsonResponse are not imported in the visible
    # part of views.py -- confirm both are in scope.
    gatorate = Gatorate()
    r = gatorate.start()
    if r["code"] < 0:
        # A Django view must return an HttpResponse; returning a bare dict
        # makes Django fail while processing the response.
        return JsonResponse({"code": -1, "error": r["error"]}, status=500)
    # run() blocks until every ASIN has been processed, so this request stays
    # open for the whole refresh.
    data = gatorate.run()
    return JsonResponse({"code": 1, "data": data})
在开发环境中运行时,这个类可以正常执行并填充我的数据库;而在生产环境中,只有当我通过 SSH 登录服务器、启动 Python、导入我的模块并手动执行时它才起作用。但是,当我直接打开这个 URL,或点击会请求该 URL 的按钮时,要么得到 502 错误,要么没有任何报错、但数据库也没有被填充。我想知道是否存在权限问题导致脚本无法运行,或者(如果这种说法成立的话)它根本没有进入虚拟环境。此外,如果有人能建议一种让它每天自动运行的方法就更好了:我计划使用 cron,但也希望保留通过 URL 远程触发的灵活性,比如用 Lambda 来调用它。我的所有服务都已部署在 VPC 内,并连接到我的 RDS。最重要的是,我只想让它能正常工作。提前致谢。
spider.spider.py:
import sqlite3
import MySQLdb
import time
import os
import django
os.environ["DJANGO_SETTINGS_MODULE"] = 'web.settings'
django.setup()
from django.utils import timezone
from webservice.models import BSR
from vardata import ASINS
class Gatorate:
    """Look up a fixed list of ASINs and store each result as a BSR row.

    Typical use is ``start()`` followed by ``run()``. Every public method
    reports its outcome as a dict: ``{"code": 1, ...}`` on success or
    ``{"code": <negative int>, "error": <message>}`` on failure.

    With the default delays below, one refresh sleeps for about 26 seconds
    per ASIN plus 10 seconds in ``run()``.
    """

    # Seconds to wait before each product lookup.
    LOOKUP_DELAY = 1
    # Seconds to wait after each successful attribute read.
    # NOTE(review): presumably these reads do not hit the network, in which
    # case this could be 0 -- confirm against the Amazon client library.
    ATTRIBUTE_DELAY = 5
    # Seconds to wait before and after the refresh pass in run().
    RUN_DELAY = 5

    def __init__(self):
        self.amazon = None   # API client, created by start()
        self.product = None  # result of the most recent lookup
        self.asins = None    # not used by any method in this class
        self.ASINS = ASINS   # ASINs to refresh, imported from vardata

    def start(self):
        """Create the Amazon API client.

        Returns:
            dict: ``{"code": 1}`` on success, or
            ``{"code": -1, "error": <message>}`` if the client could not be
            created.
        """
        # NOTE(review): AmazonAPI and the AMAZON_* credentials are not
        # imported in the visible part of this module -- confirm they are in
        # scope. If they are missing, the NameError is reported below.
        try:
            self.amazon = AmazonAPI(
                AMAZON_ACCESS_KEY,
                AMAZON_SECRET_KEY,
                AMAZON_ASSOC_TAG,
                region="US",
            )
        except Exception as e:
            return {"code": -1, "error": str(e)}
        return {"code": 1}

    def get_asins(self):
        """Return the configured ASIN list.

        Returns:
            dict: ``{"code": 1, "data": <asins>}`` on success, or
            ``{"code": -2, "error": <message>}`` if the list is unavailable.
        """
        try:
            asins = self.ASINS
        except Exception as e:
            # Report the message, not the exception object, so the result
            # stays serializable and matches start().
            return {"code": -2, "error": str(e)}
        return {"code": 1, "data": asins}

    def refresh_asins(self, asins):
        """Look up every distinct ASIN and save one BSR row for each.

        A failure on one ASIN is printed and skipped so that the remaining
        ASINs are still processed.

        Args:
            asins: iterable of ASIN strings; duplicates are looked up once.

        Returns:
            dict: ``{"code": 1, "data": <list of ASINs saved>}`` on
            completion, ``{"code": -1, "error": "Not defined"}`` when
            ``asins`` is None, or ``{"code": -2, "error": "Empty value"}``
            when it is empty.
        """
        print(asins)
        if asins is None:
            return {"code": -1, "error": "Not defined"}
        if len(asins) == 0:
            return {"code": -2, "error": "Empty value"}

        saved = []
        for asin in set(asins):
            print('finding sku: ' + str(asin))
            try:
                time.sleep(self.LOOKUP_DELAY)
                # Lookup is by ASIN. The original author noted a SKU lookup
                # as the alternative: IdType="SKU", SearchIndex='All'.
                self.product = self.amazon.lookup(ItemId=asin)
                print(str(self.product))
                self._save_product(asin)
                saved.append(asin)
            except Exception as e:
                # Best effort: one bad ASIN must not abort the whole pass.
                print(e)
        return {"code": 1, "data": saved}

    def _read_attribute(self, name, pause=True):
        """Read one item attribute from self.product, or "" if that fails."""
        try:
            value = self.product.get_attribute(name)
            if pause:
                time.sleep(self.ATTRIBUTE_DELAY)
            return value
        except Exception as e:
            print(e)
            return ""

    def _read_property(self, getter):
        """Return getter(self.product), or "" if the read raises."""
        try:
            return getter(self.product)
        except Exception as e:
            print(e)
            return ""

    def _save_product(self, asin):
        """Build a BSR row from self.product and save it."""
        brand = self._read_attribute("Brand", pause=False)
        # Price and currency each fall back to "" on their own. The earlier
        # code assigned the fallback to an unrelated name, so a failed read
        # left these variables unset or holding the previous ASIN's value.
        selling_price = self._read_property(lambda p: p.price_and_currency[0])
        currency = self._read_property(lambda p: p.price_and_currency[1])
        availability = self._read_property(lambda p: p.availability)
        sales_rank = self._read_property(lambda p: p.sales_rank)
        binding = self._read_attribute("Binding")
        color = self._read_attribute("Color")
        title = self._read_attribute("Title")
        package_quantity = self._read_attribute("PackageQuantity")
        product_group = self._read_attribute("ProductGroup")

        BSR(
            ASIN=asin,
            Brand=brand,
            Selling_Price=selling_price,
            Currency=currency,
            Availability=availability,
            BSR=sales_rank,
            Binding=binding,
            Color=color,
            Product_Description=title,
            Package_Quantity=package_quantity,
            Product_Group=product_group,
        ).save()

    def run(self):
        """Refresh every configured ASIN.

        Returns:
            dict: ``{"code": 1, "data": "complete"}`` on success, or
            ``{"code": -1, "error": <message>}`` if the ASIN list could not
            be read or was rejected by ``refresh_asins``.
        """
        r = self.get_asins()
        if r["code"] < 0:
            return {"code": -1, "error": r["error"]}
        print(r["data"])
        time.sleep(self.RUN_DELAY)
        r = self.refresh_asins(r["data"])
        if r["code"] < 0:
            # Surface a rejected ASIN list instead of reporting "complete".
            return {"code": -1, "error": r["error"]}
        time.sleep(self.RUN_DELAY)
        return {"code": 1, "data": "complete"}
答案 0 :(得分:0)
最好将刷新操作公开为Django management command并为其安排cron作业。
您在此处执行的长时间运行操作会导致502或504 http错误,具体取决于服务器处理它的方式。
答案 1 :(得分:0)
事实证明,因为我使用了虚拟环境,所以必须先激活 virtualenv(使用其中的 Python),命令才能正常工作。另外,命令运行时间太长,这就是我得到 502 的原因:请求超时了。该脚本大约需要 5 分钟才能运行完。所以现在我用 crontab 激活 virtualenv 并运行该命令。最后,为了保留网站上的刷新按钮,我增加了第二个 cron 作业,用来检查数据库中一个默认为 False 的标志值:用户单击按钮时,该值会被设为 True;下次 cron 作业检查到该值为 True 时,就会执行刷新。