我正在尝试弄清楚如何让我的应用程序在后台运行一个抓取(scrape)网站的函数,因为它耗时很长,如果在前台运行会导致错误。于是我按照 Heroku 网站上的教程操作,该教程中有一个统计单词数并在后台运行的函数,它运行正常。接着我打算先通过导入的方式把我自己的函数放进去:我导入了它,并创建了一个调用它的函数,结果得到了下面的回溯(traceback):
Traceback (most recent call last):
File "my_raddqueue.py", line 2, in <module>
from src.blog.my_task import conn, is_page_ok
File "/Users/ray/Desktop/myheroku/practice/src/blog/my_task.py", line 5, in <module>
from .my_scraps import p_panties
File "/Users/ray/Desktop/myheroku/practice/src/blog/my_scraps.py", line 3, in <module>
from .models import Post
File "/Users/ray/Desktop/myheroku/practice/src/blog/models.py", line 3, in <module>
from taggit.managers import TaggableManager
File "/Users/ray/Desktop/myheroku/practice/lib/python3.5/site-packages/taggit/managers.py", line 7, in <module>
from django.contrib.contenttypes.models import ContentType
File "/Users/ray/Desktop/myheroku/practice/lib/python3.5/site-packages/django/contrib/contenttypes/models.py", line 159, in <module>
class ContentType(models.Model):
File "/Users/ray/Desktop/myheroku/practice/lib/python3.5/site-packages/django/contrib/contenttypes/models.py", line 160, in ContentType
app_label = models.CharField(max_length=100)
File "/Users/ray/Desktop/myheroku/practice/lib/python3.5/site-packages/django/db/models/fields/__init__.py", line 1072, in __init__
super(CharField, self).__init__(*args, **kwargs)
File "/Users/ray/Desktop/myheroku/practice/lib/python3.5/site-packages/django/db/models/fields/__init__.py", line 166, in __init__
self.db_tablespace = db_tablespace or settings.DEFAULT_INDEX_TABLESPACE
File "/Users/ray/Desktop/myheroku/practice/lib/python3.5/site-packages/django/conf/__init__.py", line 55, in __getattr__
self._setup(name)
File "/Users/ray/Desktop/myheroku/practice/lib/python3.5/site-packages/django/conf/__init__.py", line 41, in _setup
% (desc, ENVIRONMENT_VARIABLE))
django.core.exceptions.ImproperlyConfigured: Requested setting DEFAULT_INDEX_TABLESPACE, but settings are not configured. You must either define the environment variable DJANGO_SETTINGS_MODULE or call settings.configure() before accessing settings.
我甚至尝试直接在 my_task.py 中定义该函数并运行它,结果得到了相同的回溯。
这是我的文件结构
下面是我认为与问题再现相关的文件和代码
我想要使用的函数位于 my_scraps.py
import requests
from bs4 import BeautifulSoup
from .models import Post
import random
import re
from django.contrib.auth.models import User
import os
def p_panties():
    """Scrape the video listing page and store unseen videos as draft posts.

    Fetches the listing page, takes the first 13 ``div.post-start`` entries,
    follows each entry's link to extract the embedded YouTube URL, and creates
    a draft ``Post`` (tagged "video" and "Musica") for every title that is not
    already stored.

    Returns:
        list[dict]: one dict per scraped entry with the keys ``href``,
        ``text``, ``embed``, ``comments``, ``src``, ``name``, ``url``,
        ``author`` and ``video``.

    Raises:
        IndexError: if a linked page has no ``<p>`` element, the first
            ``<p>`` is empty, or no URL is found inside it.
    """
    max_entries = 13
    title_suffixes = ('– Official video HD', '– Oficial video HD')
    # Raw string: the pattern contains "\(" and "\)", which are invalid escape
    # sequences in a normal string literal.
    url_pattern = re.compile(
        r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
    )

    def swappo():
        """Return one of the known User-Agent strings, chosen at random."""
        user_one = ' "Mozilla/5.0 (Windows NT 6.0; WOW64; rv:24.0) Gecko/20100101 Firefox/24.0" '
        user_two = ' "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_5)" '
        user_thr = ' "Mozilla/5.0 (Windows NT 6.3; Trident/7.0; rv:11.0) like Gecko" '
        user_for = ' "Mozilla/5.0 (Macintosh; Intel Mac OS X x.y; rv:10.0) Gecko/20100101 Firefox/10.0" '
        agent_list = [user_one, user_two, user_thr, user_for]
        return random.choice(agent_list)

    headers = {
        # The agent strings are padded with spaces; strip them because
        # requests' header validation rejects values with leading whitespace.
        "user-agent": swappo().strip(),
        "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "accept-charset": "ISO-8859-1,utf-8;q=0.7,*;q=0.3",
        "accept-encoding": "gzip,deflate,sdch",
        "accept-language": "en-US,en;q=0.8",
    }

    def youtube_link(url):
        """Fetch *url* and return the first URL found in its first <p> child."""
        youtube_page = requests.get(url, headers=headers)
        soupdata = BeautifulSoup(youtube_page.text, 'html5lib')
        first_paragraph = soupdata.find_all('p')[0]
        # Only the first child node of the paragraph is searched for a URL.
        first_child = str(list(first_paragraph)[0])
        urls = url_pattern.findall(first_child)
        return urls[0].replace('?&autoplay=1', '')

    def yt_id(code):
        """Return the video id from a YouTube embed URL."""
        return code.replace('https://www.youtube.com/embed/', '')

    def strip_hd(hd, move):
        """Return *hd* with every occurrence of *move* removed."""
        return hd.replace(move, '')

    def clean_title(raw_title):
        """Drop the known "official video" suffixes and leading whitespace."""
        for suffix in title_suffixes:
            raw_title = strip_hd(raw_title, suffix)
        return raw_title.lstrip()

    pan_url = 'http://www.example.org'
    shtml = requests.get(pan_url, headers=headers)
    soup = BeautifulSoup(shtml.text, 'html5lib')
    video_row = soup.find_all('div', {'class': 'post-start'})
    name = 'pan videos'

    # The author account id differs between the local (OSX) machine and
    # other environments.
    author_id = 2 if os.getenv('_system_name') == 'OSX' else 3
    author = User.objects.get(id=author_id)

    entries = []
    # Slice before fetching so that only the entries actually kept trigger a
    # request to their detail page.
    for div in video_row[:max_entries]:
        href = div.a.get('href')
        title = clean_title(div.h2.text)
        # Fetch the detail page once and reuse the result for the thumbnail.
        embed = youtube_link(href)
        entries.append({
            'href': href,
            'text': title,
            'embed': embed,
            'comments': title,
            'src': 'https://i.ytimg.com/vi/' + yt_id(embed) + '/maxresdefault.jpg',
            'name': name,
            'url': href,
            'author': author,
            'video': True,
        })

    for entry in entries:
        # Titles act as the de-duplication key; exists() avoids loading rows.
        if Post.objects.filter(title=entry['text']).exists():
            continue
        post = Post()
        post.title = entry['text']
        post.name = entry['name']
        post.url = entry['url']
        post.body = entry['comments']
        post.image_url = entry['src']
        post.video_path = entry['embed']
        post.author = entry['author']
        post.video = entry['video']
        post.status = 'draft'
        post.save()
        # Tags can only be attached once the post has been saved.
        post.tags.add("video", "Musica")
    return entries
my_task.py
import os
import redis
from rq import Worker, Queue, Connection
from .my_scraps import p_panties
import requests
# Queues the worker serves, in priority order. 'important' is included because
# my_raddqueue.py enqueues its job on a queue of that name; a worker that does
# not listen on it would never pick the job up.
listen = ['important', 'high', 'default', 'low']
# Fall back to a local Redis instance when REDISTOGO_URL is not set.
redis_url = os.getenv('REDISTOGO_URL', 'redis://localhost:6379')
conn = redis.from_url(redis_url)

if __name__ == '__main__':
    with Connection(conn):
        # Pass a concrete list of queues rather than a lazy map object.
        worker = Worker([Queue(queue_name) for queue_name in listen])
        worker.work()
def is_page_ok(url):
    """Issue a GET request to *url* and describe the outcome.

    Returns a "... is up" message for HTTP 200, otherwise a message that
    includes the status code received.
    """
    status = requests.get(url).status_code
    if status != 200:
        return "{0} is not OK. Status {1}".format(url, status)
    return "{0} is up".format(url)
def do_this():
    """Run the p_panties scraper and return the entries it produced."""
    return p_panties()
my_raddqueue.py
from rq import Queue
from src.blog.my_task import conn, do_this
# Put the scrape job on the 'important' queue of the shared Redis connection.
# NOTE(review): a worker must be listening on 'important' for this job to
# run — confirm the worker's queue list includes it.
q = Queue(name='important', connection=conn)
result = q.enqueue(do_this)
# Printed once the job has been handed to the queue, not when it finishes.
print("noted")
这一行
from .my_scraps import p_panties
即使我没有使用它,也会导致上面的回溯。在我放弃使用自己的函数之后,我想试试看另一个函数是否还能工作,但它也不行,而且我一直找不出原因;直到我开始逐个删除或注释掉代码,才发现只要把这一行注释掉或删除,它就能正常工作。我的问题出在哪里?我想做的只是让我的应用程序在 Heroku 上按预先指定的时间进行抓取。我该怎样做到这一点?我的方法是不是从一开始就错了?我见过一个叫做 APScheduler 的东西,我是否应该改用它?任何有关改进我的代码的意见都将不胜感激。我接触编程的时间还不长,很多代码都是我自己琢磨出来的,所以如果它看起来不专业,原因就在这里。先谢谢了。
答案 0 :(得分:1)
我对 Heroku 不太了解,但通常可以在 Django 中通过 Celery 来实现这类自动化任务。
这里有很不错的文档: http://docs.celeryproject.org/en/latest/django/first-steps-with-django.html