我想在 Django 的某个视图中于后台运行一个 Scrapy 脚本,用它来抓取一些信息。我尝试过多种方式,例如 crawl 命令、CrawlerProcess、scrapyd 等,但全都返回了错误。这个 Scrapy 脚本作为独立文件运行时一切正常,可是一旦从视图中调用,就会出现大量错误。如有任何建议,不胜感激。(我尝试把它放进视图文件底部的那个 def 函数里。)
import scrapy
from scrapy import Spider
from scrapy import Request
from scrapy.crawler import CrawlerProcess
class ProductSpider(scrapy.Spider):
    """Scrape product titles, prices and links from one Amazon.ca search page.

    The search keywords are taken from the ``product`` spider argument, e.g.
    ``process.crawl(ProductSpider, product="usb cable")`` or
    ``scrapy crawl Product_spider -a product="usb cable"``.  When the argument
    is omitted the spider falls back to prompting on stdin, which keeps the
    original interactive behaviour when the file is run as a script.

    The prompt used to live in the class body, so it ran as soon as the module
    was imported; that cannot work from a web view or a scrapyd job, where no
    terminal is attached.
    """

    name = "Product_spider"
    allowed_domains = ['www.amazon.ca']
    # so that websites will not block access to the spider
    download_delay = 30

    def __init__(self, *args, product=None, **kwargs):
        """Build the start URL from ``product`` (prompting only if it is missing).

        Args:
            product: Search keywords. If ``None``, the user is asked on stdin.
            *args, **kwargs: Forwarded unchanged to ``scrapy.Spider.__init__``.
        """
        # Function-scope import so the file's top-level import block is untouched.
        from urllib.parse import quote_plus

        super().__init__(*args, **kwargs)
        if product is None:
            product = input("What product are you looking for? Keywords help for specific products: ")
        self.product = product
        # quote_plus() encodes spaces, '&', '#', ... so the keywords cannot
        # break or extend the query string.
        self.start_urls = [
            'https://www.amazon.ca/s/ref=nb_sb_noss_2?url=search-alias%3Daps&field-keywords='
            + quote_plus(product)
        ]

    def parse(self, response):
        """Extract titles, prices and URLs from the search results page.

        Yields:
            A single dict with the keys ``product_title``, ``product_price``
            and ``url``; each value is the list of all matches on the page.
        """
        # The original called self.crawler_process.create_crawler() here.
        # Spider has no ``crawler_process`` attribute, so that line raised
        # AttributeError, and its result was never used; it has been removed.

        # xpath is similar to an address that is used to find certain elements
        # in HTML code; this info is then extracted
        product_title = response.xpath('//*/div/div/div/div[2]/div[1]/div[1]/a/@title').extract()
        product_price = response.xpath('//span[contains(@class,"s-price")]/text()').extract()
        product_url = response.xpath('//*/div/div/div/div[2]/div[1]/div[1]/a/@href').extract()
        # yield hands the item to the pipeline for processing (if one is
        # configured) instead of storing it in the spider
        yield {
            'product_title': product_title,
            'product_price': product_price,
            'url': product_url,
        }
        # TODO: it is checking the same url, no generality; maybe crawl about
        # 5 pages, sort from high to low and match on a certain number of
        # keywords.
# Start the crawl only when this file is executed directly.  Without the guard
# the crawl began as a side effect of importing the module, so importing the
# spider from other code (for example a web view) would block on it.
if __name__ == "__main__":
    process = CrawlerProcess({
        'USER_AGENT': 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)'
    })
    process.crawl(ProductSpider)
    # start() runs the Twisted reactor and blocks until the crawl has finished.
    process.start()
from django.shortcuts import render, redirect
from django.contrib.auth.forms import UserCreationForm, AuthenticationForm
from django import forms
from django.contrib.auth.models import User
from .forms import CustomUCreationForm
from django.contrib.auth import authenticate,login,logout
from django.contrib import messages
from .models import Items
from django.core.signals import request_finished
from django.http import HttpRequest
from scrapy.crawler import CrawlerProcess
from scrapyd_api import ScrapydAPI
# Create your views here.
def signup(request):
    """Show the sign-up form and create the account on a valid POST.

    A non-POST request gets an empty form.  A POST binds the submitted data;
    the form is saved when it validates.  In every case the sign-up template
    is rendered with the form (including its validation errors, if any).
    """
    # NOTE(review): the pasted source lost its indentation; the ``else`` is
    # assumed to belong to the request-method check.
    is_submission = request.method == 'POST'
    if not is_submission:
        form = CustomUCreationForm()
    else:
        form = CustomUCreationForm(request.POST)
        if form.is_valid():
            form.save()
    context = {'form': form}
    return render(request, 'signuptemplate.html', context)
def user_login(request):
    """Authenticate the submitted credentials and log the user in.

    Non-POST requests render the login page.  On POST:

    * unknown or wrong credentials -> error message, redirect to ``login``;
    * valid, active user -> session started, redirect to ``home``;
    * valid but inactive user -> falls through and re-renders the login page.
    """
    if request.method == 'POST':
        submitted = request.POST
        # .get() rather than [...]: a POST that lacks either field now takes
        # the normal "bad credentials" path below instead of raising a
        # key error and producing a 500 response.
        user = authenticate(
            username=submitted.get('username'),
            password=submitted.get('password'),
        )
        if user is None:
            messages.error(request, 'username or password not correct')
            return redirect('login')
        # not active means the account may be banned or otherwise disabled
        if user.is_active:
            login(request, user)
            return redirect('home')
    return render(request, 'design.html')
def home(request):
    """Render the home page template."""
    template_name = 'home.html'
    return render(request, template_name)
def logout_method(request):
    """Log the current user out, then render ``design.html``.

    ``django.contrib.auth.logout`` takes only the request and returns ``None``.
    The original passed the template name as a second argument (a TypeError)
    and returned the result, so the view never produced an HTTP response.
    """
    logout(request)
    return render(request, 'design.html')
def newitem(request):
    """Create an ``Items`` row for the current user from the submitted form.

    Non-POST requests render the entry form.  A POST stores the value of the
    ``new-item-input`` field together with ``request.user`` and redirects to
    ``home``.
    """
    if request.method != 'POST':
        return render(request, 'newitem.html')

    owner = request.user
    entry = Items(Item=request.POST['new-item-input'], Itemuser=owner)
    entry.save()
    return redirect('home')
def currentprices(request):
    """Schedule a price crawl on Scrapyd and render the list of all items.

    The crawl is only queued; this view does not wait for, or read, its
    results.
    """
    # Dev-server invocation noted by the original author:
    #   python manage.py runserver --nothreading --noreload
    items = Items.objects.all()
    # NOTE(review): port 8000 is the Django dev server's default, so the
    # Scrapyd client was most likely talking to Django itself.  This assumes
    # Scrapyd runs with its default http_port (6800) - adjust if configured
    # otherwise.
    scrapyd = ScrapydAPI('http://127.0.0.1:6800/')
    # schedule() expects the spider's ``name`` attribute ("Product_spider"),
    # not the Python class name.
    scrapyd.schedule('Yourhub2', 'Product_spider')
    return render(request, 'currentprices.html', {'items': items})