我写了一个蜘蛛(spider),它从 Android 客户端的用户那里获取电影名称;服务器会在网站中查找与该名称相关的链接,并将其发送回客户端(Android)。蜘蛛负责提取这些链接并将其存储到数据库中。
这是蜘蛛代码:
import json
from urllib.parse import quote_plus

import scrapy
import scrapy.spiders

from Movie.items import SearchItem
class SearchSpyder(scrapy.Spider):
    """Search dibamoviez.pw for a title and yield one item per matching link.

    Spider arguments:
        category: Key of ``CATEGORY_KEYWORDS`` (``'movie'`` or ``'serial'``);
            selects the keyword a result title must contain.
        movieName: Free-text title that is sent as the site's ``s`` query
            parameter.
    """

    name = "search"
    allowed_domains = ["dibamoviez.pw"]

    # Maps the ``category`` spider argument to the keyword looked for in
    # each result title.
    CATEGORY_KEYWORDS = {
        'movie': 'فیلم',
        'serial': 'سریال',
    }

    # Anchors that wrap the individual results on the search page.
    RESULT_ANCHORS = "body > div[id='Body'] > main > div > a"

    def __init__(self, category, movieName, *args, **kwargs):
        super(SearchSpyder, self).__init__(*args, **kwargs)
        # Template carrying the request-level fields; parse() copies it for
        # every match instead of mutating it.
        self.item = SearchItem()
        self.item['movieName'] = movieName
        self.item['category'] = category
        # quote_plus turns spaces into '+' (as before) and also escapes
        # characters such as '&', '#' and '%' that would otherwise break
        # the query string.
        self.start_urls = [f"http://dibamoviez.pw/?s={quote_plus(movieName)}"]

    def parse(self, response):
        """Yield a separate item for each result whose title matches the category.

        Raises:
            KeyError: If the ``category`` argument is not a key of
                ``CATEGORY_KEYWORDS``.
        """
        # Loop-invariant, so resolve it once.
        keyword = self.CATEGORY_KEYWORDS[self.item['category']]

        # Read href and title from the same anchor so that an anchor without
        # an <h4> cannot shift the pairing of every following result.
        for anchor in response.css(self.RESULT_ANCHORS):
            href = anchor.xpath("@href").extract_first()
            if href is None:
                continue
            title = "".join(anchor.xpath("./h4/text()").extract())
            if keyword not in title:
                continue

            # A fresh copy per match: yielding the shared self.item would
            # hand the same object out repeatedly, each time with its
            # 'link' overwritten by the next match.
            item = self.item.copy()
            item['link'] = href
            self.logger.debug("matched link: %s", href)
            yield item
这是网络服务:
class Schedule(WsResource):
    """Web-service resource whose POST handler schedules a spider run."""

    def render_POST(self, txrequest):
        """Schedule the requested spider and return a status dict.

        The form fields ``project`` and ``spider`` are required (a missing
        one raises ``KeyError``). Repeated ``setting`` fields of the form
        ``NAME=value`` are collected into a dict passed on as ``settings``.
        ``jobid`` is optional and defaults to a new UUID hex string. Every
        remaining field is forwarded to the scheduler as a keyword argument.

        Returns:
            ``{"status": "error", "message": ...}`` when the spider is not
            in the project's spider list, otherwise a dict with
            ``node_name``, ``status`` and ``jobid``.
        """
        # --- custom response hook -------------------------------------
        db = dataset.connect('sqlite:///search.db')
        table = db['movie_search']
        # NOTE(review): neither `db` nor `table` is used further down in
        # this method; the connection is opened on every POST.
        # --- end of custom response hook ------------------------------

        form = native_stringify_dict(copy(txrequest.args), keys_only=False)

        # 'setting' must be removed before the remaining fields are
        # flattened, because its value list is consumed whole.
        overrides = dict(
            pair.split('=', 1) for pair in form.pop('setting', [])
        )

        # Each remaining field maps to a list of values; only the first
        # value is kept.
        spider_args = {key: values[0] for key, values in form.items()}

        project = spider_args.pop('project')
        spider = spider_args.pop('spider')
        version = spider_args.get('_version', '')

        if spider not in get_spider_list(project, version=version):
            return {"status": "error", "message": "spider '%s' not found" % spider}

        spider_args['settings'] = overrides
        jobid = spider_args.pop('jobid', uuid.uuid1().hex)
        spider_args['_job'] = jobid

        self.root.scheduler.schedule(project, spider, **spider_args)
        return {"node_name": self.root.nodename, "status": "ok", "jobid": jobid}
并在 Windows 的 cmd 中使用以下命令(command)运行项目:
$ curl http://localhost:6800/schedule.json -d project=Movie -d spider=search -d movieName="some_movie"
问题是:我无法在 Web 服务中访问用户通过 JSON 发送的 movieName 参数(arg),也无法将链接发送回用户!或者,我可以在 Spider 中实现自己的 Web 服务吗?
有什么解决方案吗?