我的代码:
PHP:
// Quote the argument so shell metacharacters in $input (e.g. '&' or '?' in a
// URL) reach the script literally instead of being interpreted by the shell.
exec("python mypythoncode.py " . escapeshellarg($input), $my_output);
Python:
mypythoncode.py
import sys
from scrapy.crawler import CrawlerProcess, CrawlerRunner
from scrapy.utils.project import get_project_settings
from scrapy.utils.log import configure_logging
from scrapy.settings import Settings
from tutorial.spiders.MixSpider import MixSpider
# Script entry: crawl the single URL given as the first command-line argument.
if len(sys.argv) < 2:
    # Exit with a usage message instead of an IndexError traceback.
    sys.exit("usage: python mypythoncode.py <url>")
inputURL = sys.argv[1]
# NOTE(review): Settings() is constructed without the project's settings; the
# imported get_project_settings() is unused -- confirm which was intended.
crawler = CrawlerProcess(Settings())
# The keyword argument is forwarded to the spider's constructor.
crawler.crawl(MixSpider, inputURL=inputURL)
crawler.start()
MixSpider.py
import scrapy
import string
from tutorial.items import DmozItem
from urlparse import urlparse
from scrapy.http.request import Request
class MixSpider(scrapy.Spider):
    """Spider that crawls one page and follows the link found in each slide.

    The page to crawl is supplied through the ``inputURL`` keyword argument.
    """

    name = 'spider'

    def __init__(self, *args, **kwargs):
        """Initialise the spider and seed ``start_urls`` from ``inputURL``."""
        super(MixSpider, self).__init__(*args, **kwargs)
        url = kwargs.get('inputURL')
        # Without a URL there is nothing to crawl; an empty list avoids
        # seeding the crawl with None.
        self.start_urls = [url] if url else []

    def parse(self, response):
        """Yield one request per slide, handled by ``parse_property``.

        ``meta`` carries ``pro_title`` (the list of extracted link texts) and
        ``title_link`` (the href string taken from the slide's link).
        """
        for slide in response.css('#all > div > div > div > div.slide'):
            title = slide.css('div > div.frame > address > a::text').extract()
            links = slide.css('div > div.frame > address > a::attr(href)').extract()
            if not links:
                # No link in this slide, so there is nothing to follow.
                continue
            # extract() returns a list, but Request needs a single URL string.
            pro_link = links[0]
            yield scrapy.Request(
                # Resolve relative hrefs against the page they were found on.
                response.urljoin(pro_link),
                meta={'pro_title': title, 'title_link': pro_link},
                callback=self.parse_property,
            )

    def parse_property(self, response):
        """Callback for the per-slide requests; currently only prints a marker."""
        # Parenthesised form prints the same text on Python 2 and Python 3.
        print("inside")