我用 Python(Scrapy)编写了一个简单的网页抓取程序,但是当我在 Linux 命令行中使用命令 “scrapy crawl splash_spider” 运行它时,出现以下错误消息:“ImportError: cannot import name spider”(无法导入名称 spider)。
items.py
import scrapy
class ScrapyJavascriptItem(scrapy.Item):
    """Scraped record holding the two team names of one game."""

    # Text of the home-team cell.
    home_team = scrapy.Field()
    # Text of the away-team cell.
    away_team = scrapy.Field()
settings.py
# Scrapy settings for the scrapy_javascript project.

# --- Project identity and spider discovery ---------------------------------
BOT_NAME = 'scrapy_javascript'
# New spiders are generated into, and existing spiders are loaded from,
# the same package.
NEWSPIDER_MODULE = 'scrapy_javascript.spiders'
SPIDER_MODULES = [NEWSPIDER_MODULE]

# --- Crawling policy --------------------------------------------------------
ROBOTSTXT_OBEY = True

# --- Splash (JavaScript rendering) integration ------------------------------
# Address of the Splash HTTP API; a Splash instance must be listening here.
SPLASH_URL = 'http://localhost:8050'

# Splash middlewares plus HTTP compression; the numbers are the positions of
# each middleware in the downloader middleware chain.
DOWNLOADER_MIDDLEWARES = {
    'scrapy_splash.SplashCookiesMiddleware': 723,
    'scrapy_splash.SplashMiddleware': 725,
    'scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware': 810,
}

# Splash-aware replacements for the duplicate filter and HTTP cache storage.
DUPEFILTER_CLASS = 'scrapy_splash.SplashAwareDupeFilter'
HTTPCACHE_STORAGE = 'scrapy_splash.SplashAwareFSCacheStorage'
SplashSpider.py
# The class is named ``Spider`` (capital S); importing lowercase ``spider``
# raises "ImportError: cannot import name spider".
from scrapy.spiders import Spider
from scrapy_splash import SplashRequest

from scrapy_javascript.items import ScrapyJavascriptItem


class MySpider(Spider):
    """Scrape home/away team names from a page rendered through Splash.

    Run it with ``scrapy crawl SplashSpider``: the crawl command takes the
    value of ``name`` below, not the class name or the file name.
    """

    name = 'SplashSpider'  # Name of Spider
    start_urls = ['https://www.livescore.bet3000.com']  # url(s)

    def start_requests(self):
        """Yield one Splash-rendered request for every URL in ``start_urls``."""
        for url in self.start_urls:
            # "wait" is passed to Splash so the page's JavaScript has time to
            # run before the rendered HTML is returned.
            yield SplashRequest(url=url, callback=self.parse, args={"wait": 3})

    # Scraping
    def parse(self, response):
        """Yield one item per matching game row in the rendered page.

        The method must be named ``parse`` (lowercase) because
        ``start_requests`` registers ``self.parse`` as the callback.

        Args:
            response: the Splash-rendered response for a start URL.

        Yields:
            ScrapyJavascriptItem: ``home_team`` and ``away_team`` texts, each
            ``None`` when the corresponding cell is not found.
        """
        games = response.css(".tournament.filterable.table-block.status_upcomingCount.status_upcoming.status_liveoddsCount.status_liveodds.status_nextcount.status_next.kickoff")
        for game in games:
            # Build a fresh item per game; reusing a single instance would
            # make every yielded item refer to the same, last-written data.
            item = ScrapyJavascriptItem()
            # Text before home team
            item["home_team"] = game.css("td.hometeam.team.home::text").extract_first()
            # Text before away team
            item["away_team"] = game.css("td.awayteam.team.away::text").extract_first()
            yield item