我要调试vscode中的scrapy。 所以我制作了Runner.py文件。
runner.py
import scrapy
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings
from sports_scrapy.spiders.player_spider import AtpPlayerSpider
process = CrawlerProcess(get_project_settings())
process.crawl(AtpPlayerSpider)
process.start() # the script will block here until the crawling is finished
这是items.py
import scrapy
from scrapy_djangoitem import DjangoItem
from players.models import ATPPlayer
class AtpPlayerItem(DjangoItem):
django_model = ATPPlayer
pipline.py
from players.models import ATPPlayer
class AtpPlayerScrapyPipeline(object):
def process_item(self, item, spider):
try:
player = ATPPlayer.objects.get(name=item["name"])
print (player.name + " already exist")
return item
except ATPPlayer.DoesNotExist:
pass
item.save()
return item
我将以下代码添加到settings.py
ort os
import sys
import django
sys.path.append(os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), ".."))
sys.path.insert(0, os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), ".."))
os.environ['DJANGO_SETTINGS_MODULE'] = 'betting.settings'
django.setup()
这里是player_spider.py
import scrapy
import re
from sports_scrapy.items import AtpPlayerItem
#Beatiful Soup - To parse the html
from bs4 import BeautifulSoup
from time import sleep
import time
class AtpPlayerSpider(scrapy.Spider):
name = 'atpplayers'
start_urls = [
'https://live-tennis.eu/fr/classement-atp-live',
]
def parse(self, response):
player_rows = response.xpath('//table[@id="u868"]/tbody/tr[@bgcolor and @class]')
for player_item in player_rows:
#rank
rank_txt = player_item.xpath('./td[1]/text()').extract_first().strip()
if rank_txt == '':
rank = 1000
else:
rank = int(player_item.xpath('./td[1]/text()').extract_first().strip(), 10)
#max_rank
mc = player_item.xpath('./td[2]/b[2]/text()').extract_first().strip()
if bool(re.match('^(?=.*[a-zA-Z-])', mc)) != True:
max_rank = int(mc,10)
elif mc.find('CH') != -1 or mc.find('MC') != -1:
max_rank = rank
但是当通过运行Runner.py调试scrapy时。
我遇到错误
发生异常:ModuleNotFoundError没有名为“玩家”的模块 文件“ D:\ NikolayData \ betting \ sports_scrapy \ sports_scrapy \ items.py”, 第10行,在 从players.models导入ATPPlayer文件“ D:\ NikolayData \ betting \ sports_scrapy \ sports_scrapy \ spiders \ player_spider.py”, 第3行,在 从sports_scrapy.items导入AtpPlayerItem文件“ D:\ NikolayData \ betting \ sports_scrapy \ runner.py”,第5行,在 从sports_scrapy.spiders.player_spider导入AtpPlayerSpider
我该如何解决这个问题?