将数据从spider传输到sqlalchemy表时导入错误

时间:2016-12-12 00:01:11

标签: python sqlalchemy scrapy

这就是项目树的样子:

connection

pipelines.py

rym_chart_scraper
├───scrapy.cfg
├───rym_chart_scraper
│   ├───__init__.py
│   ├───items.py
│   ├───models.py
│   ├───pipelines.py
│   ├───settings.py
│   ├───spiders
│       ├───my_spider.py
│       ├───__init__.py

pipelines.py:

from models import TopAlbums, db_connect, create_topalbums_table
from sqlalchemy.orm import sessionmaker


class TopAlbumPipeline:
    """Item pipeline that stores every scraped item as a ``TopAlbums`` row."""

    def __init__(self):
        """Open the database engine, create the tables, and keep a session factory."""
        db_engine = db_connect()
        create_topalbums_table(db_engine)
        self.Session = sessionmaker(bind=db_engine)

    def process_item(self, item, spider):
        """Insert *item* into the database and hand it back unchanged.

        The item's fields are passed as keyword arguments to ``TopAlbums``.
        Any error raised while adding or committing rolls the transaction
        back and is then re-raised; the session is closed in every case.
        """
        db_session = self.Session()
        record = TopAlbums(**item)

        try:
            db_session.add(record)
            db_session.commit()
        except BaseException:
            # Undo the partial transaction, then let the caller see the error.
            db_session.rollback()
            raise
        finally:
            db_session.close()

        return item

models.py:

from sqlalchemy import create_engine
from sqlalchemy.engine.url import URL
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, String, DateTime

import settings

# Declarative base class; its metadata collects the tables of every model
# that subclasses it.
Base = declarative_base()


def db_connect():
    """Return a SQLAlchemy engine built from ``settings.DATABASE``.

    The ``DATABASE`` mapping is expanded into keyword arguments for ``URL``.
    """
    connection_url = URL(**settings.DATABASE)
    return create_engine(connection_url)


def create_topalbums_table(engine):
    """Create every table registered on ``Base.metadata`` using *engine*."""
    metadata = Base.metadata
    metadata.create_all(bind=engine)


class TopAlbums(Base):
    """Declarative model mapped to the ``top_albums`` table."""

    __tablename__ = 'top_albums'

    # Integer primary key.
    id = Column(Integer, primary_key=True)

    # Each column takes its database name from the attribute it is
    # assigned to, so the names stay exactly as spelled here.
    Artist = Column(String)
    Album = Column(String)
    Chart_year = Column(String)
    Genre = Column(String)
    Ratings = Column(Integer)
    Reviews = Column(Integer)
    Date = Column(DateTime)

当我运行下面这个爬虫(spiders/my_spider.py)时:

from scrapy import Spider, Request
from rym_chart_scraper.utility import find_between, listToString
from rym_chart_scraper.items import TopAlbumChartItem
from datetime import datetime


class TopAlbumChartSpider(Spider):
    """Spider for the rateyourmusic.com all-time top-album chart.

    Starts at the first chart page and follows the "next" link from page
    to page until the page counter reaches its limit or no link is found.
    """
    name = "top_music_charts"
    allowed_domains = ['rateyourmusic.com']

    start_urls = [
        "https://rateyourmusic.com/charts/top/album/all-time"
    ]

    # Page counter: starts at 1 for the start page and is incremented each
    # time a "next" link is found.
    n_pages = 1

    def parse(self, response):
        """Yield the items found on one chart page, then the next-page request.

        Pairs each ``div.chart_main`` element with the ``div.chart_stats``
        element at the same position. After the items, yields a ``Request``
        for the next page while the page counter stays below 31.
        """
        for album, stats in zip(response.css('div.chart_main'),
                                response.css('div.chart_stats')):
            # Item construction is elided in this listing.
            ...

            yield item

        # A page without a "next" link gives an empty selector list; indexing
        # it with [0] would raise IndexError before the None check below
        # could run, so test for emptiness first.
        next_links = response.css('a.navlinknext')
        if next_links:
            next_page = next_links[0].css('a::attr(href)').extract_first()
        else:
            next_page = None

        if next_page is not None:
            next_page = response.urljoin(next_page)
            self.n_pages += 1
            if self.n_pages < 31:
                yield Request(next_page, callback=self.parse)

用下面的命令运行时,我收到导入错误:

scrapy crawl top_music_charts

在 main 中以交互方式导入 models 模块不会出错,只有从命令行实际运行爬虫时才会报这个导入错误。是项目结构有问题吗?还是我犯了其他低级错误?不知为何,我始终没能弄明白这一点。

0 个答案:

没有答案