我正在使用 dataset(dataset.readthedocs.io,一个基于 SQLAlchemy 的 API)做一些操作,但找不到一种简单、通用的方式来创建表。我知道可以手写 CREATE TABLE 语句,然后再连接到该表,但我想知道是否有更方便的方法。
下面是示例代码:
管道代码:
class DBWritePipeline(object):
    """Scrapy item pipeline that writes every item to a table via ``dataset``.

    The target table is taken from the item's ``table`` attribute; items
    without one are written to the default table configured through the
    ``DATASET_TABLE`` setting. Items that expose a ``key`` attribute are
    upserted on that key; all other items are inserted.
    """

    def __init__(self, dataset_uri, dataset_table):
        """Remember the connection URI and the default table name.

        Args:
            dataset_uri: Connection URL passed to ``dataset.connect``.
            dataset_table: Table used for items that define no ``table``.
        """
        self.dataset_uri = dataset_uri
        self.dataset_table = dataset_table

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline from ``crawler.settings``.

        Reads ``CONNECTION_STRING`` and ``DATASET_TABLE`` (the latter
        defaults to ``'itabone'``).
        """
        return cls(
            dataset_uri=crawler.settings.get('CONNECTION_STRING'),
            dataset_table=crawler.settings.get('DATASET_TABLE', 'itabone'),
        )

    def open_spider(self, spider):
        """Open the database connection when the spider starts."""
        self.db = dataset.connect(self.dataset_uri)

    def close_spider(self, spider):
        """Release the database connection when the spider finishes.

        Safe to call even if ``open_spider`` never ran. The ``close`` lookup
        is guarded so that a database object without a ``close()`` method is
        simply left alone.
        """
        db = getattr(self, 'db', None)
        close = getattr(db, 'close', None)
        if callable(close):
            close()

    def process_item(self, item, spider):
        """Write ``item`` to its table and return it unchanged.

        Args:
            item: The scraped item. Optional attributes ``table`` (target
                table name) and ``key`` (column name or list of column names
                to upsert on) control how it is stored.
            spider: The spider that produced the item (unused).

        Returns:
            The same ``item``, so later pipelines can keep processing it.
        """
        # Fall back to the configured default table instead of failing with
        # AttributeError when the item does not name its own table.
        table_name = getattr(item, 'table', self.dataset_table)
        table = self.db[table_name]
        if hasattr(item, "key"):
            table.upsert(item, item.key)
        else:
            table.insert(item)
        return item
设置:
# Database settings
# Individual MySQL connection parameters; they are combined into
# CONNECTION_STRING below.
MYSQL_HOST = 'localhost'
MYSQL_PORT = 3306
MYSQL_DB = 'scrapy'
MYSQL_USER = 'scrapy'
MYSQL_PASSWORD = 'scrapy'
MYSQL_UPSERT = True

# Database URL of the form driver://user:password@host:port/database.
CONNECTION_STRING = "{drivername}://{user}:{passwd}@{host}:{port}/{db_name}".format(
    drivername="mysql",
    host=MYSQL_HOST,
    port=MYSQL_PORT,
    db_name=MYSQL_DB,
    user=MYSQL_USER,
    passwd=MYSQL_PASSWORD,
)
示例 Item(items.py):
class Itabone(scrapy.Item):
    """Item with ``name`` and ``val`` fields, stored in table ``itabone``."""

    # Plain class attributes (not fields): the target table name and the
    # column used as the upsert key.
    table = "itabone"
    key = "name"

    # Scraped fields. The ``type`` entry is free-form Field metadata.
    val = scrapy.Field()
    name = scrapy.Field(type="String(32)")
class Itabthree(scrapy.Item):
    """Item with ``name`` and ``val`` fields, stored in table ``itabthree``.

    No ``key`` attribute is declared on this item.
    """

    # Plain class attribute (not a field): the target table name.
    table = "itabthree"

    # Scraped fields.
    val = scrapy.Field()
    name = scrapy.Field()
class Itabtwo(scrapy.Item):
    """Item with ``name`` and ``val`` fields, stored in table ``itabtwo``.

    Declares a composite key made of both columns.
    """

    # Plain class attributes (not fields): the target table name and the
    # list of columns that together form the upsert key.
    table = "itabtwo"
    key = ["name", "val"]

    # Scraped fields. ``max_length`` and ``key`` are free-form Field metadata.
    val = scrapy.Field()
    name = scrapy.Field(max_length=30, key=True)
爬虫(Spider):
import scrapy
from myproject.items import Itabone
from myproject.items import Itabtwo
from myproject.items import Itabthree
class ItemTest(scrapy.Spider):
    """Test spider that emits one item of each of the three item classes."""

    name = 'itempiptest'
    start_urls = ['https://www.example.com']

    def parse(self, response):
        """Yield one Itabone, one Itabtwo and one Itabthree, in that order.

        The response content is not inspected; the items carry fixed values.
        """
        # All three items are constructed before the first one is yielded.
        samples = (
            Itabone(name='tsts', val=1),
            Itabtwo(name='vcvcv', val=2),
            Itabthree(name='vcvcv', val=2),
        )
        for sample in samples:
            yield sample
我应该在 items.py 中怎么写,才能正确地创建和更新表?