我几乎已经完成了这个项目,但是我无法弄清楚为什么我的管道只插入了我的蜘蛛刚抓到的一些物品。这是我的代码:
首先是项目:
from scrapy.item import Item, Field
class PruebaMercadoLibreItem(Item):
producto = Field()
precio = Field()
现在我的蜘蛛:
from scrapy.spider import Spider
from scrapy.selector import Selector
from scrapy.item import Item, Field
from prueba_mercadolibre.items import PruebaMercadoLibreItem
class MLSpider(Spider):
name = "mlspider"
allowed_domains = ["mercadolibre.com"]
start_urls = ["http://www.mercadolibre.com.mx"]
def parse (self, response):
hxs = Selector(response)
titles = hxs.xpath("//div[contains(@class, 'item-data')]")
for title in titles:
item = PruebaMercadoLibreItem()
item["producto"] = title.xpath("p[@class='title']/@title").extract()[0]
item["precio"] = title.xpath("span[@class='ch-price']/text()").extract()[0]
yield item
最后我的管道:
from twisted.enterprise import adbapi
class PruebaMercadoLibrePipeline(object):
def __init__(self, dbpool):
self.dbpool = dbpool
@classmethod
def from_settings(cls, settings):
dbargs = dict(
host=settings['MYSQL_HOST'],
db=settings['MYSQL_DBNAME'],
user=settings['MYSQL_USER'],
passwd=settings['MYSQL_PASSWD'],
charset='utf8',
use_unicode=True,
)
dbpool = adbapi.ConnectionPool('MySQLdb', **dbargs)
return cls(dbpool)
def process_item(self, item, spider):
d = self.dbpool.runInteraction(self._do_upsert, item, spider)
d.addErrback(self._handle_error, item, spider)
d.addBoth(lambda _: item)
return d
def _do_upsert(self, conn, item, spider):
conn.execute("""
INSERT INTO productos (producto, precio)
VALUES (%s, %s)
""", (item['producto'], item['precio']))
def _handle_error(self, failure, item, spider):
log.err(failure)
好的,我的设置:
BOT_NAME = 'prueba_mercadolibre'
SPIDER_MODULES = ['prueba_mercadolibre.spiders']
NEWSPIDER_MODULE = 'prueba_mercadolibre.spiders'
DEFAULT_ITEM_CLASS = 'prueba_mercadolibre.items.PruebaMercadoLibreItem'
ITEM_PIPELINES = [
'prueba_mercadolibre.pipelines.PruebaMercadoLibrePipeline',
]
MYSQL_HOST = 'localhost'
MYSQL_DBNAME = 'ejercicio_2'
MYSQL_USER = 'root'
MYSQL_PASSWD = 'root'
好的,最后我的数据库很简单但生病了,只需将代码留在这里:
SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0;
SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0;
SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='TRADITIONAL,ALLOW_INVALID_DATES';
CREATE SCHEMA IF NOT EXISTS `ejercicio_2` DEFAULT CHARACTER SET utf8 COLLATE utf8_general_ci ;
USE `ejercicio_2` ;
CREATE TABLE IF NOT EXISTS `ejercicio_2`.`productos` (
`id` INT ZEROFILL NOT NULL AUTO_INCREMENT,
`producto` VARCHAR(45) NOT NULL,
`precio` VARCHAR(45) NOT NULL,
PRIMARY KEY (`id`))
ENGINE = InnoDB;
SET SQL_MODE=@OLD_SQL_MODE;
SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS;
SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS;