我正在尝试使用 Scrapy 抓取数据。程序没有报任何错误，但仍然得不到任何结果。有人可以帮忙吗？以下是我正在使用的代码。
import scrapy
class SecSpider(scrapy.Spider):
    """Spider that reads the rows of the ``mainlist`` table on the start page.

    Every yielded item is a plain dict with the keys ``Link``, ``Number``,
    ``Date`` and ``Title``.
    """

    name = 'Sec'
    allowed_domains = ['www.sec.gov']
    start_urls = ['https://www.sec.gov/litigation/litreleases.shtml']

    def parse(self, response):
        """Yield one dict per table row found under ``id="mainlist"``.

        Args:
            response: The downloaded page for one of ``start_urls``.

        Yields:
            dict: ``Link`` (first href in the row), ``Number`` (text of the
            first anchor), ``Date`` (first text node of the second cell) and
            ``Title`` (all text nodes of the third cell joined into a single
            string). A field is ``None`` when the row has no matching node.
        """
        # The first two rows are skipped (presumably header rows -- confirm
        # against the page markup).
        rows = response.xpath('//*[@id="mainlist"]//tr')[2:]
        for row in rows:
            link = row.xpath('.//@href').extract_first()
            number = row.xpath('.//a/text()').extract_first()
            date = row.xpath('.//td[2]/text()').extract_first()

            # ``extract()`` returns a list of raw text nodes. Downstream code
            # stores the title in a single column, so collapse the fragments
            # (and any runs of whitespace/newlines) into one string.
            fragments = row.xpath('.//td[3]/text()').extract()
            title = ' '.join(' '.join(fragments).split()) or None

            yield {
                "Link": link,
                "Number": number,
                "Date": date,
                "Title": title,
            }
import pymssql
class ScrapingPipeline(object):
    """Item pipeline that inserts every scraped item into the ``updates`` table."""

    def __init__(self):
        """Open the database connection and the cursor used for the inserts."""
        # NOTE(review): the credentials are hard-coded here; consider loading
        # them from the project settings or the environment instead.
        self.conn = pymssql.connect(host='localhost', user='sa', password='data1234', database='Sec')
        self.cursor = self.conn.cursor()

    @staticmethod
    def _flatten(value):
        """Return *value* as a single string when it is a list/tuple of text.

        Fragments are joined with spaces and whitespace runs are collapsed;
        an empty result becomes ``None``. Any other value is returned as is.
        """
        if isinstance(value, (list, tuple)):
            return ' '.join(' '.join(str(part) for part in value).split()) or None
        return value

    def process_item(self, item, spider):
        """Insert *item* into ``updates`` and commit.

        Args:
            item: Mapping with the keys ``Link``, ``Number``, ``Date`` and
                ``Title``.
            spider: The spider that produced the item (unused).

        Returns:
            The same *item*, unmodified, so later pipelines can process it.

        Raises:
            Exception: Whatever the database driver raises; the transaction
                is rolled back before the error is re-raised.
        """
        # Each placeholder takes one value, so a list-valued title (several
        # text nodes) is flattened to a single string first.
        params = (
            item['Link'],
            item['Number'],
            item['Date'],
            self._flatten(item['Title']),
        )
        try:
            self.cursor.execute(
                "INSERT INTO updates(link, number, date, title) VALUES (%s, %s, %s, %s)",
                params,
            )
            self.conn.commit()
        except Exception:
            # Do not leave a half-finished transaction open for the next item.
            self.conn.rollback()
            raise
        return item

    def close_spider(self, spider):
        """Release the cursor and the connection when the spider finishes."""
        self.cursor.close()
        self.conn.close()
# Pipelines to run on scraped items, keyed by import path; the integer is the
# value Scrapy uses to order them.
ITEM_PIPELINES = {
    'Scraping.pipelines.ScrapingPipeline': 300,
}
from scrapy import item, Field
class ScrapingItem(scrapy.Item):
    """Item declaring the four fields ``link``, ``number``, ``date`` and ``title``."""

    # One Field per attribute; no per-field metadata is set.
    link = Field()
    number = Field()
    date = Field()
    title = Field()