我正在尝试从网站页面中提取输入字段,以及包含这些输入的页面URL,并将它们存储到数据库中。
*** The code runs fine with no errors, but the output is not what I want.
蜘蛛代码:
class MySpider(CrawlSpider):
    """Crawl testaspnet.vulnweb.com and report form inputs found on each page.

    For every page reached through the link-extraction rule, one item is
    produced holding the page URL and the ``id`` of the first text,
    password and file ``<input>`` element (``None`` when there is none).
    """

    name = 'isa_spider'
    allowed_domains = ['testaspnet.vulnweb.com']
    start_urls = ['http://testaspnet.vulnweb.com']
    rules = (
        Rule(SgmlLinkExtractor(allow='/*'), callback='parse_item'),
    )

    def parse_item(self, response):
        """Build one ``IsaItem`` describing the inputs of ``response``.

        Only the first ``@id`` matched by each query is kept; a field with
        no match is set to ``None``.
        """
        # Item field -> XPath query that yields the ids for that input type.
        queries = (
            ('text_input', "//input[(@id or @name) and (@type = 'text' )]/@id "),
            ('pass_input', "//input[(@id or @name) and (@type = 'password')]/@id"),
            ('file_input', "//input[(@id or @name) and (@type = 'file')]/@id"),
        )

        selector = HtmlXPathSelector(response)
        item = IsaItem()
        item['response_fld'] = response.url
        for field, xpath in queries:
            matches = selector.select(xpath).extract()
            if matches:
                item[field] = matches[0]
            else:
                item[field] = None
        return item
管道代码
class SQLiteStorePipeline(object):
    """Scrapy item pipeline that stores input names and page URLs in SQLite.

    Each processed item adds three rows to the ``inputs`` table (its text,
    password and file input names) and one row to the ``links`` table (the
    page URL).
    """

    def __init__(self):
        # The path is relative, so the file is resolved against the working
        # directory of the process running the crawl.
        self.conn = sqlite3.connect('./project.db')
        self.cur = self.conn.cursor()

    def process_item(self, item, spider):
        """Write one item to the database and pass it on unchanged.

        Args:
            item: Mapping with the keys ``text_input``, ``pass_input``,
                ``file_input`` and ``response_fld``.
            spider: The spider that produced the item (not used here).

        Returns:
            The same ``item``, so later pipeline stages can keep using it.

        Raises:
            sqlite3.Error: If any insert fails. In that case none of this
                item's rows are kept.
        """
        # Values are passed through as-is, so a None name is stored as NULL.
        input_names = [
            (item[key],)
            for key in ('text_input', 'pass_input', 'file_input')
        ]
        # Using the connection as a context manager commits when the block
        # succeeds and rolls back when it raises, so an item is stored
        # completely or not at all.
        with self.conn:
            self.cur.executemany(
                "insert into inputs ( input_name) values(?)", input_names
            )
            self.cur.execute(
                "insert into links (link) values(?)", (item['response_fld'],)
            )
        return item

    def close_spider(self, spider):
        """Release the database connection once the spider has finished."""
        self.conn.close()
数据库架构 picture
必需输出 picture
(抱歉没有直接插入图片,因为我的声誉低于10)
答案 0 :(得分:0)
尚未对此进行测试:
class SQLiteStorePipeline(object):
    """Scrapy item pipeline that stores each page link with its inputs.

    Every item becomes one row in ``links`` plus three rows in ``inputs``
    that point back to that link through ``link_id``.
    """

    # (item key, input_type code) pairs written to the ``inputs`` table:
    # 1 = text input, 2 = password input, 3 = file input.
    _INPUT_KINDS = (
        ('text_input', 1),
        ('pass_input', 2),
        ('file_input', 3),
    )

    def __init__(self):
        # The path is relative, so the file is resolved against the working
        # directory of the process running the crawl.
        self.conn = sqlite3.connect('./project.db')
        self.cur = self.conn.cursor()

    def _resolve_target_id(self, item, spider):
        """Return the value stored in ``links.target`` for this item.

        How a target id is determined is application-specific and was left
        open in the original snippet, so this hook must be implemented (or
        overridden in a subclass) before the pipeline can be used.

        Raises:
            NotImplementedError: Always, until a real lookup is provided.
        """
        raise NotImplementedError(
            "SQLiteStorePipeline._resolve_target_id must be implemented "
            "to determine the target id"
        )

    def process_item(self, item, spider):
        """Store the item's link and its three inputs, then return the item.

        Args:
            item: Mapping with the keys ``response_fld``, ``text_input``,
                ``pass_input`` and ``file_input``.
            spider: The spider that produced the item; forwarded to
                ``_resolve_target_id``.

        Returns:
            The same ``item``, so later pipeline stages still receive it.

        Raises:
            NotImplementedError: If ``_resolve_target_id`` is not provided.
            sqlite3.Error: If any insert fails. In that case none of this
                item's rows are kept.
        """
        cursor = self.cur
        target_id = self._resolve_target_id(item, spider)
        # Using the connection as a context manager commits when the block
        # succeeds and rolls back when it raises, so a link is never stored
        # without its inputs.
        with self.conn:
            cursor.execute(
                "insert into links (target, link) values(?, ?)",
                (target_id, item['response_fld']),
            )
            # Row id of the link inserted just above; read it before any
            # further statement runs on this cursor.
            link_id = cursor.lastrowid
            for key, input_type in self._INPUT_KINDS:
                # Values are passed through as-is, so None is stored as NULL.
                cursor.execute(
                    "insert into inputs (link_id, input_name, input_type) values(?, ?, ?)",
                    (link_id, item[key], input_type),
                )
        return item

    def close_spider(self, spider):
        """Release the database connection once the spider has finished."""
        self.conn.close()