我在Scrapy& amp; Python和我真的需要你的帮助。 (请原谅我的英语,我说西班牙语)。
我的问题是:我需要从网站http://www.icreson.gob.mx/PAGINANUEVA/sala/index.php?Sala=HERMOSILLO中删除数据我需要传递各种参数,其中一个是“inscripcion”,用于标识我要查找的信息。返回的第一个数据位于http://www.icreson.gob.mx/PAGINANUEVA/sala/inscripciones.php然后我需要更改另外三个链接,以完成我需要废弃的所有数据:
<script> var seen = {}; $('table tr').each(function() { var txt = $(this).text(); if (seen[txt]) $(this).remove(); else seen[txt] = true; }); </script>
我创建一个for循环增加“inscripcion”数字(i中的for),当它只有一个范围(368610,368611)时,它工作得很好,它返回正确的数据。但是如果我在范围内传递了两个或更多个麻木(368610,368618),那么数据返回它是空的或相同的。
这是 items.py :
中的代码.../PAGINANUEVA/sala/anotaciones.php,
.../PAGINANUEVA/sala/bien_inmueble.php,
.../PAGINANUEVA/sala/usuario.php
icre_sp.py 中蜘蛛的代码是:
from scrapy import Field
from scrapy import Item
class Ejemplo2Item(Item):
inscripcion = Field()
existe_inscripcion = Field()
num_insc = Field()
anotacion = Field()
bien = Field()
usuario = Field()
这是多个“inscripcion”范围(368612,368622)的结果,你可以看到重复,但它应该是不同的。
# -*- coding: utf-8 -*-
from scrapy import Spider
from ejemplo2.items import Ejemplo2Item
from scrapy import FormRequest
from scrapy.http import Request
from scrapy import Selector
def _limpiar(cantidad):
if cantidad:
if '$' in cantidad:
cantidad = cantidad.replace('$', '')
if ',' in cantidad:
cantidad = cantidad.replace(',', '')
if '.' in cantidad:
cantidad = cantidad.split('.')[0]
if cantidad.isdigit():
cantidad = int(cantidad)
return cantidad
else:
return 0
class Ejemplo2Spider(Spider):
name = 'gsf'
start_urls = ['http://www.icreson.gob.mx/PAGINANUEVA/sala/index.php?ban=1&Sala=',]
def parse(self, response):
for i in range(368612,368622):
h = str(i)
item = Ejemplo2Item()
item['num_insc'] = h
yield FormRequest.from_response(response,
formname='MiFormulario',
formdata={'instituto': '1',
'area': 'LIBRO 1',
'inscripcion': h},
cookies={'PHPSESSID':'f92c499925f059eb04b8045290011eda',
'_ga': 'GA1.3.612616751.1438557988'},
meta={'item': item},
dont_filter=True,
method='POST',
callback=self.parseInsc)
def parseInsc(self, response):
item = response.meta['item']
if "LO SIENTO NO SE PUEDE" in response.body:
item['existe_inscripcion'] = 'NO'
return item
valores = response.xpath('//input/@value').extract()
claves = response.xpath('//input/@name').extract()
v_text = response.xpath("//textarea/text()").extract()[0]
insc = dict(zip(claves, valores))
insc['Monto'] = _limpiar(insc['Monto'])
insc['Fedatario'] = v_text
item['inscripcion'] = insc
item['existe_inscripcion'] = 'SI'
solicitud1 = Request('http://www.icreson.gob.mx/PAGINANUEVA/sala/anotaciones.php',
dont_filter=True,
meta={'item': item},
callback=self.parseAnot)
solicitud1.meta['item'] = item
return solicitud1
def parseAnot(self, response):
item = response.meta['item']
if "LA INSCRIPCION NO TIENE" in response.body:
anot = 'NO TIENE ANOTACION'
else:
campos = response.xpath("//input")
son = (len(campos) - 3 ) // 5
off_set = 0
van = 0
anot = []
while True:
if van == son:
break
c1 = campos[1+off_set].xpath("@value").extract()[0]
c2 = campos[2+off_set].xpath("@value").extract()[0]
c3 = campos[3+off_set].xpath("@value").extract()[0]
c4 = campos[4+off_set].xpath("@value").extract()[0]
c5 = campos[5+off_set].xpath("@value").extract()[0]
ren = {'a1': c1, 'a2': c2, 'a3': c3, 'a4': c4, 'a5': c5}
anot.append(ren)
off_set = off_set + 5
van = van + 1
item['anotacion'] = anot
solicitud2 = Request('http://www.icreson.gob.mx/PAGINANUEVA/sala/bien_inmueble.php',
dont_filter=True,
meta={'item': item},
callback=self.parseBien)
solicitud2.meta['item'] = item
return solicitud2
def parseBien(self, response):
item = response.meta['item']
if "NO EXISTE NINGUN BIEN" in response.body:
bien = 'NO TIENE BIEN INMUEBLE'
else:
filas = response.xpath("//div[@class='mid_conten_PagNueva']/table/tr[2]/td/table[2]//tr[position()>1]") # el .extract() al final lo convierte en lista o valor, sin eso es un tipo Selector
son = len(filas) - 3
bien = []
van = 0
for fila in filas:
v1 = fila.xpath('./td[1]/span/text()').extract()
v1 = v1[0] if len(v1) > 0 else ""
v2 = fila.xpath('./td[2]/span/text()').extract()
v2 = v2[0] if len(v2) > 0 else ""
v3 = fila.xpath('./td[3]/span/text()').extract()
v3 = v3[0] if len(v3) > 0 else ""
v4 = fila.xpath('./td[4]/span/text()').extract()
v4 = v4[0] if len(v4) > 0 else ""
v5 = fila.xpath('./td[5]/span/text()').extract()
v5 = v5[0] if len(v5) > 0 else ""
v6 = fila.xpath('./td[6]/span/text()').extract()
v6 = v6[0] if len(v6) > 0 else ""
v7 = fila.xpath('./td[7]/span/text()').extract()
v7 = v7[0] if len(v7) > 0 else ""
v8 = fila.xpath('./td[8]/span/text()').extract()
v8 = v8[0] if len(v8) > 0 else ""
ren = {'b1': v1,
'b2': v2,
'b3': v3,
'b4': v4,
'b5': v5,
'b6': v6,
'b7': v7,
'b8': v8
}
bien.append(ren)
van = van + 1
if van == son:
break
item['bien'] = bien
solicitud3 = Request('http://www.icreson.gob.mx/PAGINANUEVA/sala/usuario.php',
dont_filter=True,
meta={'item': item},
callback=self.parseUsu)
solicitud3.meta['item'] = item
return solicitud3
def parseUsu(self, response):
item = response.meta['item']
if "NO EXISTEN USUARIOS" in response.body:
usua = 'NO TIENE USUARIO'
else:
filas = response.xpath("//div[@class='mid_conten_PagNueva']/table/tr[2]/td/table/tr/td/table[2]//tr[position()>1]") # el .extract() al final lo convierte en lista o valor, sin eso es un tipo Selector
usua = []
for fila in filas:
v1 = fila.xpath('./td[1]/span/text()').extract()
v1 = v1[0] if len(v1) > 0 else ""
v2 = fila.xpath('./td[2]/span/text()').extract()
v2 = v2[0] if len(v2) > 0 else ""
v3 = fila.xpath('./td[3]/span/text()').extract()
v3 = v3[0] if len(v3) > 0 else ""
v4 = fila.xpath('./td[4]/span/text()').extract()
v4 = v4[0] if len(v4) > 0 else ""
v5 = fila.xpath('./td[5]/span/text()').extract()
v5 = v5[0] if len(v5) > 0 else ""
v6 = fila.xpath('./td[6]/span/text()').extract()
v6 = v6[0] if len(v6) > 0 else ""
ren = {'u1': v1,
'u2': v2,
'u3': v3,
'u4': v4,
'u5': v5,
'u6': v6
}
usua.append(ren)
item['usuario'] = usua
return item
这只是范围内只有一个“inscripcion”的结果(368612,368613):
[{"usuario": [{"u5": "TREJO", "u4": "MOERNO", "u6": "S", "u1": "1", "u3": "RITA ERNESTINA ", "u2": "EMISOR"}, {"u5": "MURRIETA", "u4": "ROJO", "u6": "S", "u1": "2", "u3": "MARIA EUGENIA", "u2": "RECEPTOR"}], "inscripcion": {"hora": "", "Poblacion": "HERMOSILLO", "Fedatario": "FRANCISCO JAVIER CABRERA FERNANDEZ NOTARIA NO. 11- HERMOSILL", "Vigencia": "S", "Monto": 271000, "Fecha": "2006-09-08", "Inscripcion": "368618", "Acto2": "REGISTRO INMOBILIARIO", "Clave": "ESCRITURA PUBLICA 63381", "Municipio": "HERMOSILLO", "Acto": "COMPRA VENTA", "Libro": "1", "Fecha de Registro": "2008-05-12", "Vigencia2": "19099"}, "existe_inscripcion": "SI", "bien": [{"b4": "6", "b5": "360014079001", "b6": "HERMOSILLO", "b7": "600,METROS", "b1": "1", "b2": "1", "b3": "25", "b8": "S"}], "num_insc": "368612", "anotacion": "NO TIENE ANOTACION"},
{"usuario": [{"u5": "TREJO", "u4": "MOERNO", "u6": "S", "u1": "1", "u3": "RITA ERNESTINA ", "u2": "EMISOR"}, {"u5": "MURRIETA", "u4": "ROJO", "u6": "S", "u1": "2", "u3": "MARIA EUGENIA", "u2": "RECEPTOR"}], "inscripcion": {"hora": "", "Poblacion": "HERMOSILLO", "Fedatario": "FRANCISCO JAVIER CABRERA FERNANDEZ NOTARIA NO. 11- HERMOSILL", "Vigencia": "S", "Monto": 271000, "Fecha": "2006-09-08", "Inscripcion": "368618", "Acto2": "REGISTRO INMOBILIARIO", "Clave": "ESCRITURA PUBLICA 63381", "Municipio": "HERMOSILLO", "Acto": "COMPRA VENTA", "Libro": "1", "Fecha de Registro": "2008-05-12", "Vigencia2": "19099"}, "existe_inscripcion": "SI", "bien": [{"b4": "6", "b5": "360014079001", "b6": "HERMOSILLO", "b7": "600,METROS", "b1": "1", "b2": "1", "b3": "25", "b8": "S"}], "num_insc": "368616", "anotacion": "NO TIENE ANOTACION"},
{"usuario": [{"u5": "RODRIGUEZ", "u4": "CONTRERAS", "u6": "S", "u1": "1", "u3": "MARIA DEL SOCORRO GUADALUPE", "u2": "EMISOR"}, {"u5": "", "u4": "", "u6": "S", "u1": "2", "u3": "INMOBILIARIA JACEDO, S.A. DE C.V.", "u2": "RECEPTOR"}], "inscripcion": {"hora": "", "Poblacion": "HERMOSILLO", "Fedatario": "FRANCISCO JAVIER CABRERA FERNANDEZ NOTARIA NO. 11- HERMOSILL", "Vigencia": "S", "Monto": 271000, "Fecha": "2006-09-08", "Inscripcion": "368618", "Acto2": "REGISTRO INMOBILIARIO", "Clave": "ESCRITURA PUBLICA 63381", "Municipio": "HERMOSILLO", "Acto": "COMPRA VENTA", "Libro": "1", "Fecha de Registro": "2008-05-12", "Vigencia2": "19099"}, "existe_inscripcion": "SI", "bien": [{"b4": "6", "b5": "360014079001", "b6": "HERMOSILLO", "b7": "600,METROS", "b1": "1", "b2": "1", "b3": "25", "b8": "S"}], "num_insc": "368621", "anotacion": "NO TIENE ANOTACION"},
{"usuario": [{"u5": "TREJO", "u4": "MOERNO", "u6": "S", "u1": "1", "u3": "RITA ERNESTINA ", "u2": "EMISOR"}, {"u5": "MURRIETA", "u4": "ROJO", "u6": "S", "u1": "2", "u3": "MARIA EUGENIA", "u2": "RECEPTOR"}], "inscripcion": {"hora": "", "Poblacion": "HERMOSILLO", "Fedatario": "FRANCISCO JAVIER CABRERA FERNANDEZ NOTARIA NO. 11- HERMOSILL", "Vigencia": "S", "Monto": 271000, "Fecha": "2006-09-08", "Inscripcion": "368618", "Acto2": "REGISTRO INMOBILIARIO", "Clave": "ESCRITURA PUBLICA 63381", "Municipio": "HERMOSILLO", "Acto": "COMPRA VENTA", "Libro": "1", "Fecha de Registro": "2008-05-12", "Vigencia2": "19099"}, "existe_inscripcion": "SI", "bien": [{"b4": "6", "b5": "360014079001", "b6": "HERMOSILLO", "b7": "600,METROS", "b1": "1", "b2": "1", "b3": "25", "b8": "S"}], "num_insc": "368617", "anotacion": "NO TIENE ANOTACION"},
{"usuario": [{"u5": "TREJO", "u4": "MOERNO", "u6": "S", "u1": "1", "u3": "RITA ERNESTINA ", "u2": "EMISOR"}, {"u5": "MURRIETA", "u4": "ROJO", "u6": "S", "u1": "2", "u3": "MARIA EUGENIA", "u2": "RECEPTOR"}], "inscripcion": {"hora": "", "Poblacion": "HERMOSILLO", "Fedatario": "FRANCISCO JAVIER CABRERA FERNANDEZ NOTARIA NO. 11- HERMOSILL", "Vigencia": "S", "Monto": 271000, "Fecha": "2006-09-08", "Inscripcion": "368618", "Acto2": "REGISTRO INMOBILIARIO", "Clave": "ESCRITURA PUBLICA 63381", "Municipio": "HERMOSILLO", "Acto": "COMPRA VENTA", "Libro": "1", "Fecha de Registro": "2008-05-12", "Vigencia2": "19099"}, "existe_inscripcion": "SI", "bien": [{"b4": "6", "b5": "360014079001", "b6": "HERMOSILLO", "b7": "600,METROS", "b1": "1", "b2": "1", "b3": "25", "b8": "S"}], "num_insc": "368615", "anotacion": "NO TIENE ANOTACION"},
{"usuario": [{"u5": "RODRIGUEZ", "u4": "CONTRERAS", "u6": "S", "u1": "1", "u3": "MARIA DEL SOCORRO GUADALUPE", "u2": "EMISOR"}, {"u5": "", "u4": "", "u6": "S", "u1": "2", "u3": "INMOBILIARIA JACEDO, S.A. DE C.V.", "u2": "RECEPTOR"}], "inscripcion": {"hora": "", "Poblacion": "HERMOSILLO", "Fedatario": "FRANCISCO JAVIER CABRERA FERNANDEZ NOTARIA NO. 11- HERMOSILL", "Vigencia": "S", "Monto": 271000, "Fecha": "2006-09-08", "Inscripcion": "368618", "Acto2": "REGISTRO INMOBILIARIO", "Clave": "ESCRITURA PUBLICA 63381", "Municipio": "HERMOSILLO", "Acto": "COMPRA VENTA", "Libro": "1", "Fecha de Registro": "2008-05-12", "Vigencia2": "19099"}, "existe_inscripcion": "SI", "bien": [{"b4": "6", "b5": "360014079001", "b6": "HERMOSILLO", "b7": "600,METROS", "b1": "1", "b2": "1", "b3": "25", "b8": "S"}], "num_insc": "368620", "anotacion": "NO TIENE ANOTACION"},
{"usuario": [{"u5": "TREJO", "u4": "MOERNO", "u6": "S", "u1": "1", "u3": "RITA ERNESTINA ", "u2": "EMISOR"}, {"u5": "MURRIETA", "u4": "ROJO", "u6": "S", "u1": "2", "u3": "MARIA EUGENIA", "u2": "RECEPTOR"}], "inscripcion": {"hora": "", "Poblacion": "HERMOSILLO", "Fedatario": "FRANCISCO JAVIER CABRERA FERNANDEZ NOTARIA NO. 11- HERMOSILL", "Vigencia": "S", "Monto": 271000, "Fecha": "2006-09-08", "Inscripcion": "368618", "Acto2": "REGISTRO INMOBILIARIO", "Clave": "ESCRITURA PUBLICA 63381", "Municipio": "HERMOSILLO", "Acto": "COMPRA VENTA", "Libro": "1", "Fecha de Registro": "2008-05-12", "Vigencia2": "19099"}, "existe_inscripcion": "SI", "bien": [{"b4": "6", "b5": "360014079001", "b6": "HERMOSILLO", "b7": "600,METROS", "b1": "1", "b2": "1", "b3": "25", "b8": "S"}], "num_insc": "368618", "anotacion": "NO TIENE ANOTACION"},
{"usuario": [{"u5": "RODRIGUEZ", "u4": "CONTRERAS", "u6": "S", "u1": "1", "u3": "MARIA DEL SOCORRO GUADALUPE", "u2": "EMISOR"}, {"u5": "", "u4": "", "u6": "S", "u1": "2", "u3": "INMOBILIARIA JACEDO, S.A. DE C.V.", "u2": "RECEPTOR"}], "inscripcion": {"hora": "", "Poblacion": "HERMOSILLO", "Fedatario": "FRANCISCO JAVIER CABRERA FERNANDEZ NOTARIA NO. 11- HERMOSILL", "Vigencia": "S", "Monto": 271000, "Fecha": "2006-09-08", "Inscripcion": "368618", "Acto2": "REGISTRO INMOBILIARIO", "Clave": "ESCRITURA PUBLICA 63381", "Municipio": "HERMOSILLO", "Acto": "COMPRA VENTA", "Libro": "1", "Fecha de Registro": "2008-05-12", "Vigencia2": "19099"}, "existe_inscripcion": "SI", "bien": [{"b4": "6", "b5": "360014079001", "b6": "HERMOSILLO", "b7": "600,METROS", "b1": "1", "b2": "1", "b3": "25", "b8": "S"}], "num_insc": "368614", "anotacion": "NO TIENE ANOTACION"},
{"usuario": [{"u5": "RODRIGUEZ", "u4": "CONTRERAS", "u6": "S", "u1": "1", "u3": "MARIA DEL SOCORRO GUADALUPE", "u2": "EMISOR"}, {"u5": "", "u4": "", "u6": "S", "u1": "2", "u3": "INMOBILIARIA JACEDO, S.A. DE C.V.", "u2": "RECEPTOR"}], "inscripcion": {"hora": "", "Poblacion": "HERMOSILLO", "Fedatario": "FRANCISCO JAVIER CABRERA FERNANDEZ NOTARIA NO. 11- HERMOSILL", "Vigencia": "S", "Monto": 271000, "Fecha": "2006-09-08", "Inscripcion": "368618", "Acto2": "REGISTRO INMOBILIARIO", "Clave": "ESCRITURA PUBLICA 63381", "Municipio": "HERMOSILLO", "Acto": "COMPRA VENTA", "Libro": "1", "Fecha de Registro": "2008-05-12", "Vigencia2": "19099"}, "existe_inscripcion": "SI", "bien": [{"b4": "", "b5": "360030646005", "b6": "POLIGONO B AL NORTE DE LA CIUDAD", "b7": "40,HECTAREAS", "b1": "1", "b2": "1", "b3": "", "b8": "S"}], "num_insc": "368619", "anotacion": "NO TIENE ANOTACION"},
{"usuario": [{"u5": "RODRIGUEZ", "u4": "CONTRERAS", "u6": "S", "u1": "1", "u3": "MARIA DEL SOCORRO GUADALUPE", "u2": "EMISOR"}, {"u5": "", "u4": "", "u6": "S", "u1": "2", "u3": "INMOBILIARIA JACEDO, S.A. DE C.V.", "u2": "RECEPTOR"}], "inscripcion": {"hora": "", "Poblacion": "HERMOSILLO", "Fedatario": "SALVADOR ANTONIO CORRAL MARTINEZ NOTARIA No. 28- HERMOSILLO", "Vigencia": "S", "Monto": 14800000, "Fecha": "2007-10-08", "Inscripcion": "368613", "Acto2": "REGISTRO INMOBILIARIO", "Clave": "ESCRITURA PUBLICA 27237", "Municipio": "HERMOSILLO", "Acto": "COMPRA VENTA", "Libro": "1", "Fecha de Registro": "2008-05-12", "Vigencia2": "19098"}, "existe_inscripcion": "SI", "bien": [{"b4": "", "b5": "360030646005", "b6": "POLIGONO B AL NORTE DE LA CIUDAD", "b7": "40,HECTAREAS", "b1": "1", "b2": "1", "b3": "", "b8": "S"}], "num_insc": "368613", "anotacion": [{"a1": "1", "a3": "2008-04-29", "a2": "HIPOTECA", "a5": "CELEBRADA CON OPERADORA DE EMPRESAS Y SERVICIOS, S.A. DE C.V.", "a4": "N"}, {"a1": "2", "a3": "2007-02-09", "a2": "HIPOTECA", "a5": "CELEBRADA CON DINAMICA SONORENSE, S.A. DE C.V. LAS PARTES CONVIENEN EN QUE LA HIPOTECA SE EXTIENDA A LOS INTERESES DEVENGADOS AUN CUANDO SE EXCEDA DE TRES A\u00d1OS SIN EXCEDER EL PLAZO QUE MARCA LA LEY.", "a4": "S"}, {"a1": "3", "a3": "2009-06-05", "a2": "CANCELACION DE GRAVAMEN O LIMITACION", "a5": "CANCELADA INSCRIPCION 288097, REG. INMB. LIBRO DOS", "a4": "S"}]}]
我有w10,python 2.7,scrapy 1.0.3。
我不知道代码中缺少什么,所以我感谢你的帮助。谢谢!