对于Scrapy中的循环,不返回任何内容或相同的数据

时间:2015-12-15 07:30:25

标签: python scrapy

我在Scrapy& amp; Python和我真的需要你的帮助。 (请原谅我的英语,我说西班牙语)。

我的问题是:我需要从网站http://www.icreson.gob.mx/PAGINANUEVA/sala/index.php?Sala=HERMOSILLO中删除数据我需要传递各种参数,其中一个是“inscripcion”,用于标识我要查找的信息。返回的第一个数据位于http://www.icreson.gob.mx/PAGINANUEVA/sala/inscripciones.php然后我需要更改另外三个链接,以完成我需要废弃的所有数据:

<script>
  var seen = {};
  $('table tr').each(function() {
  var txt = $(this).text();
  if (seen[txt])
      $(this).remove();
  else
    seen[txt] = true;
  });
</script>

我创建一个for循环增加“inscripcion”数字(i中的for),当它只有一个范围(368610,368611)时,它工作得很好,它返回正确的数据。但是如果我在范围内传递了两个或更多个麻木(368610,368618),那么数据返回它是空的或相同的。

这是 items.py

中的代码
.../PAGINANUEVA/sala/anotaciones.php,
.../PAGINANUEVA/sala/bien_inmueble.php,
.../PAGINANUEVA/sala/usuario.php

icre_sp.py 中蜘蛛的代码是:

from scrapy import Field
from scrapy import Item

class Ejemplo2Item(Item):
    inscripcion = Field()
    existe_inscripcion = Field()
    num_insc = Field()
    anotacion = Field()
    bien = Field()
    usuario = Field()

这是多个“inscripcion”范围(368612,368622)的结果,你可以看到重复,但它应该是不同的。

# -*- coding: utf-8 -*-

from scrapy import Spider
from ejemplo2.items import Ejemplo2Item
from scrapy import FormRequest
from scrapy.http import Request
from scrapy import Selector

def _limpiar(cantidad):  
    if cantidad:
        if '$' in cantidad:
            cantidad = cantidad.replace('$', '')
        if ',' in cantidad:
            cantidad = cantidad.replace(',', '')
        if '.' in cantidad:
            cantidad = cantidad.split('.')[0]
        if cantidad.isdigit():
            cantidad = int(cantidad)
        return cantidad
    else:
        return 0


class Ejemplo2Spider(Spider):
    name = 'gsf'
    start_urls = ['http://www.icreson.gob.mx/PAGINANUEVA/sala/index.php?ban=1&Sala=',]

    def parse(self, response):

        for i in range(368612,368622):
            h = str(i)
            item = Ejemplo2Item()
            item['num_insc'] = h
            yield FormRequest.from_response(response,
                                            formname='MiFormulario',
                                            formdata={'instituto': '1', 
                                                    'area': 'LIBRO 1',
                                                    'inscripcion': h},
                                            cookies={'PHPSESSID':'f92c499925f059eb04b8045290011eda',
                                                    '_ga': 'GA1.3.612616751.1438557988'},
                                            meta={'item': item},
                                            dont_filter=True,
                                            method='POST',
                                            callback=self.parseInsc)

    def parseInsc(self, response):
        item = response.meta['item']
        if "LO SIENTO NO SE PUEDE" in response.body:
            item['existe_inscripcion'] = 'NO'
            return item

        valores = response.xpath('//input/@value').extract()
        claves = response.xpath('//input/@name').extract()
        v_text = response.xpath("//textarea/text()").extract()[0]
        insc = dict(zip(claves, valores))
        insc['Monto'] = _limpiar(insc['Monto'])
        insc['Fedatario'] = v_text
        item['inscripcion'] = insc
        item['existe_inscripcion'] = 'SI'

        solicitud1 = Request('http://www.icreson.gob.mx/PAGINANUEVA/sala/anotaciones.php', 
                        dont_filter=True,
                        meta={'item': item},
                        callback=self.parseAnot)
        solicitud1.meta['item'] = item
        return solicitud1

    def parseAnot(self, response):
        item = response.meta['item']
        if "LA INSCRIPCION NO TIENE" in response.body: 
            anot = 'NO TIENE ANOTACION'
        else:
            campos = response.xpath("//input")
            son = (len(campos) - 3 ) // 5
            off_set = 0
            van = 0
            anot = []
            while True:
                if van == son:
                    break
                c1 = campos[1+off_set].xpath("@value").extract()[0]
                c2 = campos[2+off_set].xpath("@value").extract()[0]
                c3 = campos[3+off_set].xpath("@value").extract()[0]
                c4 = campos[4+off_set].xpath("@value").extract()[0]
                c5 = campos[5+off_set].xpath("@value").extract()[0]
                ren = {'a1': c1, 'a2': c2, 'a3': c3, 'a4': c4, 'a5': c5}
                anot.append(ren)
                off_set = off_set + 5
                van = van + 1

        item['anotacion'] = anot
        solicitud2 = Request('http://www.icreson.gob.mx/PAGINANUEVA/sala/bien_inmueble.php', 
                        dont_filter=True,
                        meta={'item': item}, 
                        callback=self.parseBien)
        solicitud2.meta['item'] = item
        return solicitud2

    def parseBien(self, response):
        item = response.meta['item']
        if "NO EXISTE NINGUN BIEN" in response.body:
            bien = 'NO TIENE BIEN INMUEBLE'
        else:
            filas = response.xpath("//div[@class='mid_conten_PagNueva']/table/tr[2]/td/table[2]//tr[position()>1]")  # el .extract() al final lo convierte en lista o valor, sin eso es un tipo Selector
            son = len(filas) - 3
            bien = []
            van = 0
            for fila in filas:
                v1 = fila.xpath('./td[1]/span/text()').extract()
                v1 = v1[0] if len(v1) > 0 else ""
                v2 = fila.xpath('./td[2]/span/text()').extract()
                v2 = v2[0] if len(v2) > 0 else ""
                v3 = fila.xpath('./td[3]/span/text()').extract()
                v3 = v3[0] if len(v3) > 0 else ""
                v4 = fila.xpath('./td[4]/span/text()').extract()
                v4 = v4[0] if len(v4) > 0 else ""
                v5 = fila.xpath('./td[5]/span/text()').extract()
                v5 = v5[0] if len(v5) > 0 else ""
                v6 = fila.xpath('./td[6]/span/text()').extract()
                v6 = v6[0] if len(v6) > 0 else ""
                v7 = fila.xpath('./td[7]/span/text()').extract()
                v7 = v7[0] if len(v7) > 0 else ""
                v8 = fila.xpath('./td[8]/span/text()').extract()
                v8 = v8[0] if len(v8) > 0 else ""
                ren = {'b1': v1, 
                        'b2': v2, 
                        'b3': v3, 
                        'b4': v4, 
                        'b5': v5, 
                        'b6': v6, 
                        'b7': v7, 
                        'b8': v8
                        }
                bien.append(ren)
                van = van + 1
                if van == son:
                    break

        item['bien'] = bien
        solicitud3 = Request('http://www.icreson.gob.mx/PAGINANUEVA/sala/usuario.php', 
                        dont_filter=True,
                        meta={'item': item}, 
                        callback=self.parseUsu)
        solicitud3.meta['item'] = item
        return solicitud3

    def parseUsu(self, response):
        item = response.meta['item']
        if "NO EXISTEN USUARIOS" in response.body:
            usua = 'NO TIENE USUARIO'
        else:
            filas = response.xpath("//div[@class='mid_conten_PagNueva']/table/tr[2]/td/table/tr/td/table[2]//tr[position()>1]")  # el .extract() al final lo convierte en lista o valor, sin eso es un tipo Selector
            usua = []
            for fila in filas:                                                                                 
                v1 = fila.xpath('./td[1]/span/text()').extract()
                v1 = v1[0] if len(v1) > 0 else ""
                v2 = fila.xpath('./td[2]/span/text()').extract()
                v2 = v2[0] if len(v2) > 0 else ""
                v3 = fila.xpath('./td[3]/span/text()').extract()
                v3 = v3[0] if len(v3) > 0 else ""
                v4 = fila.xpath('./td[4]/span/text()').extract()
                v4 = v4[0] if len(v4) > 0 else ""
                v5 = fila.xpath('./td[5]/span/text()').extract()
                v5 = v5[0] if len(v5) > 0 else ""
                v6 = fila.xpath('./td[6]/span/text()').extract()
                v6 = v6[0] if len(v6) > 0 else ""
                ren = {'u1': v1, 
                        'u2': v2, 
                        'u3': v3, 
                        'u4': v4, 
                        'u5': v5, 
                        'u6': v6
                        }
                usua.append(ren)

        item['usuario'] = usua
        return item

这只是范围内只有一个“inscripcion”的结果(368612,368613)

[{"usuario": [{"u5": "TREJO", "u4": "MOERNO", "u6": "S", "u1": "1", "u3": "RITA ERNESTINA ", "u2": "EMISOR"}, {"u5": "MURRIETA", "u4": "ROJO", "u6": "S", "u1": "2", "u3": "MARIA EUGENIA", "u2": "RECEPTOR"}], "inscripcion": {"hora": "", "Poblacion": "HERMOSILLO", "Fedatario": "FRANCISCO JAVIER CABRERA FERNANDEZ NOTARIA NO. 11- HERMOSILL", "Vigencia": "S", "Monto": 271000, "Fecha": "2006-09-08", "Inscripcion": "368618", "Acto2": "REGISTRO INMOBILIARIO", "Clave": "ESCRITURA PUBLICA 63381", "Municipio": "HERMOSILLO", "Acto": "COMPRA VENTA", "Libro": "1", "Fecha de Registro": "2008-05-12", "Vigencia2": "19099"}, "existe_inscripcion": "SI", "bien": [{"b4": "6", "b5": "360014079001", "b6": "HERMOSILLO", "b7": "600,METROS", "b1": "1", "b2": "1", "b3": "25", "b8": "S"}], "num_insc": "368612", "anotacion": "NO TIENE ANOTACION"},
{"usuario": [{"u5": "TREJO", "u4": "MOERNO", "u6": "S", "u1": "1", "u3": "RITA ERNESTINA ", "u2": "EMISOR"}, {"u5": "MURRIETA", "u4": "ROJO", "u6": "S", "u1": "2", "u3": "MARIA EUGENIA", "u2": "RECEPTOR"}], "inscripcion": {"hora": "", "Poblacion": "HERMOSILLO", "Fedatario": "FRANCISCO JAVIER CABRERA FERNANDEZ NOTARIA NO. 11- HERMOSILL", "Vigencia": "S", "Monto": 271000, "Fecha": "2006-09-08", "Inscripcion": "368618", "Acto2": "REGISTRO INMOBILIARIO", "Clave": "ESCRITURA PUBLICA 63381", "Municipio": "HERMOSILLO", "Acto": "COMPRA VENTA", "Libro": "1", "Fecha de Registro": "2008-05-12", "Vigencia2": "19099"}, "existe_inscripcion": "SI", "bien": [{"b4": "6", "b5": "360014079001", "b6": "HERMOSILLO", "b7": "600,METROS", "b1": "1", "b2": "1", "b3": "25", "b8": "S"}], "num_insc": "368616", "anotacion": "NO TIENE ANOTACION"},
{"usuario": [{"u5": "RODRIGUEZ", "u4": "CONTRERAS", "u6": "S", "u1": "1", "u3": "MARIA DEL SOCORRO GUADALUPE", "u2": "EMISOR"}, {"u5": "", "u4": "", "u6": "S", "u1": "2", "u3": "INMOBILIARIA JACEDO, S.A. DE C.V.", "u2": "RECEPTOR"}], "inscripcion": {"hora": "", "Poblacion": "HERMOSILLO", "Fedatario": "FRANCISCO JAVIER CABRERA FERNANDEZ NOTARIA NO. 11- HERMOSILL", "Vigencia": "S", "Monto": 271000, "Fecha": "2006-09-08", "Inscripcion": "368618", "Acto2": "REGISTRO INMOBILIARIO", "Clave": "ESCRITURA PUBLICA 63381", "Municipio": "HERMOSILLO", "Acto": "COMPRA VENTA", "Libro": "1", "Fecha de Registro": "2008-05-12", "Vigencia2": "19099"}, "existe_inscripcion": "SI", "bien": [{"b4": "6", "b5": "360014079001", "b6": "HERMOSILLO", "b7": "600,METROS", "b1": "1", "b2": "1", "b3": "25", "b8": "S"}], "num_insc": "368621", "anotacion": "NO TIENE ANOTACION"},
{"usuario": [{"u5": "TREJO", "u4": "MOERNO", "u6": "S", "u1": "1", "u3": "RITA ERNESTINA ", "u2": "EMISOR"}, {"u5": "MURRIETA", "u4": "ROJO", "u6": "S", "u1": "2", "u3": "MARIA EUGENIA", "u2": "RECEPTOR"}], "inscripcion": {"hora": "", "Poblacion": "HERMOSILLO", "Fedatario": "FRANCISCO JAVIER CABRERA FERNANDEZ NOTARIA NO. 11- HERMOSILL", "Vigencia": "S", "Monto": 271000, "Fecha": "2006-09-08", "Inscripcion": "368618", "Acto2": "REGISTRO INMOBILIARIO", "Clave": "ESCRITURA PUBLICA 63381", "Municipio": "HERMOSILLO", "Acto": "COMPRA VENTA", "Libro": "1", "Fecha de Registro": "2008-05-12", "Vigencia2": "19099"}, "existe_inscripcion": "SI", "bien": [{"b4": "6", "b5": "360014079001", "b6": "HERMOSILLO", "b7": "600,METROS", "b1": "1", "b2": "1", "b3": "25", "b8": "S"}], "num_insc": "368617", "anotacion": "NO TIENE ANOTACION"},
{"usuario": [{"u5": "TREJO", "u4": "MOERNO", "u6": "S", "u1": "1", "u3": "RITA ERNESTINA ", "u2": "EMISOR"}, {"u5": "MURRIETA", "u4": "ROJO", "u6": "S", "u1": "2", "u3": "MARIA EUGENIA", "u2": "RECEPTOR"}], "inscripcion": {"hora": "", "Poblacion": "HERMOSILLO", "Fedatario": "FRANCISCO JAVIER CABRERA FERNANDEZ NOTARIA NO. 11- HERMOSILL", "Vigencia": "S", "Monto": 271000, "Fecha": "2006-09-08", "Inscripcion": "368618", "Acto2": "REGISTRO INMOBILIARIO", "Clave": "ESCRITURA PUBLICA 63381", "Municipio": "HERMOSILLO", "Acto": "COMPRA VENTA", "Libro": "1", "Fecha de Registro": "2008-05-12", "Vigencia2": "19099"}, "existe_inscripcion": "SI", "bien": [{"b4": "6", "b5": "360014079001", "b6": "HERMOSILLO", "b7": "600,METROS", "b1": "1", "b2": "1", "b3": "25", "b8": "S"}], "num_insc": "368615", "anotacion": "NO TIENE ANOTACION"},
{"usuario": [{"u5": "RODRIGUEZ", "u4": "CONTRERAS", "u6": "S", "u1": "1", "u3": "MARIA DEL SOCORRO GUADALUPE", "u2": "EMISOR"}, {"u5": "", "u4": "", "u6": "S", "u1": "2", "u3": "INMOBILIARIA JACEDO, S.A. DE C.V.", "u2": "RECEPTOR"}], "inscripcion": {"hora": "", "Poblacion": "HERMOSILLO", "Fedatario": "FRANCISCO JAVIER CABRERA FERNANDEZ NOTARIA NO. 11- HERMOSILL", "Vigencia": "S", "Monto": 271000, "Fecha": "2006-09-08", "Inscripcion": "368618", "Acto2": "REGISTRO INMOBILIARIO", "Clave": "ESCRITURA PUBLICA 63381", "Municipio": "HERMOSILLO", "Acto": "COMPRA VENTA", "Libro": "1", "Fecha de Registro": "2008-05-12", "Vigencia2": "19099"}, "existe_inscripcion": "SI", "bien": [{"b4": "6", "b5": "360014079001", "b6": "HERMOSILLO", "b7": "600,METROS", "b1": "1", "b2": "1", "b3": "25", "b8": "S"}], "num_insc": "368620", "anotacion": "NO TIENE ANOTACION"},
{"usuario": [{"u5": "TREJO", "u4": "MOERNO", "u6": "S", "u1": "1", "u3": "RITA ERNESTINA ", "u2": "EMISOR"}, {"u5": "MURRIETA", "u4": "ROJO", "u6": "S", "u1": "2", "u3": "MARIA EUGENIA", "u2": "RECEPTOR"}], "inscripcion": {"hora": "", "Poblacion": "HERMOSILLO", "Fedatario": "FRANCISCO JAVIER CABRERA FERNANDEZ NOTARIA NO. 11- HERMOSILL", "Vigencia": "S", "Monto": 271000, "Fecha": "2006-09-08", "Inscripcion": "368618", "Acto2": "REGISTRO INMOBILIARIO", "Clave": "ESCRITURA PUBLICA 63381", "Municipio": "HERMOSILLO", "Acto": "COMPRA VENTA", "Libro": "1", "Fecha de Registro": "2008-05-12", "Vigencia2": "19099"}, "existe_inscripcion": "SI", "bien": [{"b4": "6", "b5": "360014079001", "b6": "HERMOSILLO", "b7": "600,METROS", "b1": "1", "b2": "1", "b3": "25", "b8": "S"}], "num_insc": "368618", "anotacion": "NO TIENE ANOTACION"},
{"usuario": [{"u5": "RODRIGUEZ", "u4": "CONTRERAS", "u6": "S", "u1": "1", "u3": "MARIA DEL SOCORRO GUADALUPE", "u2": "EMISOR"}, {"u5": "", "u4": "", "u6": "S", "u1": "2", "u3": "INMOBILIARIA JACEDO, S.A. DE C.V.", "u2": "RECEPTOR"}], "inscripcion": {"hora": "", "Poblacion": "HERMOSILLO", "Fedatario": "FRANCISCO JAVIER CABRERA FERNANDEZ NOTARIA NO. 11- HERMOSILL", "Vigencia": "S", "Monto": 271000, "Fecha": "2006-09-08", "Inscripcion": "368618", "Acto2": "REGISTRO INMOBILIARIO", "Clave": "ESCRITURA PUBLICA 63381", "Municipio": "HERMOSILLO", "Acto": "COMPRA VENTA", "Libro": "1", "Fecha de Registro": "2008-05-12", "Vigencia2": "19099"}, "existe_inscripcion": "SI", "bien": [{"b4": "6", "b5": "360014079001", "b6": "HERMOSILLO", "b7": "600,METROS", "b1": "1", "b2": "1", "b3": "25", "b8": "S"}], "num_insc": "368614", "anotacion": "NO TIENE ANOTACION"},
{"usuario": [{"u5": "RODRIGUEZ", "u4": "CONTRERAS", "u6": "S", "u1": "1", "u3": "MARIA DEL SOCORRO GUADALUPE", "u2": "EMISOR"}, {"u5": "", "u4": "", "u6": "S", "u1": "2", "u3": "INMOBILIARIA JACEDO, S.A. DE C.V.", "u2": "RECEPTOR"}], "inscripcion": {"hora": "", "Poblacion": "HERMOSILLO", "Fedatario": "FRANCISCO JAVIER CABRERA FERNANDEZ NOTARIA NO. 11- HERMOSILL", "Vigencia": "S", "Monto": 271000, "Fecha": "2006-09-08", "Inscripcion": "368618", "Acto2": "REGISTRO INMOBILIARIO", "Clave": "ESCRITURA PUBLICA 63381", "Municipio": "HERMOSILLO", "Acto": "COMPRA VENTA", "Libro": "1", "Fecha de Registro": "2008-05-12", "Vigencia2": "19099"}, "existe_inscripcion": "SI", "bien": [{"b4": "", "b5": "360030646005", "b6": "POLIGONO B AL NORTE DE LA CIUDAD", "b7": "40,HECTAREAS", "b1": "1", "b2": "1", "b3": "", "b8": "S"}], "num_insc": "368619", "anotacion": "NO TIENE ANOTACION"},
{"usuario": [{"u5": "RODRIGUEZ", "u4": "CONTRERAS", "u6": "S", "u1": "1", "u3": "MARIA DEL SOCORRO GUADALUPE", "u2": "EMISOR"}, {"u5": "", "u4": "", "u6": "S", "u1": "2", "u3": "INMOBILIARIA JACEDO, S.A. DE C.V.", "u2": "RECEPTOR"}], "inscripcion": {"hora": "", "Poblacion": "HERMOSILLO", "Fedatario": "SALVADOR ANTONIO CORRAL MARTINEZ NOTARIA No. 28- HERMOSILLO", "Vigencia": "S", "Monto": 14800000, "Fecha": "2007-10-08", "Inscripcion": "368613", "Acto2": "REGISTRO INMOBILIARIO", "Clave": "ESCRITURA PUBLICA 27237", "Municipio": "HERMOSILLO", "Acto": "COMPRA VENTA", "Libro": "1", "Fecha de Registro": "2008-05-12", "Vigencia2": "19098"}, "existe_inscripcion": "SI", "bien": [{"b4": "", "b5": "360030646005", "b6": "POLIGONO B AL NORTE DE LA CIUDAD", "b7": "40,HECTAREAS", "b1": "1", "b2": "1", "b3": "", "b8": "S"}], "num_insc": "368613", "anotacion": [{"a1": "1", "a3": "2008-04-29", "a2": "HIPOTECA", "a5": "CELEBRADA CON  OPERADORA DE EMPRESAS Y SERVICIOS, S.A. DE C.V.", "a4": "N"}, {"a1": "2", "a3": "2007-02-09", "a2": "HIPOTECA", "a5": "CELEBRADA CON DINAMICA SONORENSE, S.A. DE C.V.  LAS PARTES CONVIENEN EN QUE LA HIPOTECA SE EXTIENDA A LOS INTERESES DEVENGADOS AUN CUANDO SE EXCEDA DE TRES A\u00d1OS SIN EXCEDER EL PLAZO QUE MARCA LA LEY.", "a4": "S"}, {"a1": "3", "a3": "2009-06-05", "a2": "CANCELACION DE GRAVAMEN O LIMITACION", "a5": "CANCELADA INSCRIPCION 288097, REG. INMB. LIBRO DOS", "a4": "S"}]}]

我有w10,python 2.7,scrapy 1.0.3。

我不知道代码中缺少什么,所以我感谢你的帮助。谢谢!

0 个答案:

没有答案