正如您将要看到的,我才刚开始接触 Python、Scrapy 乃至编程本身。我想弄清楚如何在同一个蜘蛛(spider)中发出多个表单请求。我正在尝试从县书记官与登记官(Clerk and Recorder)的网站上抓取数据,但需要用两个(或更多)不同的姓名进行搜索。下面这段代码能让我得到第一个姓名(“Cruz”)所需的结果:
import scrapy


class LoginSpider(scrapy.Spider):
    """Submit the recording-search form for the name 'cruz' and scrape the result rows."""

    name = "CRSpider5"
    login_url = 'http://recordingsearch.car.elpasoco.com/rsui/opr/search.aspx'
    start_urls = [login_url]

    def parse(self, response):
        """Post the search form back to ``login_url``.

        The hidden ``__EVENTVALIDATION``, ``__VIEWSTATE`` and
        ``__VIEWSTATEGENERATOR`` inputs are read from the fetched page and
        sent back together with the search criteria. The response to the
        form post is handled by ``parse_quotes``.
        """
        validation = response.css('input[name="__EVENTVALIDATION"]::attr(value)').extract_first()
        state = response.css('input[name="__VIEWSTATE"]::attr(value)').extract_first()
        generator = response.css('input[name="__VIEWSTATEGENERATOR"]::attr(value)').extract_first()
        data = {
            '__EVENTVALIDATION': validation,
            '__VIEWSTATE': state,
            '__VIEWSTATEGENERATOR': generator,
            '__LASTFOCUS': '',
            '__EVENTTARGET': '',
            '__EVENTARGUMENT': '',
            # NOTE(review): FormRequest URL-encodes form values, so the literal
            # '+' is presumably sent as %2B rather than a space -- confirm the
            # server accepts this value as written.
            'ctl00$ContentPlaceHolder1$btnSubmit': 'Submit+Search',
            'ctl00$ContentPlaceHolder1$lbxDocumentTypes': 'TRANS',
            'ctl00$ContentPlaceHolder1$txtGrantorGranteeName': 'cruz',
        }
        yield scrapy.FormRequest(url=self.login_url, formdata=data, callback=self.parse_quotes)

    def parse_quotes(self, response):
        """Yield one dict per row of the search-results table.

        The first row and the last two rows of the table are skipped
        (presumably header and footer/pager rows -- confirm against the page).
        """
        for test in response.css('table#ctl00_ContentPlaceHolder1_gvSearchResults tr')[1:-2]:
            yield {
                # First and second <span> texts of the row.
                'Debtor': test.css("span::text").extract_first(),
                'Creditor': test.css("span::text")[1].extract(),
                # Fourth and first <font> texts of the row.
                'Date Recorded': test.css('font::text')[3].extract(),
                'Instrument Number': test.css('font::text').extract_first(),
                'County': 'El Paso'
            }
我想对多个姓名执行与上面相同的操作(即把 'ctl00$ContentPlaceHolder1$txtGrantorGranteeName' 字段改成其他姓名,例如 “smith” 或 “Jones”)。怎样才能在同一个蜘蛛中做到这一点?谢谢!
答案 0 :(得分:0)
如果您想用随机选取的姓名来发起 FormRequest,可以这样做:
import scrapy
import random
class LoginSpider(scrapy.Spider):
    """Submit the recording-search form for a randomly chosen name."""

    name = "CRSpider5"
    login_url = 'http://recordingsearch.car.elpasoco.com/rsui/opr/search.aspx'
    start_urls = [login_url]
    # Candidate search names. Stored under its own attribute because rebinding
    # ``name`` to a list would overwrite the spider name defined above.
    search_names = ['smith', 'Jones']

    def parse(self, response):
        """Post the search form back to ``login_url`` with a random search name.

        The hidden ``__EVENTVALIDATION``, ``__VIEWSTATE`` and
        ``__VIEWSTATEGENERATOR`` inputs are read from the fetched page and
        sent back together with the search criteria.
        """
        validation = response.css('input[name="__EVENTVALIDATION"]::attr(value)').extract_first()
        state = response.css('input[name="__VIEWSTATE"]::attr(value)').extract_first()
        generator = response.css('input[name="__VIEWSTATEGENERATOR"]::attr(value)').extract_first()
        data = {
            '__EVENTVALIDATION': validation,
            '__VIEWSTATE': state,
            '__VIEWSTATEGENERATOR': generator,
            '__LASTFOCUS': '',
            '__EVENTTARGET': '',
            '__EVENTARGUMENT': '',
            'ctl00$ContentPlaceHolder1$btnSubmit': 'Submit+Search',
            'ctl00$ContentPlaceHolder1$lbxDocumentTypes': 'TRANS',
            # Class attributes are not visible as bare names inside a method,
            # so the list has to be reached through ``self``.
            'ctl00$ContentPlaceHolder1$txtGrantorGranteeName': random.choice(self.search_names),
        }
        # NOTE: parse_quotes is not defined in this snippet; the spider needs
        # that callback (the row-parsing method of the original spider) for
        # this request to be built.
        yield scrapy.FormRequest(url=self.login_url, formdata=data, callback=self.parse_quotes)
如果您想用不同的姓名发起多个请求,可以遍历上面的姓名列表,为每个姓名分别发出一个请求。