I'm having trouble with this code. It's an .aspx site, and I loop through all the pages of the search results. When I run the code it works and visits every page, but only the first page ends up stored in the dataset.
Can anyone help? My coding skills are pretty basic, so I've probably made a simple mistake...
import scraperwiki
import requests
import mechanize
import re
from lxml import html
dataset = {}
def get_table(root):
    # Pull every row out of the results table and save each one as a record
    rows = root.cssselect("table.resultstable tr")
    index = 0
    for row in rows:
        cells = row.cssselect("td")
        if cells:  # rows with no <td> cells (like the header row) are skipped
            index = index + 1
            dataset['index'] = index
            dataset['Beneficiary'] = cells[0].text_content()
            dataset['Postcode'] = cells[1].text_content()
            dataset['Town'] = cells[2].text_content()
            dataset['Rural development'] = cells[3].text_content()
            dataset['Direct Aid'] = cells[4].text_content()
            dataset['Market Schemes'] = cells[5].text_content()
            dataset['Total'] = cells[6].text_content()
            dataset['Responsible paying agency'] = cells[7].text_content()
            Links = row.cssselect("a")
            dataset['Details'] = Links[0].attrib['href']
            scraperwiki.sqlite.save(["index"], dataset)  # upsert keyed on 'index'
url1 = "http://cap-payments.defra.gov.uk/SearchResults.aspx?Page="
number = range(1,11)
last_part = "&Sort="
for n in number:
url = url1+str(n)+last_part
print url
br = mechanize.Browser()
response = br.open(url)
print "All forms:", [ form.name for form in br.forms() ]
br.select_form(name="aspnetForm")
print br.form
br["ctl00$Center$ContentPlaceHolder1$SearchControls1$ddlFinancialYear"] = ['2017']
response = br.submit()
web = response.read()
root = html.fromstring(web)
get_table(root)
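For reference, this is the rough check I run afterwards to see what was saved (it assumes scraperwiki's default swdata table, which is where sqlite.save writes when no table name is given):

# Rough sanity check: count the rows saved so far in the default "swdata" table
print scraperwiki.sqlite.select("count(*) as n from swdata")

However many pages the loop visits, only the first page's rows ever show up in that count.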