**这是我的代码,用于按需抓取页面元素。数据实际上是通过分页的“下一页”按钮加载的,该按钮的 href="javascript:__postman()"。我试过 Selenium,但 Selenium 的按钮点击行为不稳定,会返回“未找到 javascript”的错误。**
import scrapy
import json
import request
class QuotesSpider(scrapy.Spider):
    """Crawl listing pages and walk their form-postback pagination.

    Start URLs come from the ``url`` field of every entry in ``brands.json``,
    which is read while the class body executes.  Each listing page is
    handled by :meth:`parse`, which re-submits the page's form ``frm1`` with
    ``__EVENTTARGET`` set to the target found in the pager's next link, so
    pages are requested one after another until no further link is found.
    """

    name = "product_scraper"
    allowed_domains = ["ozhat-turkiye.com"]

    # The path is relative, so it is resolved against the current working
    # directory at the moment this class is defined.
    with open('brands.json') as data_file:
        data_item = json.load(data_file)
    start_urls = [item["url"] for item in data_item]

    def parse_cachesList(self, response):
        """Process a paginated response exactly like :meth:`parse`.

        Kept under its original name so requests that were created with this
        callback still work; it now continues the pagination instead of only
        printing a marker.
        """
        return self.parse(response)

    def parse(self, response):
        """Log the first product link of the page and request the next page.

        Args:
            response: Response of a listing page that contains the form
                ``frm1`` and, optionally, the pager
                ``span#maincontent_DataPager``.

        Yields:
            scrapy.FormRequest: one postback request for the following page,
            handled again by this method.  Nothing is yielded when the pager
            has no usable link after ``span.decornonepagerlink``.
        """
        data = response.css("div.tabledivinlineblock a::attr(href)").extract_first()
        self.logger.info("First product link on %s: %s", response.url, data)

        # Presumably ``span.decornonepagerlink`` marks the current page, so the
        # first following sibling <a> with an href is the next page - verify
        # against the site's markup.
        next_href = response.css(
            "span#maincontent_DataPager span.decornonepagerlink ~ a::attr(href)"
        ).extract_first()
        if not next_href:
            # No pager, or no link after the current page: nothing to follow.
            return

        # The postback target is the first single-quoted argument of the
        # javascript: href.
        href_parts = next_href.split("'")
        if len(href_parts) < 2:
            self.logger.warning(
                "Unexpected pager href on %s: %r", response.url, next_href
            )
            return

        # One request per page: the hidden form fields are copied from *this*
        # response, so the following page has to be requested from the response
        # that precedes it rather than in a loop over a single response.
        yield scrapy.FormRequest.from_response(
            response,
            formid="frm1",
            formdata={
                '__EVENTTARGET': href_parts[1],
                '__EVENTARGUMENT': '',
            },
            # Submit without auto-clicking a submit control, so the posted
            # data carries only the simulated postback target.
            dont_click=True,
            callback=self.parse,
        )
现在我想手动提交表单:通过模拟分页“下一页”按钮的点击来提交表单,从而加载所有数据。