def start_requests(self):
    """Yield the POST request for page 1 of the fund-listing JSON API.

    The request carries an empty JSON object (``{}``) as its body and is
    routed to ``self.parse``.

    Yields:
        scrapy.Request: the POST request for the listing endpoint.
    """
    page = 1
    url = f'http://gs.amac.org.cn/amac-infodisc/api/pof/fund?rand=0.03238375864053089&page={page}&size=100'
    payload = {}
    # Do not forward a caller-supplied Content-Length: the downloader derives
    # that header from the body on its own, so passing a hard-coded value
    # along puts the header on the wire twice. That duplicate is the likely
    # cause of the HTTP 400 seen with this request (the same headers work
    # with `requests`, which overwrites the value instead of duplicating it).
    headers = {
        header_name: header_value
        for header_name, header_value in self.headers.items()
        if header_name.lower() != 'content-length'
    }
    yield scrapy.Request(
        url,
        method="POST",
        body=json.dumps(payload),
        headers=headers,
        callback=self.parse,
    )
当我用 Scrapy 以 POST 方式发送 {} 时,HTTP 返回状态码 400。用 requests 模块发送同样的请求是可以成功的,但这样做给我带来了太多阻碍。我想知道怎样才能用 Scrapy 把 {} POST 出去,非常感谢!
我想爬取的网站是:http://gs.amac.org.cn/amac-infodisc/res/pof/fund/index.html 。以下是使用 requests 模块、可以正常工作的代码:
# -*- coding: utf-8 -*-
import scrapy
import json
from scrapy.spider import CrawlSpider
import requests
from Fund.items import FundItem
class FundSpider(scrapy.Spider):
    """Spider that reads the fund-listing JSON API and requests each fund page.

    The listing endpoint is queried with a POST whose body is an empty JSON
    object; every entry of the response's ``"content"`` list contributes one
    detail-page request handled by ``self.parse``.
    """

    name = 'fund'
    allowed_domains = ['gs.amac.org.cn']

    # Request headers sent with the listing POST.
    # Content-Length is deliberately not listed here: HTTP clients compute it
    # from the body, and a hard-coded value risks being sent as a duplicate
    # header when this dict is passed to scrapy.Request.
    headers = {
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        'Content-Type': 'application/json',
        'Host': 'gs.amac.org.cn',
        'Origin': 'http://gs.amac.org.cn',
        'Referer': 'http://gs.amac.org.cn/amac-infodisc/res/pof/fund/index.html',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36',
        'X-Requested-With': 'XMLHttpRequest',
    }

    def start_requests(self):
        """Fetch the fund listing and yield one request per listed fund.

        Yields:
            scrapy.Request: a GET request for each fund detail page, with
            ``self.parse`` as the callback.

        Raises:
            requests.HTTPError: if the listing endpoint answers with an
                HTTP error status.
        """
        base_url2 = 'http://gs.amac.org.cn/amac-infodisc/res/pof/fund/'
        for page in range(1):
            url = f'http://gs.amac.org.cn/amac-infodisc/api/pof/fund?rand=0.03238375864053089&page={page}&size=100'
            # NOTE(review): this is a synchronous call made from inside the
            # spider, so nothing else is scheduled until it returns.
            listing = requests.post(url=url, data=json.dumps({}), headers=self.headers)
            # Fail with the real HTTP error instead of a confusing KeyError or
            # JSON decode error further down.
            listing.raise_for_status()
            # Decode the body once, and walk whatever the server returned
            # rather than assuming exactly 100 entries (a short final page
            # would otherwise raise IndexError).
            funds = listing.json()["content"]
            for fund in funds:
                url2 = base_url2 + fund["url"]
                yield scrapy.Request(url2, callback=self.parse)