这是我的python代码:
import scrapy
from bs4 import BeautifulSoup as Soup
import requests as req
class TEst(scrapy.Spider):
name = "test"
start_urls = [
'https://www.bevonordics.se/alla-produkter/pumpar/poolpumpar/pumpar-med-enkel-hastighet/hayward-poolpump-type-super-pump-17725?c=9857'
]
def parse(self,response):
soup = Soup(response.text, "lxml")
yield {
'parts': self.get_spare_parts(soup, url=response.url)
}
def get_spare_parts(self,soup,url):
crsf_token = soup.find('input',{'name':"__csrf_token"}).get('value').strip()
print(f"{crsf_token} token>>>>>>>>")
spare_parts = []
#the first artcle number getsb all spareparts, if available
article_no = soup.find('div', class_ ="product-variants-accordion--item").get('data-variantnumber')
res = req.request(
method="POST",
url ='https://www.bevonordics.se/spareparts/spareparts/',
headers = {
'x-csrf-token': crsf_token,
'accept': "a*/*",
'accept-encoding': "gzip, deflate, br",
'accept-language': "en-GB,en-US;q=0.9,en;q=0.8",
'User-Agent': "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36",
'content-type': 'application/x-www-form-urlencoded; charset=UTF-8',
'origin': 'https://www.bevonordics.se',
'referer': url
},
cookies = {'cookie':"_gcl_au=1.1.2133245834.1625666558; _ga=GA1.2.44217088.1625666563; _hjid=0b9189a7-774b-4054-8b3f-6679b0b1153d; allowCookie=1; _hjDonePolls=680290%2C680275; __csrf_token-6=qSHpqpknXTd1kkoeKY6xLqyyLLPIcU; session-6=24a607a1be1b0acec621d0950311356a1334f24a5463777a4c8fc6b7ef072990; _hjIncludedInPageviewSample=1; _hjTLDTest=1; _hjAbsoluteSessionInProgress=0; _hjIncludedInSessionSample=1; _gid=GA1.2.592682141.1626017440; _gat_UA-646417-39=1; x-ua-device=tablet; nocache=detail-6"},
json={'variantNumber': article_no}
)
print(res)
spare_parts.append(res.text)
return spare_parts
这是卷曲版本:
curl 'https://www.bevonordics.se/spareparts/spareparts' \
-H 'x-csrf-token: qSHpqpknXTd1kkoeKY6xLqyyLLPIcU' \
-H 'user-agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36' \
-H 'content-type: application/x-www-form-urlencoded; charset=UTF-8' \
-H 'origin: https://www.bevonordics.se' \
-H 'referer: https://www.bevonordics.se/alla-produkter/pumpar/poolpumpar/pumpar-med-enkel-hastighet/hayward-poolpump-type-super-pump-17725?c=9857' \
-H 'cookie: _gcl_au=1.1.2133245834.1625666558; _ga=GA1.2.44217088.1625666563; _hjid=0b9189a7-774b-4054-8b3f-6679b0b1153d; allowCookie=1; _hjDonePolls=680290%2C680275; __csrf_token-6=qSHpqpknXTd1kkoeKY6xLqyyLLPIcU; session-6=24a607a1be1b0acec621d0950311356a1334f24a5463777a4c8fc6b7ef072990; _hjIncludedInPageviewSample=1; _hjTLDTest=1; _hjAbsoluteSessionInProgress=0; _hjIncludedInSessionSample=1; _gid=GA1.2.592682141.1626017440; _gat_UA-646417-39=1; x-ua-device=tablet; nocache=detail-6' \
--data-raw 'variantNumber=2001257' \
--compressed
我已经设置了标题、cookies 和所有我认为可能是问题的内容,但没有一个有效。 我似乎不明白为什么这会在 curl 中起作用但在 python 中不起作用。 我不明白问题是什么,我可以利用一些帮助,谢谢:)
答案 0 :(得分:0)
通过在 cookie 中设置 x-crsf-token 解决了这个问题 :) 谢谢!