在这里一些专家的帮助下,我写出了一个效果很好的爬虫(scraper)。
必不可少的代码行实际上是:
data = {"partOptionFilter": {"PartNumber": PN.iloc[i, 0], "AlternativeOemId": "17155"}}
r = requests.post('https://www.partssource.com/catalog/Service', json=data).json()
但是该站点最近将其链接从partsfinder.com更改为partssource.com,此后该代码似乎无法正常工作了。
只是想知道我是否可以在原始代码上使用一个技巧来使它再次工作。
感谢任何想法,谢谢!
import requests
import pandas as pd
# Look up "YourPrice" for every part number in the input spreadsheet, once per
# alternative OEM id, and write all prices to a CSV file.

# Part numbers are read from the 'Product code' column of the input workbook.
df = pd.read_excel(r'C:\Users\212677036\Documents\Part Number Input.xlsx')
PN = pd.DataFrame(df, columns=['Product code'])

# Alternative OEM ids queried for each part number; the order here is also the
# order in which the per-OEM price blocks are stacked in the output file.
OEM_IDS = ["17155", "17475", "16880", "47221", "17045", "17055"]
SERVICE_URL = 'https://www.partssource.com/catalog/Service'

# One price list per OEM id, filled in part-number order.
prices_by_oem = {oem_id: [] for oem_id in OEM_IDS}

# .tolist() yields plain Python scalars, which the JSON encoder accepts
# (numpy integer scalars returned by .iloc[i, 0] are not JSON-serializable).
for part_number in PN.iloc[:, 0].tolist():
    for oem_id in OEM_IDS:
        data = {"partOptionFilter": {"PartNumber": part_number, "AlternativeOemId": oem_id}}
        # A timeout keeps one unresponsive request from hanging the whole run.
        r = requests.post(SERVICE_URL, json=data, timeout=30).json()
        # Only the first part option's price is recorded.
        # NOTE(review): raises IndexError/KeyError if the response carries no
        # part options — confirm whether such parts should be skipped instead.
        prices_by_oem[oem_id].append(r['Data']['PartOptions'][0]['YourPrice'])

# Same layout as before: one single-column frame per OEM id, stacked vertically.
list_of_dataframes = [pd.DataFrame(prices_by_oem[oem_id]) for oem_id in OEM_IDS]
pd.concat(list_of_dataframes).to_csv(r'C:\Users\212677036\Documents\output25.csv')
答案 0 :(得分:1)
在使用 Chrome / Firefox 中的 DevTools 之后,我创建了此代码。
页面使用不同的url,发送不同的数据,使用不同的键获取结果。
您将不得不使用 DevTools 观察浏览器发往服务器的更多请求,才能弄清楚如何在 data 中使用更多参数。
import requests
# Minimal example: send one search request and print the price of the first
# option of the first product in the response.
query = "mobile"

SEARCH_URL = 'https://prodasf-vip.partsfinder.com/Orion/CatalogService/api/v1/search'

# Request body. "facets" and "urlParams" are sent empty here; an OEM filter
# would presumably be passed as, e.g. (TODO confirm against the site's requests):
#   "facets":    [{"name": "OEM", "value": "GE%20Healthcare"}]
#   "urlParams": [{"name": "OEM", "value": "GE Healthcare"}]
data = {
    "facets": [],
    "facilityId": 38451,
    "id_ins": "a2a3d332-73a7-4194-ad87-fe7412388916",
    "limit": 15,
    "query": query,
    "referer": "/catalog/Service",
    "start": 0,
    "urlParams": [],
}

r = requests.post(SEARCH_URL, json=data)
data = r.json()  # from here on `data` holds the decoded response

# Drill down one level at a time:
# products -> first product -> its options -> first option -> price
first_product = data['products'][0]
first_option = first_product['options'][0]
print(first_option['price'])
编辑(2020.09.01)
如果您有多个查询,请使用 for 循环多次运行相同的代码,每次使用不同的查询。而且,当您获得一个查询的数据时,请使用 for 循环从 data['products'] 中获取所有价格。
编辑(2020.09.06)
我在 get_data() 中添加了变量 start 和 limit,后来又在循环 for start in range(0, limit*10, limit) 中运行它以获取10页(每页包含100个元素)
import requests
# import pprint # to format data on screen: `pprint.pprint()`
# --- functions ---
def get_data(query, start=0, limit=15, timeout=30):
    """Fetch one page of catalog search results from the server.

    Args:
        query: Search text, sent as the payload's "query" field.
        start: Sent as "start"; callers step it by ``limit`` to page
            through the results.
        limit: Sent as "limit" (number of results requested per page).
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    # "facets" and "urlParams" are sent empty; an OEM filter would presumably
    # be passed as, e.g. (TODO confirm against the site's own requests):
    #   "facets":    [{"name": "OEM", "value": "GE%20Healthcare"}]
    #   "urlParams": [{"name": "OEM", "value": "GE Healthcare"}]
    # NOTE(review): "facilityId" and "id_ins" are fixed values; presumably
    # copied from a browser session — confirm they stay valid over time.
    payload = {
        "facets": [],
        "facilityId": 38451,
        "id_ins": "a2a3d332-73a7-4194-ad87-fe7412388916",
        "limit": limit,
        "query": query,
        "referer": "/catalog/Service",
        "start": start,
        "urlParams": [],
    }

    r = requests.post(
        'https://prodasf-vip.partsfinder.com/Orion/CatalogService/api/v1/search',
        json=payload,
        timeout=timeout,  # without it a stalled connection blocks forever
    )
    # Surface HTTP errors here rather than as a confusing JSON/KeyError later.
    r.raise_for_status()
    return r.json()
def show_data(data):
    """Print a human-readable listing of the products in a search response.

    First prints the price of the first option of the first product (when
    there is one), then for every product its title followed by one line per
    option (price and vendor item number), or 'price: unknown' when the
    product has no options. Each product is terminated by a '---' line.

    Args:
        data: Decoded response; must contain a 'products' list whose items
            have 'title' and 'options' keys.
    """
    products = data['products']

    # The headline price only exists when the first product has an option;
    # guard it so an empty result page does not raise IndexError.
    if products and products[0]['options']:
        print(products[0]['options'][0]['price'])

    for item in products:
        print('title:', item['title'])
        if not item['options']:
            print('price: unknown')
        else:
            for option in item['options']:
                print('price:', option['price'], '| vendor item number:', option['vendorItemNumber'])
        print('---')
def filter_data(data):
    """Extract the option prices of every product in a search response.

    Args:
        data: Decoded response; must contain a 'products' list whose items
            have an 'options' key.

    Returns:
        A list with one entry per product, in response order: the list of
        that product's option prices, or an empty list when the product has
        no options (price unknown).
    """
    return [
        # `or []` keeps the "falsy options -> empty list" behaviour of the
        # explicit if/else this comprehension replaces.
        [option['price'] for option in (item['options'] or [])]
        for item in data['products']
    ]
# --- main ---
# Run every query and page through its results, printing the prices per page.
all_queries = ["mobile", 'GE Healthcare']
limit = 100  # number of results requested per page

for query in all_queries:
    # Pagination: at most 10 pages of `limit` results each.
    for start in range(0, limit * 10, limit):
        print('\n--- QUERY:', query, 'start:', start, '---\n')
        data = get_data(query, start, limit)
        #show_data(data)
        filtered = filter_data(data)
        print(filtered)
        if not filtered:
            # An empty page means the results are exhausted; skip the
            # remaining requests for this query.
            break